From 69d97098fa798a5dfb9aa6e5c46bb0d10bf6419a Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 19 Mar 2025 14:01:18 -0400 Subject: [PATCH 01/61] add litellm apm integration --- ddtrace/_monkey.py | 1 + ddtrace/contrib/internal/litellm/patch.py | 95 +++++++++++++++++++++++ ddtrace/contrib/internal/litellm/utils.py | 7 ++ ddtrace/contrib/litellm/__init__.py | 14 ++++ ddtrace/llmobs/_integrations/__init__.py | 2 + ddtrace/llmobs/_integrations/litellm.py | 18 +++++ ddtrace/settings/_config.py | 1 + 7 files changed, 138 insertions(+) create mode 100644 ddtrace/contrib/internal/litellm/patch.py create mode 100644 ddtrace/contrib/internal/litellm/utils.py create mode 100644 ddtrace/contrib/litellm/__init__.py create mode 100644 ddtrace/llmobs/_integrations/litellm.py diff --git a/ddtrace/_monkey.py b/ddtrace/_monkey.py index 4f04ec8fca2..0ac4d56d672 100644 --- a/ddtrace/_monkey.py +++ b/ddtrace/_monkey.py @@ -51,6 +51,7 @@ "httpx": True, "kafka": True, "langgraph": False, + "litellm": True, "mongoengine": True, "mysql": True, "mysqldb": True, diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py new file mode 100644 index 00000000000..7ffa141551d --- /dev/null +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -0,0 +1,95 @@ +import os +import sys + +import litellm + +from ddtrace import config +from ddtrace.contrib.trace_utils import unwrap +from ddtrace.contrib.trace_utils import with_traced_module +from ddtrace.contrib.trace_utils import wrap +from ddtrace.contrib.internal.litellm.utils import get_provider +from ddtrace.llmobs._integrations import LiteLLMIntegration +from ddtrace.trace import Pin +from ddtrace.internal.utils import get_argument_value + + +config._add( + "litellm", + { + "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), + "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), + }, +) + + +def get_version(): + # type: () -> str + return getattr(litellm, "__version__", "") + + +def _create_span(litellm, pin, func, instance, args, kwargs): + """Helper function to create and configure a traced span.""" + integration = litellm._datadog_integration + model = get_argument_value(args, kwargs, 0, "model", None) + span = integration.trace( + pin, + "litellm.%s" % func.__name__, + model=model, + provider=get_provider(model), + submit_to_llmobs=False, + ) + return span + + +@with_traced_module +def traced_completion(litellm, pin, func, instance, args, kwargs): + span = _create_span(litellm, pin, func, instance, args, kwargs) + try: + return func(*args, **kwargs) + except Exception: + span.set_exc_info(*sys.exc_info()) + raise + finally: + span.finish() + + +@with_traced_module +async def traced_acompletion(litellm, pin, func, instance, args, kwargs): + span = _create_span(litellm, pin, func, instance, args, kwargs) + try: + return await func(*args, **kwargs) + except Exception: + span.set_exc_info(*sys.exc_info()) + raise + finally: + span.finish() + + +def patch(): + if getattr(litellm, "_datadog_patch", False): + return + + litellm._datadog_patch = True + + Pin().onto(litellm) + integration = LiteLLMIntegration(integration_config=config.litellm) + litellm._datadog_integration = integration + + wrap("litellm", "completion", traced_completion(litellm)) + wrap("litellm", "acompletion", traced_acompletion(litellm)) + wrap("litellm", "text_completion", traced_completion(litellm)) + wrap("litellm", "atext_completion", traced_acompletion(litellm)) + + +def unpatch(): + if 
not getattr(litellm, "_datadog_patch", False): + return + + litellm._datadog_patch = False + + unwrap("litellm", "completion", traced_completion(litellm)) + unwrap("litellm", "acompletion", traced_acompletion(litellm)) + unwrap("litellm", "text_completion", traced_completion(litellm)) + unwrap("litellm", "atext_completion", traced_acompletion(litellm)) + + delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py new file mode 100644 index 00000000000..dd6d766ad2b --- /dev/null +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -0,0 +1,7 @@ +# TODO: temporary since we may want to intercept get_llm_provider response +def get_provider(model): + parsed_model = model.split("/") + if len(parsed_model) == 2: + return parsed_model[0] + else: + return "" diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py new file mode 100644 index 00000000000..af284d63775 --- /dev/null +++ b/ddtrace/contrib/litellm/__init__.py @@ -0,0 +1,14 @@ +# TODO: documentation + +from ddtrace.internal.utils.importlib import require_modules + + +required_modules = ["litellm"] + +with require_modules(required_modules) as missing_modules: + if not missing_modules: + from ddtrace.contrib.internal.litellm.patch import get_version + from ddtrace.contrib.internal.litellm.patch import patch + from ddtrace.contrib.internal.litellm.patch import unpatch + + __all__ = ["patch", "unpatch", "get_version"] \ No newline at end of file diff --git a/ddtrace/llmobs/_integrations/__init__.py b/ddtrace/llmobs/_integrations/__init__.py index 71cae092197..d7c5bdab0da 100644 --- a/ddtrace/llmobs/_integrations/__init__.py +++ b/ddtrace/llmobs/_integrations/__init__.py @@ -3,6 +3,7 @@ from .bedrock import BedrockIntegration from .gemini import GeminiIntegration from .langchain import LangChainIntegration +from .litellm import LiteLLMIntegration from .openai import OpenAIIntegration from .vertexai import VertexAIIntegration @@ -13,6 +14,7 @@ "BedrockIntegration", "GeminiIntegration", "LangChainIntegration", + "LiteLLMIntegration", "OpenAIIntegration", "VertexAIIntegration", ] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py new file mode 100644 index 00000000000..afe7a373f06 --- /dev/null +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -0,0 +1,18 @@ +from typing import Any +from typing import Dict +from typing import Optional + +from ddtrace.trace import Span +from ddtrace.llmobs._integrations.base import BaseLLMIntegration + + +class LiteLLMIntegration(BaseLLMIntegration): + _integration_name = "litellm" + + def _set_base_span_tags( + self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] + ) -> None: + if provider is not None: + span.set_tag_str("litellm.request.provider", provider) + if model is not None: + span.set_tag_str("litellm.request.model", model) \ No newline at end of file diff --git a/ddtrace/settings/_config.py b/ddtrace/settings/_config.py index f91845fe4b9..7921de0c288 100644 --- a/ddtrace/settings/_config.py +++ b/ddtrace/settings/_config.py @@ -111,6 +111,7 @@ "unittest", "falcon", "langgraph", + "litellm", "aioredis", "test_visibility", "redis", From f6c9ead0b7b91764696a18acf16c2f884b556d26 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 19 Mar 2025 17:04:23 -0400 Subject: [PATCH 02/61] add tests for litellm --- .riot/requirements/17b7978.txt | 64 +++++ .riot/requirements/1db92d0.txt | 
67 +++++ .riot/requirements/1f657b3.txt | 64 +++++ .riot/requirements/8c9f21c.txt | 67 +++++ ddtrace/contrib/internal/litellm/patch.py | 14 +- riotfile.py | 10 + tests/contrib/litellm/__init__.py | 0 .../litellm/cassettes/acompletion.yaml | 106 +++++++ .../acompletion.yaml_multiple_choices | 107 ++++++++ .../litellm/cassettes/acompletion_stream.yaml | 145 ++++++++++ .../acompletion_stream.yaml_multiple_choices | 258 ++++++++++++++++++ .../litellm/cassettes/atext_completion.yaml | 106 +++++++ .../atext_completion.yaml_multiple_choices | 106 +++++++ .../cassettes/atext_completion_stream.yaml | 141 ++++++++++ ...xt_completion_stream.yaml_multiple_choices | 180 ++++++++++++ .../contrib/litellm/cassettes/completion.yaml | 107 ++++++++ .../completion.yaml_multiple_choices | 107 ++++++++ .../litellm/cassettes/completion_stream.yaml | 181 ++++++++++++ .../completion_stream.yaml_multiple_choices | 234 ++++++++++++++++ .../litellm/cassettes/text_completion.yaml | 106 +++++++ .../text_completion.yaml_multiple_choices | 106 +++++++ .../cassettes/text_completion_stream.yaml | 133 +++++++++ ...xt_completion_stream.yaml_multiple_choices | 177 ++++++++++++ tests/contrib/litellm/conftest.py | 57 ++++ tests/contrib/litellm/test_litellm.py | 55 ++++ tests/contrib/litellm/test_litellm_patch.py | 30 ++ tests/contrib/litellm/utils.py | 15 + ....test_litellm.test_litellm_completion.json | 28 ++ 28 files changed, 2766 insertions(+), 5 deletions(-) create mode 100644 .riot/requirements/17b7978.txt create mode 100644 .riot/requirements/1db92d0.txt create mode 100644 .riot/requirements/1f657b3.txt create mode 100644 .riot/requirements/8c9f21c.txt create mode 100644 tests/contrib/litellm/__init__.py create mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml create mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml create mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/completion.yaml create mode 100644 tests/contrib/litellm/cassettes/completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml create mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/conftest.py create mode 100644 tests/contrib/litellm/test_litellm.py create mode 100644 tests/contrib/litellm/test_litellm_patch.py create mode 100644 tests/contrib/litellm/utils.py create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json diff --git a/.riot/requirements/17b7978.txt b/.riot/requirements/17b7978.txt new file mode 100644 index 00000000000..798b258db60 --- /dev/null +++ b/.riot/requirements/17b7978.txt @@ -0,0 +1,64 @@ +# +# This file is 
autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/17b7978.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/1db92d0.txt b/.riot/requirements/1db92d0.txt new file mode 100644 index 00000000000..e86bb4cb0aa --- /dev/null +++ b/.riot/requirements/1db92d0.txt @@ -0,0 +1,67 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1db92d0.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +async-timeout==5.0.1 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +exceptiongroup==1.2.2 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tomli==2.2.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/1f657b3.txt b/.riot/requirements/1f657b3.txt new file mode 100644 index 00000000000..d1a93e65777 --- /dev/null +++ b/.riot/requirements/1f657b3.txt @@ -0,0 +1,64 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1f657b3.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 
+jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/8c9f21c.txt b/.riot/requirements/8c9f21c.txt new file mode 100644 index 00000000000..4c7ee2bb6e4 --- /dev/null +++ b/.riot/requirements/8c9f21c.txt @@ -0,0 +1,67 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/8c9f21c.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +async-timeout==5.0.1 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +exceptiongroup==1.2.2 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tomli==2.2.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==1.26.20 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 7ffa141551d..1ce2e073c73 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -1,5 +1,6 @@ import os import sys +from importlib.metadata import version import litellm @@ -24,7 +25,10 @@ def get_version(): # type: () -> str - return getattr(litellm, "__version__", "") + try: + return version("litellm") + except Exception: + return "" def _create_span(litellm, pin, func, instance, args, kwargs): @@ -87,9 +91,9 @@ def unpatch(): litellm._datadog_patch = False - unwrap("litellm", "completion", traced_completion(litellm)) - unwrap("litellm", "acompletion", traced_acompletion(litellm)) - unwrap("litellm", "text_completion", traced_completion(litellm)) - unwrap("litellm", "atext_completion", traced_acompletion(litellm)) + unwrap(litellm, "completion") + unwrap(litellm, "acompletion") + unwrap(litellm, "text_completion") + unwrap(litellm, "atext_completion") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/riotfile.py b/riotfile.py index de16e76ed3e..5fcf5d52097 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2576,6 +2576,16 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT "langgraph": "~=0.2.60", }, ), + Venv( + name="litellm", + command="pytest {cmdargs} 
tests/contrib/litellm", + pys=select_pys(min_version="3.9", max_version="3.12"), + pkgs={ + "litellm": latest, + "vcrpy": latest, + "pytest-asyncio": latest, + }, + ), Venv( name="anthropic", command="pytest {cmdargs} tests/contrib/anthropic", diff --git a/tests/contrib/litellm/__init__.py b/tests/contrib/litellm/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml b/tests/contrib/litellm/cassettes/acompletion.yaml new file mode 100644 index 00000000000..1ef458cb6cf --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '83' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLBahsxEL3vV0x16cU2cew4jS+B5NBATaEk0EIJiyzN7irVaoQ0cmuC + /71o7XjXbQK56DBv3tN7M/NcAAijxRKEaiSr1tvxzW2636x89bCd39+mL9/iQ9h8Xq8WP1Z3qhKj + zKD1Eyp+YU0Utd4iG3J7WAWUjFl1ejk/n08Xi09XHdCSRptptefxbHIx5hTWND6bnl8cmA0ZhVEs + 4WcBAPDcvdmj0/hHLOFs9FJpMUZZo1gemwBEIJsrQsZoIkvHYtSDihyj62zf4Ra4wYAf4CsxtEk1 + I3hKkaGRrjauBko8ge+N5I8RkoffhhvYUroeCgasUpQ5kEvWDgDpHLHMA+miPB6Q3dG8pdoHWsd/ + qKIyzsSmDCgjuWw0MnnRobsC4LEbUjrJLXyg1nPJ9Au776azvZzo1zIALw8gE0vb12eHwZ6qlRpZ + GhsHQxZKqgZ1z+w3IpM2NACKQeb/zbymvc9tXP0e+R5QCj2jLn1AbdRp4L4tYD7at9qOM+4Mi4hh + YxSWbDDkPWisZLL7cxJxGxnbsjKuxuCD6W4q77HYFX8BAAD//wMAtRqFIFIDAAA= + headers: + CF-RAY: + - 922fc592ca8ee61b-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:38:09 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Og1MhUPOSLUYFX6sLOZjXUt6_Ii7DeHec6bu0xEwveU-1742416689-1.0.1.1-24FPrr2zTb6eb.iYCyCZ5tyNDAaMwFjbJQ6MuU6ZZfarYDu945JdhKsS.h0Vc5bvUu4cEVkN072A15WuJ.KstgoBD.hgm.Owir7t6Mfrs4A; + path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=igKx8CPA0FCh5KeOwdnLy585rLuR2kKt7gGASW5nWSA-1742416689508-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '312' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c35f7392746227673bdca3d80b74722e + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices 
b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices new file mode 100644 index 00000000000..69fe82a0cc4 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA8RTwW7bMAy9+ysInZMgTho0y2VYOwwdMHSnYYehMBSJsdXKoipRa7Oi/z7ITmNn + 3YBdhl184ON7fnykngoAYbTYgFCNZNV6O724THc/vnxaf31/9eH68l7rz/PHhb+450TvSjHJDNre + ouIX1kxR6y2yIdfDKqBkzKrl+dnirDxfz990QEsababVnqfL2WrKKWxpOi8XqwOzIaMwig18KwAA + nrpv9ug0PooNzCcvlRZjlDWKzbEJQASyuSJkjCaydCwmA6jIMbrO9jUxtEk1E7hNkaHBgMAEPQ32 + lGZwRQ+gpIOP0KD1uQZMWu7fjiUD7lKUeSSXrB0B0jlimSPphrk5IM9H+5ZqH2gbf6GKnXEmNlVA + Gcllq5HJi2JEfpVJ+U8zOU7/YLgB6fbcGFd3FYeox0EN8f33qAqAm+6i0kkgwgdqPVdMd9j9rlz2 + cmK44QFcrg8gE0s71FeHxE/VKo0sjY2j9IWSqkE9MIfzlUkbGgHjDb828zvtfm7j6r+RHwCl0DPq + ygfURp0OPLQFzC/8T23HjDvDImL4bhRWbDDkPWjcyWT7OxNxHxnbamdcjcEH0x1b3mPxXPwEAAD/ + /wMAj6bv838EAAA= + headers: + CF-RAY: + - 922fe0ef9f095818-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:50 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=aPjfuM9i_nmKcJm5fCd1envIdCwOQVscUCICrBTkJYU-1742417810-1.0.1.1-ADp9ZNwLfBcVhfTOqZ9bjV6taFMKE6YhPfsBJvKDzRAsgzcYNT1xHhcwjuBPt.NhHPZYGalO3QieMp1UU1gMwYJMN1JlwKpDJS5CoYVFdps; + path=/; expires=Wed, 19-Mar-25 21:26:50 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vDmdz8TD9L7irSgxD2Y3qy54hE9HNQW7QTl3s0jxFGk-1742417810365-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '404' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999962' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_a2320c1466e6e6e7e4ca227666ddef05 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml b/tests/contrib/litellm/cassettes/acompletion_stream.yaml new file mode 100644 index 00000000000..eaa66f573f1 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion_stream.yaml @@ -0,0 +1,145 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + 
headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '137' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + how"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fc595fb4f9c5e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:38:09 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=qJIrfX0cSgQGfLPmiIO9pJtmBk4d0TDxYojb3KwrOAQ-1742416689-1.0.1.1-H5.hTje2ckYwcrDNDdl7MVOpEbekK8vcdWlkX69z8CQtZFgoHg3xBQ2p0ijtOfOZoSsO.dkOlaQsLHQLTsnsPz5Ku2XJpBJx48ai9xyu_b4; + path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=L6iVhGFB5poEf4mgyHgsiQer_LAFpmPwx7BsZksUMxA-1742416689952-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '174' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7d0818d359483bfb4ae5dd041857b220 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..b3f1528ee49 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices @@ -0,0 +1,258 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + 
x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + ready"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":40,"total_tokens":53,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0e1892df27e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:49 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=_sDHtJttvjcmNqUWyQLC0HY_6ceDdhabvgxP_mSWetQ-1742417809-1.0.1.1-CqlTg5EIrNFOnvIRAcesGqLwPwg3FZ18khnoA0HR26ZkfsHWDW2u.nJYbbUMztUsr2FmgqcE_dOzuuEF.u5QN04xVbjgSkJ9zBXVj1Y5Ei0; + path=/; expires=Wed, 19-Mar-25 21:26:49 
GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=3J8OnKlA8uybv8hIzboB47mhL0FVEQahZhquEcDPxcM-1742417809367-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '329' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_95fb491365244b0b18b9daf1bc26cf5f + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml b/tests/contrib/litellm/cassettes/atext_completion.yaml new file mode 100644 index 00000000000..dd13fc8e98e --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '78' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLBTuMwEL3nKwafKWq6LYheVoJL97x7Qyhy7Uni4ngse7K0Qv13ZKc0 + 6QLSXnyYN+/5vZl5KwCE0WINQrWSVeft7OGx//26n2+Wrlwc9n/cQ1PXZFd3tGp2G3GdGLTdoeIP + 1o2izltkQ26AVUDJmFTLu+ViWd7e3s8z0JFGm2iN59mPm9WM+7Cl2bxcrE7MlozCKNbwVAAAvOU3 + eXQa92INWSdXOoxRNijW5yYAEcimipAxmsjSsbgeQUWO0WXbG7SWgFsMeAUbegUlHfyCgQYH6oFJ + y8PPKT1g3UeZ7Lve2gkgnSOWKX42/nxCjmerlhofaBv/oYraOBPbKqCM5JKtyORFRo8FwHMeSX+R + UvhAneeK6QXzd/eDmhh3MGJleQKZWNqxvjhN8VKs0sjS2DiZqFBStahH5jh+2WtDE6CYRP5s5ivt + IbZxzf/Ij4BS6Bl15QNqoy4Dj20B04V+13YecTYsIoa/RmHFBkNag8Za9na4HREPkbGrauMaDD6Y + fEBpjcWxeAcAAP//AwBZaYouPwMAAA== + headers: + CF-RAY: + - 922fc59a1ac781f9-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:38:10 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Bz9ucbRhHir1yYpu5unXOWIq57sqhNmDYXwU8KYxhHk-1742416690-1.0.1.1-SaoVbDJEo.BTnLOYloqvADWkOFfKvIPkYdRataztswTUOadi9nBdpnxOrxqBTCDld5JP_w__0pINDjdJi4sSxdlMlK3f0SG9r54Vxu19sPQ; + path=/; expires=Wed, 19-Mar-25 21:08:10 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=CocuhkG0h7KH_xyamzsz3.bJ.3F_Hbx3vpGagUoqjNk-1742416690671-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '292' + 
openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_a2a6419c8a6accc5e78eef471a5b26f5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices new file mode 100644 index 00000000000..7c33ea81265 --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '84' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9xTsW7bMBDd9RXXm+0gUmIk8RK0XVIU6dAOHYpAoMmzzJTiMeSpqRHk3wtKtiUn + LdA5C4d7947v8R6fCgC0BpeAeqNEt8HNP3zsXPX965VpzuRRPXxZPNxe3r6vWrr/LN9wlhm8uict + e9aJ5jY4Est+gHUkJZSnlhfn1Xl5cVlWPdCyIZdpTZD52cliLl1c8fy0rBY75oatpoRL+FEAADz1 + Z9boDf3GJZzO9pWWUlIN4fLQBICRXa6gSskmUV5wNoKavZDvZd+Qc/wObvgRtPLwCQYCbLkDYaO2 + 11NipHWXVBbuO+cmgPKeRWXjveS7HfJ8EOm4CZFX6QUV19bbtKkjqcQ+C0rCAYsJ+ZXz8u07LwDu + +hh0R/4wRG6D1MI/qb/uapiGY+5GrNolBIVFuUl9TzoaVhsSZV2avCVqpTdkRuYYOdUZyxNguq/X + Yv42e7BtffM/40dAawpCpg6RjNXHhse2SPlX/qvt8MS9YEwUf1lNtViKeQ2G1qpzQ2owbZNQW6+t + byiGaPvo5DUWz8UfAAAA//8DALgzECozBAAA + headers: + CF-RAY: + - 922fe0fe5c567048-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:52 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=gCkAh45BSGsfWua_FvRAhLkamJr4XgRd0lcnofceYBY-1742417812-1.0.1.1-_Ift61wmuekINzK5SeNl.ZXlL8hJS1voTYf6n8_6aUhDSUOfy.a2z5vgSwlHYq9IvWGj3LuIw32DO0HcDW_yICnwNQSWKsmr9e1.SAjWwMY; + path=/; expires=Wed, 19-Mar-25 21:26:52 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=QA9rcm0xWUBveoAGWFSSbnWbR50iE8_T0TS2HZr70GQ-1742417812613-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '284' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999964' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_4f430f54186e98254720edf4049f781c + 
status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml new file mode 100644 index 00000000000..bfb6b17bb6d --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml @@ -0,0 +1,141 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '132' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + 
you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":10,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fd11f3abc6fd4-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:46:02 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=V_JLXb_xlWAgshE0jP2yNY0KBReG3a8K9k.1.dtFMCg-1742417162-1.0.1.1-FZF9YKksh6mPiowW_hJcxYsQgyc8V.sjUl892Qq3mA7LD_3uGUbH7U.DjmnY8HxjecXeWIVp3wTlLleq10jNmS8WvmrJg76.LaSNBV6tQ4U; + path=/; expires=Wed, 19-Mar-25 21:16:02 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WqtIXB3UQHUJ15_13eNn1X9VBnzvnBe8zPBAhTHs9K0-1742417162393-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '163' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e2fdb82e16db2663c51b4ae5540e74e5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..7980c0ce553 --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices @@ -0,0 +1,180 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '138' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 
1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0faab2cc9bb-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:51 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=T4a2h0oJlCPxhohnl5Y8ce47sMTKxTSiMx4YnMHdCeE-1742417811-1.0.1.1-7h_MyNOSh.23MJSDS07CHWQSCd54y1UXY03vB9MV_upmjlqus6.JWajf9T9VvkjWxvSy_46nZsEU.neeA_2Ok7EBScbstRENB_le3WcBoCs; + path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=s1wC2SePnrWoNLlq9w_9HSceo63N6ZY6bl87NsDSz7U-1742417811921-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '183' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999964' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_947acd4ad0207061c681399bb70031ac + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml b/tests/contrib/litellm/cassettes/completion.yaml new file mode 100644 index 00000000000..8b1c5e95ffb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '83' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzknQfGFbLgPSAUUvvXTDDkNhKDJjq5FFRaTaBUX+ + +yAnjd21A3bRgQ9fii/JlwJA2UqtQJlGi2mDG6+v080u/Vi399c3e6zx2634u/XefN88870aZQVt + HtHIq2piqA0OxZI/YRNRC+aq00+L2WK6/DJfdKClCl2W1UHG88lyLCluaHw1nS3PyoasQVYr+FUA + ALx0b+7RV/hbreBq9BppkVnXqFaXJAAVyeWI0syWRXtRox4a8oK+a/uOBNpkmhE8JhZoMCIIwUkG + B0rwbKUB7Q+wT8jZGgNFEM077nirD9DoJ5zAz0YLGO3hFhp04aL+Ovw74jaxzt59cm4AtPckuvsg + u344k+PFp6M6RNrwX1K1td5yU0bUTD57YqGgOnosAB66eaY3I1IhUhukFNph9910fiqn+g32cLY8 + QyHRro/PP48+qFZWKNo6HuxDGW0arHplvzydKksDUAw8v2/mo9on39bX/1O+B8ZgEKzKELGy5q3h + 
Pi1ivu9/pV1m3DWsGOOTNViKxZj3UOFWJ3e6PMUHFmzLrfU1xhBtd355j8Wx+AMAAP//AwCIlZxL + fQMAAA== + headers: + CF-RAY: + - 922fb3249bf7e5c3-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:25:34 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; + path=/; expires=Wed, 19-Mar-25 20:55:34 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '536' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_831c50d74f79d3c6f55b46c6165ad726 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices new file mode 100644 index 00000000000..3284d55e07f --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + cookie: + - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA8RTTWsbMRC9768YdLaDHTs49aU0oV+UFkpvKWGRpfGuYq1GSKMkJuS/F2kd76ZJ + oZfSyx7mzZt9783ooQIQRos1CNVKVp2304vLtLs/t1fvLr/srn7suu9f31y8d/Zjsh/wVkwygzY3 + qPiJdaKo8xbZkOthFVAy5qnz1fJ0OV+dz1YF6EijzbTG83RxcjblFDY0nc1Pzw7MlozCKNbwswIA + eCjfrNFpvBdrmE2eKh3GKBsU62MTgAhkc0XIGE1k6VhMBlCRY3RF9jdi6JJqJ3CTIkOLAYEJihv4 + RHcgA8Ke0tsxP+A2RZn1u2TtCJDOEcvsvyi/PiCPR62WGh9oE3+jiq1xJrZ1QBnJZV2RyYtqRH4R + wPyfBtDTsnO4M9yCdHtujWtKxSHqPh0lHXyGFq0vAJOW+/8YVQVwXc4nPQtE+ECd55pph+V380U/ + TgwHO4CL5QFkYmmH+nI1eWVarZGlsXGUvlBStagH5nCrMmlDI2C84ZdiXpvd+zau+ZvxA6AUekZd + +4DaqOeGh7aA+Tn/qe2YcREsIoZbo7BmgyHvQeNWJtvfmYj7yNjVW+MaDD6Ycmx5j9Vj9QsAAP// + AwBjQR3ubAQAAA== + headers: + CF-RAY: + - 922fe0dd3980081e-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:47 GMT + Server: + - 
cloudflare + Set-Cookie: + - __cf_bm=4IvKUKDYbguDfkct3LJSPjqtZQXESBNXlj0FYX2EhDw-1742417807-1.0.1.1-NheqJGPsBtnZt86lvpEQ399jpX9C0.Meer7zqTrBFvtM1nDS.F2nb3Am2CumeUA9gl3hKjHDRDn2VRJSIEJL1F4Ki3Doz2f86LPah_teN_M; + path=/; expires=Wed, 19-Mar-25 21:26:47 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '429' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_039283032c4707703c924619286ae6b1 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml b/tests/contrib/litellm/cassettes/completion_stream.yaml new file mode 100644 index 00000000000..8b9ee2258a0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml @@ -0,0 +1,181 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '137' + content-type: + - application/json + cookie: + - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; + _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + 
+ + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":21,"total_tokens":34,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fbf33c84f05d4-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:33:49 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '484' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ed79ffc55abcad13d82ad45a85b50cef + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..71352afe403 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices @@ -0,0 +1,234 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + 
data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + might"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0d84bc32081-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:46 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=bUH6Gw.1xWGTdrtJJp3Kd1QwQu.citY3dSw84SuP9a8-1742417806-1.0.1.1-nd3_tPgN5caA927YXL7MDwbkwDcsY2.cOUvLdkUaaYxi7UqUPwCwwGgjDsSpkg1AHFx7aR.wS8GKU2eBr2aujsFMmkWLmL_ohd4qBtE6K84; + path=/; expires=Wed, 19-Mar-25 21:26:46 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=spcOdmzqi_GTX7NW.qMkPKuD0G7qOz1ab6PGnOf_m_s-1742417806409-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '172' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999962' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_68b16a7f5aec94206951718ec91e4166 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml b/tests/contrib/litellm/cassettes/text_completion.yaml new file mode 100644 index 
00000000000..80fd89bcb39 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '78' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Z7tgM7jtvGlwINiqSPQ9Fji0CgyZXEhOIS5KqtEfjf + C0q2JbcNkIsOOzujmeE+FQBoDW4AdaNEt8HN3990tw/fFu6Tflvx6sutbR4/7D5///q6dTcOZ5nB + 2wfScmRdaG6DI7HsB1hHUkJZdfnm6vJqub5erXqgZUMu0+og89XFei5d3PJ8sbxcH5gNW00JN/Cj + AAB46r/Zozf0GzewmB0nLaWkasLNaQkAI7s8QZWSTaK84GwENXsh39u+I+f4FdzxL9DKw0cYCLDj + DoSN2r2bEiNVXVLZuO+cmwDKexaVg/eW7w/I/mTScR0ib9NfVKyst6kpI6nEPhtKwgF7dF8A3Pdl + dGf5MERug5TCj9T/7npQw7H9EVseekJhUW4yP5LOxEpDoqxLky5RK92QGZlj8aozlidAMYn8r5n/ + aQ+xra9fIj8CWlMQMmWIZKw+DzyuRcq3+dzaqeLeMCaKP62mUizF/AyGKtW54Wow7ZJQW1bW1xRD + tP3p5Gcs9sUfAAAA//8DAEy0bTM5AwAA + headers: + CF-RAY: + - 922fb31c6b4a3b86-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:25:33 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=OVR0c7pmjNlvkgITOy_5zxiGhdaeoh2rYi0sMIwKUGw-1742415933-1.0.1.1-9ldeCN1Z0Gzz63GmLhFMPkykl_aiDMZHh9jdn_aB8Mwaq8j8c3UX0EJL_RDMTdRgRFAjD7RDiSRhuM45kkZ8yvViyheANvDky_wdNXC7pmg; + path=/; expires=Wed, 19-Mar-25 20:55:33 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '310' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999980' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f68897fc201c7e0cbeac660a95b4e136 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices new file mode 100644 index 00000000000..743d1a085e2 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '84' + content-type: + - application/json + cookie: + - 
_cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9xTwW4TMRC971cMc06qbGjUNhdEikSBWw9cULVy7Mmuweux7FlKqPrvyLtJdkNB + 4szFh3nzxu95np8KALQG14C6UaLb4Oab286VPzcf79/fNtvN4nH5rnt7f/1Z6NMqRJxlBm+/kpYj + 60JzGxyJZT/AOpISylPLq8vlZXl1XZY90LIhl2l1kPnri9Vcurjl+aJcrg7Mhq2mhGv4UgAAPPVn + 1ugN/cA1LGbHSkspqZpwfWoCwMguV1ClZJMoLzgbQc1eyPey78g5fgV3/AhaefgAAwH23IGwUfs3 + U2KkXZdUFu475yaA8p5FZeO95IcD8nwS6bgOkbfpNyrurLepqSKpxD4LSsIBiwn5hfPy/3deADz0 + MejO/GGI3AaphL9Rf93NMA3H3I3Y8pAQFBblJvUj6WxYZUiUdWnylqiVbsiMzDFyqjOWJ8B0Xy/F + /Gn2YNv6+l/Gj4DWFIRMFSIZq88Nj22R8q/8W9vpiXvBmCh+t5oqsRTzGgztVOeG1GDaJ6G22llf + UwzR9tHJayyei18AAAD//wMABa0m/DMEAAA= + headers: + CF-RAY: + - 922fe0f67e45081e-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:51 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=FlHOj93lCOi0G8vOplzZPeLSW3Vdv0Hb2HoLy7MM6Us-1742417811-1.0.1.1-kahp3aLVlFkNG2jXCdMC.nUEAogyYTUDKmeLcLdMShR8EnclldclSdDIIWq6EgH9RGnwBGy5.NfkQ9REHjpAGzXjaNfX3IBb1LAF6cQGWOE; + path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '347' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_0a1d4fa700dec9fa2f39b6a53af4d9c2 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml b/tests/contrib/litellm/cassettes/text_completion_stream.yaml new file mode 100644 index 00000000000..bc9e6f2efc4 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion_stream.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '132' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: 
https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":8,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fd11b9ae3c971-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:46:01 GMT + Server: + - cloudflare + Set-Cookie: + - 
__cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs; + path=/; expires=Wed, 19-Mar-25 21:16:01 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '226' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999980' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_6c0123908864d876c53702ce7507e69e + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..62b0b934b66 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices @@ -0,0 +1,177 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '138' + content-type: + - application/json + cookie: + - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; + __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0f349d2081e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:50 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '172' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + 
x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_775dd461f5f264e40aeb0c5ab23fe071 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py new file mode 100644 index 00000000000..590b1986652 --- /dev/null +++ b/tests/contrib/litellm/conftest.py @@ -0,0 +1,57 @@ +import os +from typing import Generator + +import pytest +from ddtrace.contrib.internal.litellm.patch import patch +from ddtrace.trace import Pin +from ddtrace.contrib.internal.litellm.patch import unpatch +from tests.utils import DummyTracer +from tests.utils import DummyWriter +from tests.utils import override_config +from tests.utils import override_env +from tests.utils import override_global_config +from tests.contrib.litellm.utils import get_request_vcr +

def default_global_config(): + return {} + + +@pytest.fixture +def ddtrace_global_config(): + return {} +

@pytest.fixture +def ddtrace_config_litellm(): + return {} + + +@pytest.fixture +def litellm(ddtrace_global_config, ddtrace_config_litellm): + global_config = default_global_config() + global_config.update(ddtrace_global_config) + with override_global_config(global_config): + with override_config("litellm", ddtrace_config_litellm): + with override_env( + dict( + OPENAI_API_KEY=os.getenv("OPENAI_API_KEY", ""), + ) + ): + patch() + import litellm + + yield litellm + unpatch() + + +@pytest.fixture +def mock_tracer(litellm): + pin = Pin.get_from(litellm) + mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) + pin._override(litellm, tracer=mock_tracer) + pin.tracer._configure() + yield mock_tracer + + +@pytest.fixture +def request_vcr(): + return get_request_vcr() diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py new file mode 100644 index 00000000000..f3ce38c872f --- /dev/null +++ b/tests/contrib/litellm/test_litellm.py @@ -0,0 +1,55 @@ +import pytest +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +def test_litellm_completion(litellm, request_vcr, stream, n): + cassette = "completion.yaml" if not stream else "completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + messages = [{"content": "Hey, what is up?", "role": "user"}] + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +async def test_litellm_acompletion(litellm, request_vcr, stream, n): + cassette = "acompletion.yaml" if not stream else "acompletion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + messages = [{"content": "Hey, what is up?", "role": "user"}] + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +def test_litellm_text_completion(litellm, 
request_vcr, stream, n): + cassette = "text_completion.yaml" if not stream else "text_completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + litellm.text_completion( + model="gpt-3.5-turbo", + prompt="Hello world", + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +async def test_litellm_atext_completion(litellm, request_vcr, stream, n): + cassette = "atext_completion.yaml" if not stream else "atext_completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + await litellm.atext_completion( + model="gpt-3.5-turbo", + prompt="Hello world", + stream=stream, + n=n, + ) diff --git a/tests/contrib/litellm/test_litellm_patch.py b/tests/contrib/litellm/test_litellm_patch.py new file mode 100644 index 00000000000..4946aeda49d --- /dev/null +++ b/tests/contrib/litellm/test_litellm_patch.py @@ -0,0 +1,30 @@ +from ddtrace.contrib.internal.litellm.patch import get_version +from ddtrace.contrib.internal.litellm.patch import patch +from ddtrace.contrib.internal.litellm.patch import unpatch +from tests.contrib.patch import PatchTestCase + + +class TestLitellmPatch(PatchTestCase.Base): + __integration_name__ = "litellm" + __module_name__ = "litellm" + __patch_func__ = patch + __unpatch_func__ = unpatch + __get_version__ = get_version + + def assert_module_patched(self, litellm): + self.assert_wrapped(litellm.completion) + self.assert_wrapped(litellm.acompletion) + self.assert_wrapped(litellm.text_completion) + self.assert_wrapped(litellm.atext_completion) + + def assert_not_module_patched(self, litellm): + self.assert_not_wrapped(litellm.completion) + self.assert_not_wrapped(litellm.acompletion) + self.assert_not_wrapped(litellm.text_completion) + self.assert_not_wrapped(litellm.atext_completion) + + def assert_not_module_double_patched(self, litellm): + self.assert_not_double_wrapped(litellm.completion) + self.assert_not_double_wrapped(litellm.acompletion) + self.assert_not_double_wrapped(litellm.text_completion) + self.assert_not_double_wrapped(litellm.atext_completion) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py new file mode 100644 index 00000000000..8b140d6cd0c --- /dev/null +++ b/tests/contrib/litellm/utils.py @@ -0,0 +1,15 @@ +import vcr +import os + +# VCR is used to capture and store network requests made to OpenAI. +# This is done to avoid making real calls to the API which could introduce +# flakiness and cost. 
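+#
+# For illustration, a test replays a recorded interaction roughly like this
+# (sketch only; "completion.yaml" is one of the cassettes checked in under
+# tests/contrib/litellm/cassettes, and get_request_vcr is defined below):
+#
+#     with get_request_vcr().use_cassette("completion.yaml"):
+#         litellm.completion(
+#             model="gpt-3.5-turbo",
+#             messages=[{"content": "Hey, what is up?", "role": "user"}],
+#         )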
+def get_request_vcr(): + return vcr.VCR( + cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"), + record_mode="once", + match_on=["path"], + filter_headers=["authorization", "x-api-key", "api-key"], + # Ignore requests to the agent + ignore_localhost=True, + ) \ No newline at end of file diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json new file mode 100644 index 00000000000..fc0b82fadf8 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67db283e00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "f4fa019846d24cc9a50c88d550a339dd" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 18466 + }, + "duration": 737978000, + "start": 1742415934103250000 + }]] From 7e43133d642cd2ace84da7037b7a185db0d7c4df Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 20 Mar 2025 09:26:30 -0400 Subject: [PATCH 03/61] reuse cassettes that are similar across different litellm requests --- .../litellm/cassettes/acompletion.yaml | 106 -------- .../acompletion.yaml_multiple_choices | 107 -------- .../litellm/cassettes/acompletion_stream.yaml | 145 ----------- .../litellm/cassettes/atext_completion.yaml | 106 -------- .../atext_completion.yaml_multiple_choices | 106 -------- .../cassettes/atext_completion_stream.yaml | 141 ----------- ...xt_completion_stream.yaml_multiple_choices | 180 -------------- .../contrib/litellm/cassettes/completion.yaml | 41 ++- ...oices => completion_multiple_choices.yaml} | 37 ++- .../litellm/cassettes/completion_stream.yaml | 85 ++++--- .../completion_stream.yaml_multiple_choices | 234 ------------------ ...> completion_stream_multiple_choices.yaml} | 149 +++++------ .../litellm/cassettes/text_completion.yaml | 106 -------- .../text_completion.yaml_multiple_choices | 106 -------- .../cassettes/text_completion_stream.yaml | 133 ---------- ...xt_completion_stream.yaml_multiple_choices | 177 ------------- tests/contrib/litellm/test_litellm.py | 18 +- tests/contrib/litellm/utils.py | 12 +- 18 files changed, 175 insertions(+), 1814 deletions(-) delete mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml delete mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices rename tests/contrib/litellm/cassettes/{completion.yaml_multiple_choices => completion_multiple_choices.yaml} (60%) delete mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices rename tests/contrib/litellm/cassettes/{acompletion_stream.yaml_multiple_choices => completion_stream_multiple_choices.yaml} (55%) delete mode 100644 
tests/contrib/litellm/cassettes/text_completion.yaml delete mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml b/tests/contrib/litellm/cassettes/acompletion.yaml deleted file mode 100644 index 1ef458cb6cf..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '83' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLBahsxEL3vV0x16cU2cew4jS+B5NBATaEk0EIJiyzN7irVaoQ0cmuC - /71o7XjXbQK56DBv3tN7M/NcAAijxRKEaiSr1tvxzW2636x89bCd39+mL9/iQ9h8Xq8WP1Z3qhKj - zKD1Eyp+YU0Utd4iG3J7WAWUjFl1ejk/n08Xi09XHdCSRptptefxbHIx5hTWND6bnl8cmA0ZhVEs - 4WcBAPDcvdmj0/hHLOFs9FJpMUZZo1gemwBEIJsrQsZoIkvHYtSDihyj62zf4Ra4wYAf4CsxtEk1 - I3hKkaGRrjauBko8ge+N5I8RkoffhhvYUroeCgasUpQ5kEvWDgDpHLHMA+miPB6Q3dG8pdoHWsd/ - qKIyzsSmDCgjuWw0MnnRobsC4LEbUjrJLXyg1nPJ9Au776azvZzo1zIALw8gE0vb12eHwZ6qlRpZ - GhsHQxZKqgZ1z+w3IpM2NACKQeb/zbymvc9tXP0e+R5QCj2jLn1AbdRp4L4tYD7at9qOM+4Mi4hh - YxSWbDDkPWisZLL7cxJxGxnbsjKuxuCD6W4q77HYFX8BAAD//wMAtRqFIFIDAAA= - headers: - CF-RAY: - - 922fc592ca8ee61b-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:38:09 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=Og1MhUPOSLUYFX6sLOZjXUt6_Ii7DeHec6bu0xEwveU-1742416689-1.0.1.1-24FPrr2zTb6eb.iYCyCZ5tyNDAaMwFjbJQ6MuU6ZZfarYDu945JdhKsS.h0Vc5bvUu4cEVkN072A15WuJ.KstgoBD.hgm.Owir7t6Mfrs4A; - path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=igKx8CPA0FCh5KeOwdnLy585rLuR2kKt7gGASW5nWSA-1742416689508-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '312' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999978' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_c35f7392746227673bdca3d80b74722e - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices 
b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices deleted file mode 100644 index 69fe82a0cc4..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices +++ /dev/null @@ -1,107 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '89' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA8RTwW7bMAy9+ysInZMgTho0y2VYOwwdMHSnYYehMBSJsdXKoipRa7Oi/z7ITmNn - 3YBdhl184ON7fnykngoAYbTYgFCNZNV6O724THc/vnxaf31/9eH68l7rz/PHhb+450TvSjHJDNre - ouIX1kxR6y2yIdfDKqBkzKrl+dnirDxfz990QEsababVnqfL2WrKKWxpOi8XqwOzIaMwig18KwAA - nrpv9ug0PooNzCcvlRZjlDWKzbEJQASyuSJkjCaydCwmA6jIMbrO9jUxtEk1E7hNkaHBgMAEPQ32 - lGZwRQ+gpIOP0KD1uQZMWu7fjiUD7lKUeSSXrB0B0jlimSPphrk5IM9H+5ZqH2gbf6GKnXEmNlVA - Gcllq5HJi2JEfpVJ+U8zOU7/YLgB6fbcGFd3FYeox0EN8f33qAqAm+6i0kkgwgdqPVdMd9j9rlz2 - cmK44QFcrg8gE0s71FeHxE/VKo0sjY2j9IWSqkE9MIfzlUkbGgHjDb828zvtfm7j6r+RHwCl0DPq - ygfURp0OPLQFzC/8T23HjDvDImL4bhRWbDDkPWjcyWT7OxNxHxnbamdcjcEH0x1b3mPxXPwEAAD/ - /wMAj6bv838EAAA= - headers: - CF-RAY: - - 922fe0ef9f095818-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:50 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=aPjfuM9i_nmKcJm5fCd1envIdCwOQVscUCICrBTkJYU-1742417810-1.0.1.1-ADp9ZNwLfBcVhfTOqZ9bjV6taFMKE6YhPfsBJvKDzRAsgzcYNT1xHhcwjuBPt.NhHPZYGalO3QieMp1UU1gMwYJMN1JlwKpDJS5CoYVFdps; - path=/; expires=Wed, 19-Mar-25 21:26:50 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=vDmdz8TD9L7irSgxD2Y3qy54hE9HNQW7QTl3s0jxFGk-1742417810365-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '404' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999962' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_a2320c1466e6e6e7e4ca227666ddef05 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml b/tests/contrib/litellm/cassettes/acompletion_stream.yaml deleted file mode 100644 index eaa66f573f1..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion_stream.yaml +++ /dev/null @@ -1,145 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is 
up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '137' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - how"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fc595fb4f9c5e-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:38:09 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=qJIrfX0cSgQGfLPmiIO9pJtmBk4d0TDxYojb3KwrOAQ-1742416689-1.0.1.1-H5.hTje2ckYwcrDNDdl7MVOpEbekK8vcdWlkX69z8CQtZFgoHg3xBQ2p0ijtOfOZoSsO.dkOlaQsLHQLTsnsPz5Ku2XJpBJx48ai9xyu_b4; - path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=L6iVhGFB5poEf4mgyHgsiQer_LAFpmPwx7BsZksUMxA-1742416689952-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '174' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_7d0818d359483bfb4ae5dd041857b220 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml b/tests/contrib/litellm/cassettes/atext_completion.yaml deleted file mode 100644 index dd13fc8e98e..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '78' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - 
x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLBTuMwEL3nKwafKWq6LYheVoJL97x7Qyhy7Uni4ngse7K0Qv13ZKc0 - 6QLSXnyYN+/5vZl5KwCE0WINQrWSVeft7OGx//26n2+Wrlwc9n/cQ1PXZFd3tGp2G3GdGLTdoeIP - 1o2izltkQ26AVUDJmFTLu+ViWd7e3s8z0JFGm2iN59mPm9WM+7Cl2bxcrE7MlozCKNbwVAAAvOU3 - eXQa92INWSdXOoxRNijW5yYAEcimipAxmsjSsbgeQUWO0WXbG7SWgFsMeAUbegUlHfyCgQYH6oFJ - y8PPKT1g3UeZ7Lve2gkgnSOWKX42/nxCjmerlhofaBv/oYraOBPbKqCM5JKtyORFRo8FwHMeSX+R - UvhAneeK6QXzd/eDmhh3MGJleQKZWNqxvjhN8VKs0sjS2DiZqFBStahH5jh+2WtDE6CYRP5s5ivt - IbZxzf/Ij4BS6Bl15QNqoy4Dj20B04V+13YecTYsIoa/RmHFBkNag8Za9na4HREPkbGrauMaDD6Y - fEBpjcWxeAcAAP//AwBZaYouPwMAAA== - headers: - CF-RAY: - - 922fc59a1ac781f9-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:38:10 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=Bz9ucbRhHir1yYpu5unXOWIq57sqhNmDYXwU8KYxhHk-1742416690-1.0.1.1-SaoVbDJEo.BTnLOYloqvADWkOFfKvIPkYdRataztswTUOadi9nBdpnxOrxqBTCDld5JP_w__0pINDjdJi4sSxdlMlK3f0SG9r54Vxu19sPQ; - path=/; expires=Wed, 19-Mar-25 21:08:10 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=CocuhkG0h7KH_xyamzsz3.bJ.3F_Hbx3vpGagUoqjNk-1742416690671-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '292' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_a2a6419c8a6accc5e78eef471a5b26f5 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices deleted file mode 100644 index 7c33ea81265..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '84' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9xTsW7bMBDd9RXXm+0gUmIk8RK0XVIU6dAOHYpAoMmzzJTiMeSpqRHk3wtKtiUn - 
LdA5C4d7947v8R6fCgC0BpeAeqNEt8HNP3zsXPX965VpzuRRPXxZPNxe3r6vWrr/LN9wlhm8uict - e9aJ5jY4Est+gHUkJZSnlhfn1Xl5cVlWPdCyIZdpTZD52cliLl1c8fy0rBY75oatpoRL+FEAADz1 - Z9boDf3GJZzO9pWWUlIN4fLQBICRXa6gSskmUV5wNoKavZDvZd+Qc/wObvgRtPLwCQYCbLkDYaO2 - 11NipHWXVBbuO+cmgPKeRWXjveS7HfJ8EOm4CZFX6QUV19bbtKkjqcQ+C0rCAYsJ+ZXz8u07LwDu - +hh0R/4wRG6D1MI/qb/uapiGY+5GrNolBIVFuUl9TzoaVhsSZV2avCVqpTdkRuYYOdUZyxNguq/X - Yv42e7BtffM/40dAawpCpg6RjNXHhse2SPlX/qvt8MS9YEwUf1lNtViKeQ2G1qpzQ2owbZNQW6+t - byiGaPvo5DUWz8UfAAAA//8DALgzECozBAAA - headers: - CF-RAY: - - 922fe0fe5c567048-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:52 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=gCkAh45BSGsfWua_FvRAhLkamJr4XgRd0lcnofceYBY-1742417812-1.0.1.1-_Ift61wmuekINzK5SeNl.ZXlL8hJS1voTYf6n8_6aUhDSUOfy.a2z5vgSwlHYq9IvWGj3LuIw32DO0HcDW_yICnwNQSWKsmr9e1.SAjWwMY; - path=/; expires=Wed, 19-Mar-25 21:26:52 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=QA9rcm0xWUBveoAGWFSSbnWbR50iE8_T0TS2HZr70GQ-1742417812613-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '284' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999964' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_4f430f54186e98254720edf4049f781c - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml deleted file mode 100644 index bfb6b17bb6d..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml +++ /dev/null @@ -1,141 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '132' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":10,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fd11f3abc6fd4-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; 
charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:46:02 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=V_JLXb_xlWAgshE0jP2yNY0KBReG3a8K9k.1.dtFMCg-1742417162-1.0.1.1-FZF9YKksh6mPiowW_hJcxYsQgyc8V.sjUl892Qq3mA7LD_3uGUbH7U.DjmnY8HxjecXeWIVp3wTlLleq10jNmS8WvmrJg76.LaSNBV6tQ4U; - path=/; expires=Wed, 19-Mar-25 21:16:02 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=WqtIXB3UQHUJ15_13eNn1X9VBnzvnBe8zPBAhTHs9K0-1742417162393-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '163' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_e2fdb82e16db2663c51b4ae5540e74e5 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices deleted file mode 100644 index 7980c0ce553..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,180 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '138' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0faab2cc9bb-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:51 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=T4a2h0oJlCPxhohnl5Y8ce47sMTKxTSiMx4YnMHdCeE-1742417811-1.0.1.1-7h_MyNOSh.23MJSDS07CHWQSCd54y1UXY03vB9MV_upmjlqus6.JWajf9T9VvkjWxvSy_46nZsEU.neeA_2Ok7EBScbstRENB_le3WcBoCs; - path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=s1wC2SePnrWoNLlq9w_9HSceo63N6ZY6bl87NsDSz7U-1742417811921-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '183' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - 
x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999964' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_947acd4ad0207061c681399bb70031ac - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml b/tests/contrib/litellm/cassettes/completion.yaml index 8b1c5e95ffb..8a48a419be1 100644 --- a/tests/contrib/litellm/cassettes/completion.yaml +++ b/tests/contrib/litellm/cassettes/completion.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1}' headers: accept: - application/json @@ -9,9 +9,12 @@ interactions: connection: - keep-alive content-length: - - '83' + - '89' content-type: - application/json + cookie: + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -41,18 +44,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzknQfGFbLgPSAUUvvXTDDkNhKDJjq5FFRaTaBUX+ - +yAnjd21A3bRgQ9fii/JlwJA2UqtQJlGi2mDG6+v080u/Vi399c3e6zx2634u/XefN88870aZQVt - HtHIq2piqA0OxZI/YRNRC+aq00+L2WK6/DJfdKClCl2W1UHG88lyLCluaHw1nS3PyoasQVYr+FUA - ALx0b+7RV/hbreBq9BppkVnXqFaXJAAVyeWI0syWRXtRox4a8oK+a/uOBNpkmhE8JhZoMCIIwUkG - B0rwbKUB7Q+wT8jZGgNFEM077nirD9DoJ5zAz0YLGO3hFhp04aL+Ovw74jaxzt59cm4AtPckuvsg - u344k+PFp6M6RNrwX1K1td5yU0bUTD57YqGgOnosAB66eaY3I1IhUhukFNph9910fiqn+g32cLY8 - QyHRro/PP48+qFZWKNo6HuxDGW0arHplvzydKksDUAw8v2/mo9on39bX/1O+B8ZgEKzKELGy5q3h - Pi1ivu9/pV1m3DWsGOOTNViKxZj3UOFWJ3e6PMUHFmzLrfU1xhBtd355j8Wx+AMAAP//AwCIlZxL - fQMAAA== + H4sIAAAAAAAAAwAAAP//jFJNaxsxEL3vrxh0tk38VQdfSk0PpZCGXkqhhEWWZleytRpVmq3rBP/3 + orXj3aQp9KLDvHlP897MUwEgrBZrEMpIVk1w483HD6v774+z3eawr+p7vfm6f7z7bO7w24J+ilFm + 0HaHip9ZE0VNcMiW/BlWESVjVp2uFrPF6t3t8rYDGtLoMq0OPJ5PlmNu45bGN9PZ8sI0ZBUmsYYf + BQDAU/fmGb3G32INN6PnSoMpyRrF+toEICK5XBEyJZtYehajHlTkGX039hdiaFplRrBrE4PBiMAE + 2Q1Ir8GgC3CwbED6IxvrazhSC42tDYNH1BP4RAeQEbu6Juvr98O/IlZtktmrb50bANJ7Ypmz6lw+ + XJDT1ZejOkTapldUUVlvkykjykQ+e0hMQXToqQB46PJrX0QiQqQmcMm0x+676fwsJ/qN9eBsegGZ + WLq+Pl+M3lArNbK0Lg3yF0oqg7pn9suSrbY0AIqB57+HeUv77Nv6+n/ke0ApDIy6DBG1VS8N920R + 8z3/q+2acTewSBh/WYUlW4x5Dxor2brzpYl0TIxNWVlfYwzRdueW91icij8AAAD//wMACIVjLG0D + AAA= headers: CF-RAY: - - 922fb3249bf7e5c3-IAD + - 9235828e7e930798-IAD Connection: - keep-alive Content-Encoding: @@ -60,15 +63,9 @@ interactions: Content-Type: - application/json Date: - - Wed, 19 Mar 2025 20:25:34 GMT + - Thu, 20 Mar 2025 13:20:59 GMT Server: - cloudflare - Set-Cookie: - - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; - path=/; expires=Wed, 19-Mar-25 20:55:34 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -82,7 +79,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '536' + - 
'433' openai-version: - '2020-10-01' strict-transport-security: @@ -94,13 +91,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999978' + - '1999994' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_831c50d74f79d3c6f55b46c6165ad726 + - req_d81e25e4ffb6cba1e92b5907b689fefb status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_multiple_choices.yaml similarity index 60% rename from tests/contrib/litellm/cassettes/completion.yaml_multiple_choices rename to tests/contrib/litellm/cassettes/completion_multiple_choices.yaml index 3284d55e07f..c39b5e948d3 100644 --- a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices.yaml @@ -13,7 +13,8 @@ interactions: content-type: - application/json cookie: - - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -43,18 +44,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAA8RTTWsbMRC9768YdLaDHTs49aU0oV+UFkpvKWGRpfGuYq1GSKMkJuS/F2kd76ZJ - oZfSyx7mzZt9783ooQIQRos1CNVKVp2304vLtLs/t1fvLr/srn7suu9f31y8d/Zjsh/wVkwygzY3 - qPiJdaKo8xbZkOthFVAy5qnz1fJ0OV+dz1YF6EijzbTG83RxcjblFDY0nc1Pzw7MlozCKNbwswIA - eCjfrNFpvBdrmE2eKh3GKBsU62MTgAhkc0XIGE1k6VhMBlCRY3RF9jdi6JJqJ3CTIkOLAYEJihv4 - RHcgA8Ke0tsxP+A2RZn1u2TtCJDOEcvsvyi/PiCPR62WGh9oE3+jiq1xJrZ1QBnJZV2RyYtqRH4R - wPyfBtDTsnO4M9yCdHtujWtKxSHqPh0lHXyGFq0vAJOW+/8YVQVwXc4nPQtE+ECd55pph+V380U/ - TgwHO4CL5QFkYmmH+nI1eWVarZGlsXGUvlBStagH5nCrMmlDI2C84ZdiXpvd+zau+ZvxA6AUekZd - +4DaqOeGh7aA+Tn/qe2YcREsIoZbo7BmgyHvQeNWJtvfmYj7yNjVW+MaDD6Ycmx5j9Vj9QsAAP// - AwBjQR3ubAQAAA== + H4sIAAAAAAAAAwAAAP//xFRNb9swDL37VxA6J0HdJM2Wy9BiGPYN7DwUhiIxtlJZFCQ6nVH0vw9S + sthdOqCXYhcf+Pie+B4JPxQAwmixBqEayar1dnrz/nr1Y96trr/EXfvt7W5/t/9QfnVb8jebz2KS + GLTZoeI/rJmi1ltkQ+4Aq4CSMamWq8XlYnX15uoiAy1ptIlWe57OZ8spd2FD04vycnlkNmQURrGG + nwUAwEP+phmdxl9iDVknV1qMUdYo1qcmABHIpoqQMZrI0rGYDKAix+jy2N+Joe1UM4FdFxkaDAjS + aQgodQ9McBCAnjq4N9yAdD03xtW54hD1DD7SPSjp4BM0aH0GmLTs343fDLjtokyeXWftCJDOEcuU + WXZ7e0QeT/4s1T7QJv5FFVvjTGyqgDKSS14ikxfFiHwWWvl6oTFBOoEcXo7hPK1W9qPEZMBc1WRc + /R+jKgBu88l1TwIRPlDruWK6w/xcOT/IieHIB3CxOIJMLO1QX64mz6hVGlkaG0fpCyVVg3pgDvct + O21oBIw3fD7Mc9oH38bVL5EfAKXQM+rKB9RGPTU8tAVMv4B/tZ0yzgOLiGFvFFZsMKQ9aNzKzh7u + TMQ+MrbV1rgagw8mH1vaY/FY/AYAAP//AwBdjdH5oAQAAA== headers: CF-RAY: - - 922fe0dd3980081e-IAD + - 923582928a520798-IAD Connection: - keep-alive Content-Encoding: @@ -62,13 +63,9 @@ interactions: Content-Type: - application/json Date: - - Wed, 19 Mar 2025 20:56:47 GMT + - Thu, 20 Mar 2025 13:21:01 GMT Server: - cloudflare - Set-Cookie: - - __cf_bm=4IvKUKDYbguDfkct3LJSPjqtZQXESBNXlj0FYX2EhDw-1742417807-1.0.1.1-NheqJGPsBtnZt86lvpEQ399jpX9C0.Meer7zqTrBFvtM1nDS.F2nb3Am2CumeUA9gl3hKjHDRDn2VRJSIEJL1F4Ki3Doz2f86LPah_teN_M; - path=/; expires=Wed, 19-Mar-25 21:26:47 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -82,7 +79,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '429' + - '693' openai-version: - '2020-10-01' strict-transport-security: @@ 
-94,13 +91,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999963' + - '1999993' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - - 1ms + - 0s x-request-id: - - req_039283032c4707703c924619286ae6b1 + - req_a9fed9cf625f2496c21b9413f11fa6da status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml b/tests/contrib/litellm/cassettes/completion_stream.yaml index 8b9ee2258a0..7a7febb6e30 100644 --- a/tests/contrib/litellm/cassettes/completion_stream.yaml +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true}}' headers: accept: - application/json @@ -9,12 +9,9 @@ interactions: connection: - keep-alive content-length: - - '137' + - '143' content-type: - application/json - cookie: - - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -43,89 +40,85 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + string: 'data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: 
{"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":21,"total_tokens":34,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":20,"total_tokens":33,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} data: [DONE] @@ -134,15 +127,21 @@ interactions: ' headers: CF-RAY: - - 922fbf33c84f05d4-IAD + - 92358286cc050820-IAD Connection: - keep-alive Content-Type: - text/event-stream; charset=utf-8 Date: - - Wed, 19 Mar 2025 20:33:49 GMT + - Thu, 20 Mar 2025 13:20:57 GMT Server: - cloudflare + Set-Cookie: + - __cf_bm=JpzSaUROcp7sHhVWx1Wg27uDVgIq4vtDDwL7tZMUMG0-1742476857-1.0.1.1-eRa7HeyAvoVA0z.zlCfx_r9.xUo7yEVtZ1ptiY1CSeeePSEkliaQZrsM4AVOsv7GH9Ftos4TLzwxoFAc8sVc7GGKJb.QKZXcTpVEemhzheI; + path=/; expires=Thu, 20-Mar-25 13:50:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vOlVxqZHnk9s1Ko78yMITZ4lWCYFB_cPjAEkvBcMff0-1742476857669-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -156,7 +155,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '484' + - '150' openai-version: - '2020-10-01' strict-transport-security: @@ -168,13 +167,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999978' + - '1999994' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_ed79ffc55abcad13d82ad45a85b50cef + - req_c6b88c23a429b4c97b52dbf20f562742 status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices deleted file mode 100644 index 71352afe403..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,234 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '143' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - here"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - 
here"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - chat"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - with"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - anything"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - about"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - might"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - need"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0d84bc32081-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:46 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=bUH6Gw.1xWGTdrtJJp3Kd1QwQu.citY3dSw84SuP9a8-1742417806-1.0.1.1-nd3_tPgN5caA927YXL7MDwbkwDcsY2.cOUvLdkUaaYxi7UqUPwCwwGgjDsSpkg1AHFx7aR.wS8GKU2eBr2aujsFMmkWLmL_ohd4qBtE6K84; - path=/; expires=Wed, 19-Mar-25 21:26:46 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=spcOdmzqi_GTX7NW.qMkPKuD0G7qOz1ab6PGnOf_m_s-1742417806409-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '172' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999962' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_68b16a7f5aec94206951718ec91e4166 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml similarity index 55% rename from tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices rename to tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml index b3f1528ee49..9fb9a1bfbc3 100644 --- a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml @@ -15,11 +15,11 @@ interactions: host: - api.openai.com user-agent: - - AsyncOpenAI/Python 1.66.5 + - OpenAI/Python 1.66.5 x-stainless-arch: - arm64 x-stainless-async: - - async:asyncio + - 'false' x-stainless-lang: - python x-stainless-os: @@ -40,163 +40,174 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + string: 'data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hey"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - here"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" ready"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - and"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - chat"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + 
data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":40,"total_tokens":53,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":43,"total_tokens":56,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} data: [DONE] @@ -205,20 +216,20 @@ interactions: ' headers: CF-RAY: - - 922fe0e1892df27e-IAD + - 9235828a5ad20798-IAD Connection: - keep-alive Content-Type: - text/event-stream; charset=utf-8 Date: - - Wed, 19 Mar 2025 20:56:49 GMT + - Thu, 20 Mar 2025 13:20:58 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=_sDHtJttvjcmNqUWyQLC0HY_6ceDdhabvgxP_mSWetQ-1742417809-1.0.1.1-CqlTg5EIrNFOnvIRAcesGqLwPwg3FZ18khnoA0HR26ZkfsHWDW2u.nJYbbUMztUsr2FmgqcE_dOzuuEF.u5QN04xVbjgSkJ9zBXVj1Y5Ei0; - path=/; expires=Wed, 19-Mar-25 21:26:49 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + path=/; expires=Thu, 20-Mar-25 13:50:58 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=3J8OnKlA8uybv8hIzboB47mhL0FVEQahZhquEcDPxcM-1742417809367-0.0.1.1-604800000; + - _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked @@ -233,7 +244,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '329' + - '195' openai-version: - '2020-10-01' strict-transport-security: @@ -245,13 +256,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999963' + - '1999993' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - - 1ms + - 0s x-request-id: - - req_95fb491365244b0b18b9daf1bc26cf5f + - req_802b8ab4acaedc8189bfb4216f72135f status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml b/tests/contrib/litellm/cassettes/text_completion.yaml deleted file mode 100644 index 80fd89bcb39..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '78' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - 
x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Z7tgM7jtvGlwINiqSPQ9Fji0CgyZXEhOIS5KqtEfjf - C0q2JbcNkIsOOzujmeE+FQBoDW4AdaNEt8HN3990tw/fFu6Tflvx6sutbR4/7D5///q6dTcOZ5nB - 2wfScmRdaG6DI7HsB1hHUkJZdfnm6vJqub5erXqgZUMu0+og89XFei5d3PJ8sbxcH5gNW00JN/Cj - AAB46r/Zozf0GzewmB0nLaWkasLNaQkAI7s8QZWSTaK84GwENXsh39u+I+f4FdzxL9DKw0cYCLDj - DoSN2r2bEiNVXVLZuO+cmwDKexaVg/eW7w/I/mTScR0ib9NfVKyst6kpI6nEPhtKwgF7dF8A3Pdl - dGf5MERug5TCj9T/7npQw7H9EVseekJhUW4yP5LOxEpDoqxLky5RK92QGZlj8aozlidAMYn8r5n/ - aQ+xra9fIj8CWlMQMmWIZKw+DzyuRcq3+dzaqeLeMCaKP62mUizF/AyGKtW54Wow7ZJQW1bW1xRD - tP3p5Gcs9sUfAAAA//8DAEy0bTM5AwAA - headers: - CF-RAY: - - 922fb31c6b4a3b86-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:25:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=OVR0c7pmjNlvkgITOy_5zxiGhdaeoh2rYi0sMIwKUGw-1742415933-1.0.1.1-9ldeCN1Z0Gzz63GmLhFMPkykl_aiDMZHh9jdn_aB8Mwaq8j8c3UX0EJL_RDMTdRgRFAjD7RDiSRhuM45kkZ8yvViyheANvDky_wdNXC7pmg; - path=/; expires=Wed, 19-Mar-25 20:55:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '310' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999980' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_f68897fc201c7e0cbeac660a95b4e136 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices deleted file mode 100644 index 743d1a085e2..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '84' - content-type: - - application/json - cookie: - - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: 
- body: - string: !!binary | - H4sIAAAAAAAAA9xTwW4TMRC971cMc06qbGjUNhdEikSBWw9cULVy7Mmuweux7FlKqPrvyLtJdkNB - 4szFh3nzxu95np8KALQG14C6UaLb4Oab286VPzcf79/fNtvN4nH5rnt7f/1Z6NMqRJxlBm+/kpYj - 60JzGxyJZT/AOpISylPLq8vlZXl1XZY90LIhl2l1kPnri9Vcurjl+aJcrg7Mhq2mhGv4UgAAPPVn - 1ugN/cA1LGbHSkspqZpwfWoCwMguV1ClZJMoLzgbQc1eyPey78g5fgV3/AhaefgAAwH23IGwUfs3 - U2KkXZdUFu475yaA8p5FZeO95IcD8nwS6bgOkbfpNyrurLepqSKpxD4LSsIBiwn5hfPy/3deADz0 - MejO/GGI3AaphL9Rf93NMA3H3I3Y8pAQFBblJvUj6WxYZUiUdWnylqiVbsiMzDFyqjOWJ8B0Xy/F - /Gn2YNv6+l/Gj4DWFIRMFSIZq88Nj22R8q/8W9vpiXvBmCh+t5oqsRTzGgztVOeG1GDaJ6G22llf - UwzR9tHJayyei18AAAD//wMABa0m/DMEAAA= - headers: - CF-RAY: - - 922fe0f67e45081e-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:51 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=FlHOj93lCOi0G8vOplzZPeLSW3Vdv0Hb2HoLy7MM6Us-1742417811-1.0.1.1-kahp3aLVlFkNG2jXCdMC.nUEAogyYTUDKmeLcLdMShR8EnclldclSdDIIWq6EgH9RGnwBGy5.NfkQ9REHjpAGzXjaNfX3IBb1LAF6cQGWOE; - path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '347' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999963' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_0a1d4fa700dec9fa2f39b6a53af4d9c2 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml b/tests/contrib/litellm/cassettes/text_completion_stream.yaml deleted file mode 100644 index bc9e6f2efc4..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion_stream.yaml +++ /dev/null @@ -1,133 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '132' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - are"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":8,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fd11b9ae3c971-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:46:01 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs; - path=/; expires=Wed, 19-Mar-25 21:16:01 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - 
nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '226' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999980' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_6c0123908864d876c53702ce7507e69e - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices deleted file mode 100644 index 62b0b934b66..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,177 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '138' - content-type: - - application/json - cookie: - - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; - __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - 
data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0f349d2081e-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:50 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '172' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999963' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_775dd461f5f264e40aeb0c5ab23fe071 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index f3ce38c872f..f1bd6c1f455 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,11 +1,11 @@ import pytest +from tests.contrib.litellm.utils import get_cassette_name + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) 
@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): - cassette = "completion.yaml" if not stream else "completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{ "content": "Hey, what is up?","role": "user"}] litellm.completion( model="gpt-3.5-turbo", @@ -17,9 +17,7 @@ def test_litellm_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_acompletion(litellm, request_vcr, stream, n): - cassette = "acompletion.yaml" if not stream else "acompletion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{ "content": "Hey, what is up?","role": "user"}] await litellm.acompletion( model="gpt-3.5-turbo", @@ -31,9 +29,7 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): - cassette = "text_completion.yaml" if not stream else "text_completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", @@ -44,9 +40,7 @@ def test_litellm_text_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): - cassette = "atext_completion.yaml" if not stream else "atext_completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 8b140d6cd0c..f95503dce6c 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -1,6 +1,8 @@ import vcr import os +CASSETTE_EXTENSION = ".yaml" + # VCR is used to capture and store network requests made to Anthropic. # This is done to avoid making real calls to the API which could introduce # flakiness and cost.
@@ -12,4 +14,12 @@ def get_request_vcr(): filter_headers=["authorization", "x-api-key", "api-key"], # Ignore requests to the agent ignore_localhost=True, - ) \ No newline at end of file + ) + +# Get the name of the cassette to use for a given test. +# All LiteLLM requests that use OpenAI get routed to the chat completions endpoint, +# so we can reuse the same cassette for each combination of stream and n. +def get_cassette_name(stream, n): + stream_suffix = "_stream" if stream else "" + choice_suffix = "_multiple_choices" if n > 1 else "" + return "completion" + stream_suffix + choice_suffix + CASSETTE_EXTENSION From 65714e5d9905c4366ee4af9a339becde8c1f2efd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 20 Mar 2025 14:29:17 -0400 Subject: [PATCH 04/61] add test snapshots --- ddtrace/contrib/internal/litellm/patch.py | 3 ++ ddtrace/contrib/internal/litellm/utils.py | 6 ++++ ...llm.test_litellm_acompletion[False-1].json | 28 +++++++++++++++++++ ...llm.test_litellm_acompletion[False-2].json | 28 +++++++++++++++++++ ...ellm.test_litellm_acompletion[True-2].json | 28 +++++++++++++++++++ ...est_litellm_atext_completion[False-1].json | 28 +++++++++++++++++++ ...est_litellm_atext_completion[False-2].json | 28 +++++++++++++++++++ ...test_litellm_atext_completion[True-2].json | 28 +++++++++++++++++++ ....test_litellm.test_litellm_completion.json | 10 +++---- ...ellm.test_litellm_completion[False-1].json | 28 +++++++++++++++++++ ...ellm.test_litellm_completion[False-2].json | 28 +++++++++++++++++++ ...tellm.test_litellm_completion[True-2].json | 28 +++++++++++++++++++ ...test_litellm_text_completion[False-1].json | 28 +++++++++++++++++++ ...test_litellm_text_completion[False-2].json | 28 +++++++++++++++++++ ....test_litellm_text_completion[True-2].json | 28 +++++++++++++++++++ 15 files changed, 350 insertions(+), 5 deletions(-) create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 1ce2e073c73..911fc2a7147 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -9,6 +9,7 @@ from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap from
ddtrace.contrib.internal.litellm.utils import get_provider +from ddtrace.contrib.internal.litellm.utils import tag_request from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -48,6 +49,7 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): span = _create_span(litellm, pin, func, instance, args, kwargs) + tag_request(span, kwargs) try: return func(*args, **kwargs) except Exception: @@ -60,6 +62,7 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span = _create_span(litellm, pin, func, instance, args, kwargs) + tag_request(span, kwargs) try: return await func(*args, **kwargs) except Exception: diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index dd6d766ad2b..f3e7721ee7f 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -5,3 +5,9 @@ def get_provider(model): return parsed_model[0] else: return "" + +def tag_request(span, kwargs): + if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: + span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) + + diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json new file mode 100644 index 00000000000..900c1ac4412 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11157000, + "start": 1742495036719661000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json new file mode 100644 index 00000000000..8183d85e848 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11435000, + "start": 1742495036748464000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json 
b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json new file mode 100644 index 00000000000..a6bc65181c9 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11029000, + "start": 1742495036690303000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json new file mode 100644 index 00000000000..a05dd9ffeda --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11393000, + "start": 1742495036919703000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json new file mode 100644 index 00000000000..1bb4b5798f8 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11460000, + "start": 1742495036946048000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json new file mode 100644 index 00000000000..cae9d84f2b9 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", 
+ "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 10946000, + "start": 1742495036893874000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json index fc0b82fadf8..60e83997323 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -10,19 +10,19 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67db283e00000000", + "_dd.p.tid": "67dc5d3c00000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", "litellm.request.provider": "", - "runtime-id": "f4fa019846d24cc9a50c88d550a339dd" + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 18466 + "process_id": 34947 }, - "duration": 737978000, - "start": 1742415934103250000 + "duration": 42807000, + "start": 1742495036462994000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json new file mode 100644 index 00000000000..697da49dd16 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 57377000, + "start": 1742495036556493000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json new file mode 100644 index 00000000000..6d1d2839507 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6095000, + "start": 1742495036633397000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json new file mode 100644 index 00000000000..f563f841768 
--- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6195000, + "start": 1742495036533565000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json new file mode 100644 index 00000000000..28bf0d39941 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 7254000, + "start": 1742495036824538000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json new file mode 100644 index 00000000000..f40a76bcaa5 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6404000, + "start": 1742495036845984000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json new file mode 100644 index 00000000000..7d1105f25ef --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + 
"metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 5760000, + "start": 1742495036803937000 + }]] From 462b576f30499749c64600f6f6b3a0d190b74f6c Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 14:01:30 -0400 Subject: [PATCH 05/61] trace get_llm_provider --- ddtrace/contrib/internal/litellm/patch.py | 28 ++++++++++++++++------- ddtrace/contrib/internal/litellm/utils.py | 11 ++++++++- ddtrace/llmobs/_integrations/litellm.py | 1 + 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 911fc2a7147..e162218ee13 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -8,8 +8,8 @@ from ddtrace.contrib.trace_utils import unwrap from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap -from ddtrace.contrib.internal.litellm.utils import get_provider from ddtrace.contrib.internal.litellm.utils import tag_request +from ddtrace.contrib.internal.litellm.utils import tag_model_and_provider from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -17,10 +17,7 @@ config._add( "litellm", - { - "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), - "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), - }, + {}, ) @@ -35,12 +32,9 @@ def get_version(): def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" integration = litellm._datadog_integration - model = get_argument_value(args, kwargs, 0, "model", None) span = integration.trace( pin, "litellm.%s" % func.__name__, - model=model, - provider=get_provider(model), submit_to_llmobs=False, ) return span @@ -48,6 +42,7 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -56,11 +51,14 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: + # tag model and provider + tag_model_and_provider(litellm, span, requested_model) span.finish() @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -69,9 +67,21 @@ async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: + # tag model and provider + tag_model_and_provider(litellm, span, requested_model) span.finish() +@with_traced_module +def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) + integration = litellm._datadog_integration + model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) + # Store the provider information in the integration + integration._provider_map[requested_model] = custom_llm_provider + return model, custom_llm_provider, dynamic_api_key, api_base + + def 
patch(): if getattr(litellm, "_datadog_patch", False): return @@ -86,6 +96,7 @@ def patch(): wrap("litellm", "acompletion", traced_acompletion(litellm)) wrap("litellm", "text_completion", traced_completion(litellm)) wrap("litellm", "atext_completion", traced_acompletion(litellm)) + wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -98,5 +109,6 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") + unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f3e7721ee7f..ad4970da79b 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,10 +1,19 @@ # TODO: temporary since we may want to intercept get_llm_provider response -def get_provider(model): +def get_provider(model, kwargs): + if "custom_llm_provider" in kwargs: + return kwargs["custom_llm_provider"] parsed_model = model.split("/") if len(parsed_model) == 2: return parsed_model[0] else: return "" + +def tag_model_and_provider(litellm, span, requested_model): + span.set_tag_str("litellm.request.model", requested_model) + integration = litellm._datadog_integration + provider = integration._provider_map.get(requested_model, None) + if provider: + span.set_tag_str("litellm.request.provider", provider) def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index afe7a373f06..9602cf7336c 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -8,6 +8,7 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" + _provider_map = {} def _set_base_span_tags( self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] From a01b4159ba56096861495c5a1bd47f1e433cb8e6 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 14:06:40 -0400 Subject: [PATCH 06/61] remove provider tagging for now --- ddtrace/contrib/internal/litellm/patch.py | 21 ++----------------- ddtrace/contrib/internal/litellm/utils.py | 17 --------------- ...llm.test_litellm_acompletion[False-1].json | 11 +++++----- ...llm.test_litellm_acompletion[False-2].json | 11 +++++----- ...ellm.test_litellm_acompletion[True-2].json | 11 +++++----- ...est_litellm_atext_completion[False-1].json | 11 +++++----- ...est_litellm_atext_completion[False-2].json | 11 +++++----- ...test_litellm_atext_completion[True-2].json | 11 +++++----- ....test_litellm.test_litellm_completion.json | 11 +++++----- ...ellm.test_litellm_completion[False-1].json | 11 +++++----- ...ellm.test_litellm_completion[False-2].json | 11 +++++----- ...tellm.test_litellm_completion[True-2].json | 11 +++++----- ...test_litellm_text_completion[False-1].json | 11 +++++----- ...test_litellm_text_completion[False-2].json | 11 +++++----- ....test_litellm_text_completion[True-2].json | 11 +++++----- 15 files changed, 67 insertions(+), 114 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index e162218ee13..4cee84768b9 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -9,7 +9,6 @@ from ddtrace.contrib.trace_utils import 
with_traced_module from ddtrace.contrib.trace_utils import wrap from ddtrace.contrib.internal.litellm.utils import tag_request -from ddtrace.contrib.internal.litellm.utils import tag_model_and_provider from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -32,9 +31,11 @@ def get_version(): def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" integration = litellm._datadog_integration + model = get_argument_value(args, kwargs, 0, "model", None) span = integration.trace( pin, "litellm.%s" % func.__name__, + model=model, submit_to_llmobs=False, ) return span @@ -42,7 +43,6 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -51,14 +51,11 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: - # tag model and provider - tag_model_and_provider(litellm, span, requested_model) span.finish() @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -67,21 +64,9 @@ async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: - # tag model and provider - tag_model_and_provider(litellm, span, requested_model) span.finish() -@with_traced_module -def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) - integration = litellm._datadog_integration - model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) - # Store the provider information in the integration - integration._provider_map[requested_model] = custom_llm_provider - return model, custom_llm_provider, dynamic_api_key, api_base - - def patch(): if getattr(litellm, "_datadog_patch", False): return @@ -96,7 +81,6 @@ def patch(): wrap("litellm", "acompletion", traced_acompletion(litellm)) wrap("litellm", "text_completion", traced_completion(litellm)) wrap("litellm", "atext_completion", traced_acompletion(litellm)) - wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -109,6 +93,5 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") - unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index ad4970da79b..f9fc64dfeee 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,20 +1,3 @@ -# TODO: temporary since we may want to intercept get_llm_provider response -def get_provider(model, kwargs): - if "custom_llm_provider" in kwargs: - return kwargs["custom_llm_provider"] - parsed_model = model.split("/") - if len(parsed_model) == 2: - return parsed_model[0] - else: - return "" - -def tag_model_and_provider(litellm, span, requested_model): - span.set_tag_str("litellm.request.model", 
requested_model) - integration = litellm._datadog_integration - provider = integration._provider_map.get(requested_model, None) - if provider: - span.set_tag_str("litellm.request.provider", provider) - def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json index 900c1ac4412..214afe91718 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11157000, - "start": 1742495036719661000 + "duration": 11473000, + "start": 1742580260689536000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json index 8183d85e848..c0e47e63442 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11435000, - "start": 1742495036748464000 + "duration": 11410000, + "start": 1742580260717377000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json index a6bc65181c9..2edd0a58339 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11029000, - "start": 1742495036690303000 + "duration": 10910000, + "start": 1742580260660336000 }]] diff --git 
a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json index a05dd9ffeda..4823265c91e 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11393000, - "start": 1742495036919703000 + "duration": 11645000, + "start": 1742580260892670000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json index 1bb4b5798f8..82aa9e0797b 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11460000, - "start": 1742495036946048000 + "duration": 11638000, + "start": 1742580260921802000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json index cae9d84f2b9..019e6861dd0 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 10946000, - "start": 1742495036893874000 + "duration": 10966000, + "start": 1742580260866220000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json index 60e83997323..5d496bb6478 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - 
"_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 42807000, - "start": 1742495036462994000 + "duration": 43782000, + "start": 1742580260427196000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json index 697da49dd16..70b4b725406 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 57377000, - "start": 1742495036556493000 + "duration": 64700000, + "start": 1742580260523466000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json index 6d1d2839507..70100a1b6fe 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6095000, - "start": 1742495036633397000 + "duration": 6278000, + "start": 1742580260607014000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json index f563f841768..9f99831ec22 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6195000, - "start": 1742495036533565000 + "duration": 6885000, + "start": 
1742580260495830000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json index 28bf0d39941..3639ac22839 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 7254000, - "start": 1742495036824538000 + "duration": 7357000, + "start": 1742580260791261000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json index f40a76bcaa5..cfa5ca52417 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6404000, - "start": 1742495036845984000 + "duration": 6527000, + "start": 1742580260815275000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json index 7d1105f25ef..e68a5d04c10 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 5760000, - "start": 1742495036803937000 + "duration": 5879000, + "start": 1742580260771604000 }]] From 486979464ac9cc21819ed99efc999a41eff869bd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:37:15 -0400 Subject: [PATCH 07/61] test out other models --- .../requirements/{17b7978.txt => 45c12de.txt} | 17 ++- .../requirements/{1db92d0.txt => e05a904.txt} | 17 ++- .../requirements/{1f657b3.txt => e8c8851.txt} | 17 ++- .../requirements/{8c9f21c.txt => f30dfc2.txt} | 17 ++- riotfile.py | 3 + 
.../claude-3-5-sonnet-20240620.yaml | 86 ++++++++++++++ .../completion_vertex_ai/gemini-pro.yaml | 110 ++++++++++++++++++ tests/contrib/litellm/test_litellm.py | 23 ++++ ...t_litellm_completion_different_models.json | 27 +++++ ...anthropic_claude-3-5-sonnet-20240620].json | 27 +++++ 10 files changed, 332 insertions(+), 12 deletions(-) rename .riot/requirements/{17b7978.txt => 45c12de.txt} (82%) rename .riot/requirements/{1db92d0.txt => e05a904.txt} (83%) rename .riot/requirements/{1f657b3.txt => e8c8851.txt} (82%) rename .riot/requirements/{8c9f21c.txt => f30dfc2.txt} (83%) create mode 100644 tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json diff --git a/.riot/requirements/17b7978.txt b/.riot/requirements/45c12de.txt similarity index 82% rename from .riot/requirements/17b7978.txt rename to .riot/requirements/45c12de.txt index 798b258db60..c2da32fca1e 100644 --- a/.riot/requirements/17b7978.txt +++ b/.riot/requirements/45c12de.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/17b7978.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/45c12de.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -10,14 +10,18 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -28,29 +32,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/1db92d0.txt b/.riot/requirements/e05a904.txt similarity index 83% rename from .riot/requirements/1db92d0.txt rename to .riot/requirements/e05a904.txt index e86bb4cb0aa..48afc8fdf4a 100644 --- a/.riot/requirements/1db92d0.txt +++ b/.riot/requirements/e05a904.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/1db92d0.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/e05a904.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -11,15 +11,19 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 
+coverage[toml]==7.7.1 distro==1.9.0 exceptiongroup==1.2.2 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -30,29 +34,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/1f657b3.txt b/.riot/requirements/e8c8851.txt similarity index 82% rename from .riot/requirements/1f657b3.txt rename to .riot/requirements/e8c8851.txt index d1a93e65777..a209020993c 100644 --- a/.riot/requirements/1f657b3.txt +++ b/.riot/requirements/e8c8851.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/1f657b3.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/e8c8851.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -10,14 +10,18 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -28,29 +32,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/8c9f21c.txt b/.riot/requirements/f30dfc2.txt similarity index 83% rename from .riot/requirements/8c9f21c.txt rename to .riot/requirements/f30dfc2.txt index 4c7ee2bb6e4..c9092e0225c 100644 --- a/.riot/requirements/8c9f21c.txt +++ b/.riot/requirements/f30dfc2.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/8c9f21c.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/f30dfc2.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -11,15 +11,19 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 exceptiongroup==1.2.2 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ 
-30,29 +34,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/riotfile.py b/riotfile.py index 5fcf5d52097..373d75c4d02 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2584,6 +2584,9 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT "litellm": latest, "vcrpy": latest, "pytest-asyncio": latest, + "botocore": latest, + "boto3": latest, + "google-auth": latest, }, ), Venv( diff --git a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml new file mode 100644 index 00000000000..a719ac0af83 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml @@ -0,0 +1,86 @@ +interactions: +- request: + body: '{"model": "claude-3-5-sonnet-20240620", "messages": [{"role": "user", "content": + [{"type": "text", "text": "Hey, what is up?"}]}], "max_tokens": 4096}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '150' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.63.12 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA2RQTW/UQAz9K8aXvcyibNoCygWVU5dyQagnhCJ3xiSjztpp7Gm7Wu1/RwlUAnGy + 9L70nk+YE3Z4sKFvdrd37Xhf33+gT5+/yXT7ZUdPX9s7DOjHiRcVm9HAGHDWsgBkls1JHAMeNHHB + DmOhmnh7sb3amoqwb9umvWzetQ0GjCrO4th9P72GOr8s9vV0eMOl6Bu4NiCB6z0UkqHSwLDGB9hD + Utk4jPTEMPFsKlSAXyaeM0tkA53hJ3PJMliA++qw3xxg5JnBFUYuExy1wnP2EUiO8FjZPKusRtcp + R1sEmwQlP6yelC1Ws7dwo88QSWAPv3evQa6Jjh/x/COguU79zGQq2CFL6r3Ogn8I48e6NMROaikB + 6/rK7oRZpuq96wOLYbe7CBgpjtzHmWlp1v8raF75mSn9z2n1v5HLq/P5FwAAAP//AwAsXlFX5AEA + AA== + headers: + CF-RAY: + - 923fd79c4a4a7cfc-EWR + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 21 Mar 2025 19:26:40 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 0280e0cf-573a-4392-b276-1b73319958fb + anthropic-ratelimit-input-tokens-limit: + - '20000' + anthropic-ratelimit-input-tokens-remaining: + - '20000' + anthropic-ratelimit-input-tokens-reset: + - '2025-03-21T19:26:40Z' + anthropic-ratelimit-output-tokens-limit: + - '4000' + anthropic-ratelimit-output-tokens-remaining: + - '4000' + anthropic-ratelimit-output-tokens-reset: + - '2025-03-21T19:26:40Z' + anthropic-ratelimit-requests-limit: + - '5' + anthropic-ratelimit-requests-remaining: + - '4' + anthropic-ratelimit-requests-reset: + - '2025-03-21T19:26:52Z' + anthropic-ratelimit-tokens-limit: + - '24000' + anthropic-ratelimit-tokens-remaining: + - '24000' + anthropic-ratelimit-tokens-reset: + - '2025-03-21T19:26:40Z' + cf-cache-status: + - DYNAMIC + request-id: + - 
req_01RRDNDcX3wjQFEMkLiTep47 + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml new file mode 100644 index 00000000000..5e712cce0bb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: assertion=eyJ0eXAiOiAiSldUIiwgImFsZyI6ICJSUzI1NiIsICJraWQiOiAiZjc0ODI1ZGMzZDE4ZWU2YTY5Y2I2YTE0NmQ5OGUxNTg4YTM5YWU3YyJ9.eyJpYXQiOiAxNzQyNTg1NTY0LCAiZXhwIjogMTc0MjU4OTE2NCwgImlzcyI6ICJsbG1vYnMtdGVzdEBkYXRhZG9nLXNhbmRib3guaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLCAiYXVkIjogImh0dHBzOi8vb2F1dGgyLmdvb2dsZWFwaXMuY29tL3Rva2VuIiwgInNjb3BlIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL2F1dGgvY2xvdWQtcGxhdGZvcm0ifQ.ZivXu-4DxUTH_3JzIngRTswHdKmxiPR_yFl7T7o7C7FSrp4zf0cHa-fid8jMBiwzOz0ooBny11AezGE0w5b15NvhbrQq3HDYdoHXGooo9yBOnhez7v5EaP8iMfpkcp0EW8DUdSUrs2-y9rYT67rA6KxxWcdQLPFyk15ka-FC3f1BsdF_c0CdoPfKEG0mpBj5OHvmwjE3L5GP-2OLgx75B9loCFs3npkEa74YfCJ5OZXHUAPgONXC9VxiXf7__Secb-sDqZLKnGi2HSwaTZJ7TWkLyVufp71IMWpYaExI9Qw2IPPok3h-tCRJjljjJ1kfFy4N0AZCv1STT3p7w8jxyA&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '801' + Content-Type: + - application/x-www-form-urlencoded + User-Agent: + - python-requests/2.32.3 + x-goog-api-client: + - gl-python/3.11.10 auth/2.38.0 auth-request-type/at cred-type/sa + method: POST + uri: https://oauth2.googleapis.com/token + response: + body: + string: '{"access_token":"ya29.c.c0ASRK0GZU0bfQUP0GYf4fuNe4BhHzumcFny6u68QFuxIsG64_oIXfd5scBCRKnTn4AavchTw0iXipb8jUGWZnRC-3IuQPmizs1oQAAWKL90jbwCqpwefQVEMkoPi6Sp1qs0RJjm3gjX_KVPPZlleAEHtWA-lxhbPkP56KRmOHaPWIC6z019UAO4wAuylihJSAq0QheNEW42e2E9NA6MAaCfMgARAvvrNhoaJ2NvvFPYTc_B4Ii8J-fdweojRHAn115d6k4LV0hDqdMeuuDmycrZPjXb6_DTRCYbXrbEjuSlHAAYLFqXaq1q1Uv_rzTt5yaGN8fQig3SZ0b6kQ959wq_6MWRrIlip3UnO3kAjMl7HKp8cZUS2l7sLAN385CjUXmxoq36By_bvYQRmavYvqlIxei6d7mym5I6Fnon1xb69jl2c4Ykatuey3yF2I195zhf1Q_nStxax5ikZoa8gztfzUQO8uz7wXt5zawkgYvvg_y4fZsU4J2gjb44encv0oaFlvw-uXS4WI-_meXXVp-vgexUvZaS6yXO31lhyiVe60BzFsanMMQu4UyJycm2bo4pfdOe6fg3uXZS2fvv8-551p9iWcixYswr-o6h_0FrucJQ0yp-wQ791mYaMx7q7a90iSaJ1s4OqeB_Mw4kl5nscbI-xjb1YVMMYd07MfMwS5fg-0Y-7lv2tYqJbk31hq_npBnvgX-96iupo6y2ZiXebMgIgBRgZxoqvQtsgBM4Jn2Wntjebw7Sy5UIqZOaleqz1nZjeXX7bw2VQkqrgFaXF-RdaBoxJF46Us0hhkRSdIkQMMQmbOf692lo6Sjwvc2ZkziieWp1-am1uaezpamYnckZy0g8eQoZY_S6dgMpMtxcS9Jinf5c2qmMxvIwQdzx3hjt3lsedhre3mhfQvf7ypS-j5g1JVxhyxRaw01jF7tycWp4ecbuVccwwWati9M-qpQ254Mtx89v1Rt85Xh7U66lu12cJc0oktJ7nX766QlZSsx-qf1ri","expires_in":3599,"token_type":"Bearer"}' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Fri, 21 Mar 2025 19:32:45 GMT + Server: + - scaffolding on HTTPServer2 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents":[{"role":"user","parts":[{"text":"Hey, what is up?"}]}],"generationConfig":{"candidate_count":1}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '109' + content-type: + - application/json + host: + - us-central1-aiplatform.googleapis.com + user-agent: + - 
litellm/1.63.12 + method: POST + uri: https://us-central1-aiplatform.googleapis.com/v1/projects/datadog-sandbox/locations/us-central1/publishers/google/models/gemini-pro:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VVXW/aMBR951d4edkLIMf5IOGlYtSCTBQQSbvSqUKGuMFbiFHstEUV/312AjRM + 6sNUlofo5n4c33MdH781ADBWJItZTCQVRhf8VB4A3sq3jvFM0kyqwNGlnDlPqfIYGx7T1Gi+B7Yk + l+8g1fNWs1WKpK8azRiyL2DMJdgUq3UT/CqEBGuSJSxLAC8kYBmQawpiljBJUvDC8zRughfCpM6Q + HCwpWNN0+1SkbTDkL4AsddmOF1fgx5rIrwIkvATLNJby5xWIqo3J7sqodbU/2Y8Ha3/kZDyxjIn1 + jBLBM912GE2mJ8aGIE9U7mZEN3XOu85aDVjShOe7kndvdrPo9yI8mMzmi6EyFuEU4/6wNkc9yZwv + yZKlTJZVYzwYBYPg2wh/mBaueK53BbahBVHHsi3vLFXQZ5of4MomQnyHZ0E0X3wEfqyoISPo+y70 + 7VPavvmvlK974wGeTW7DRX8yjvA4uiBx3zNt27Euz9uGyLMQtD7BW331wvDmwoShpdv6Dxtt+h3k + Ou4nCIf4/rY3Gs0X+H46CvrBBXkj3/aR6V3+BzehCTs2Qu+0D9bj6cyT52TEE92WPvEtVeO5jqlO + hoNcz3N8r5qZLiyLjEKQhN5QSZTEkpOQamKbrYz4b5r1eVFKrFstUhPks7CFDnHJlSaehw6jqKOK + a7UmS+u6VNNwpd3kOPsI39c3R+HXmzpqYqM2h79bvNBaFjpfrHH476qb5o7mglUynNCNEuaW2YYt + xbgFISoxjVVOVU8R25T3E4LIaUGrhczI9LsW6tpO24KO7ZsPVXpOxZZnggaxTreWnfjBn4ZYuP2N + H0xzu/Pd6wVGY9/4A9Z/lKYoBwAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Fri, 21 Mar 2025 19:32:46 GMT + Server: + - scaffolding on HTTPServer2 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index f1bd6c1f455..b8b113227b0 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,4 +1,5 @@ import pytest +import os from tests.contrib.litellm.utils import get_cassette_name @@ -47,3 +48,25 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): stream=stream, n=n, ) + +@pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") +def test_litellm_completion_different_models(litellm, request_vcr, model): + aws_access_key_id = "" + aws_secret_access_key = "" + aws_region_name = "" + if model == "bedrock/amazon.titan-text-lite-v1": + aws_access_key_id = "ASIAWYLNJGWWOJPUYN45" + aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" + aws_region_name = "us-east-1" + with request_vcr.use_cassette(f"completion_{model}.yaml"): + messages = [{ "content": "Hey, what is up?","role": "user"}] + litellm.completion( + model=model, + messages=messages, + stream=False, + n=1, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_region_name=aws_region_name, + ) \ No newline at end of file diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json new file mode 100644 index 00000000000..3f4326ea8f5 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json @@ -0,0 +1,27 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": 
"-0", + "_dd.p.tid": "67ddbedc00000000", + "language": "python", + "litellm.request.model": "vertex_ai/gemini-pro", + "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 4385 + }, + "duration": 1211993000, + "start": 1742585564858520000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json new file mode 100644 index 00000000000..852a1cdfd4a --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json @@ -0,0 +1,27 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67ddbede00000000", + "language": "python", + "litellm.request.model": "anthropic/claude-3-5-sonnet-20240620", + "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 4385 + }, + "duration": 11249000, + "start": 1742585566097702000 + }]] From 0adc8e4e26bb9de8e0863246c6ad5c1bfb49e85b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:42:19 -0400 Subject: [PATCH 08/61] add global tags test --- tests/contrib/litellm/test_litellm.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index b8b113227b0..05cedf8307e 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,8 +1,31 @@ import pytest -import os +from tests.utils import override_global_config from tests.contrib.litellm.utils import get_cassette_name +def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): + """ + When the global config UST tags are set + The service name should be used for all data + The env should be used for all data + The version should be used for all data + """ + with override_global_config(dict(service="test-svc", env="staging", version="1234")): + cassette_name = "completion.yaml" + with request_vcr.use_cassette(cassette_name): + messages = [{ "content": "Hey, what is up?","role": "user"}] + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + ) + + span = mock_tracer.pop_traces()[0][0] + assert span.resource == "litellm.completion" + assert span.service == "test-svc" + assert span.get_tag("env") == "staging" + assert span.get_tag("version") == "1234" + assert span.get_tag("litellm.request.model") == "gpt-3.5-turbo" + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): From 800fcf4300236ce05baa46417ad97f1da42ae047 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:48:10 -0400 Subject: [PATCH 09/61] add release note --- releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 
releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml diff --git a/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml b/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml new file mode 100644 index 00000000000..704d57d1e20 --- /dev/null +++ b/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + litellm: Introduces tracing support for the LiteLLM Python SDK's sync and async ``completion`` and ``text_completion`` methods. + See `the docs ` + for more information. \ No newline at end of file From 1cc21e10e9dc2df7a05fe7c7505d36a913e7b147 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:53:37 -0400 Subject: [PATCH 10/61] run black --- ddtrace/contrib/internal/litellm/patch.py | 2 +- ddtrace/contrib/internal/litellm/utils.py | 2 -- ddtrace/contrib/litellm/__init__.py | 2 +- ddtrace/llmobs/_integrations/litellm.py | 2 +- tests/contrib/litellm/conftest.py | 2 ++ tests/contrib/litellm/test_litellm.py | 20 +++++++++++++------- tests/contrib/litellm/utils.py | 4 +++- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 4cee84768b9..de258c3080a 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -94,4 +94,4 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") - delattr(litellm, "_datadog_integration") \ No newline at end of file + delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f9fc64dfeee..f4290ae06a2 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,5 +1,3 @@ def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) - - diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py index af284d63775..0ca7e276331 100644 --- a/ddtrace/contrib/litellm/__init__.py +++ b/ddtrace/contrib/litellm/__init__.py @@ -11,4 +11,4 @@ from ddtrace.contrib.internal.litellm.patch import patch from ddtrace.contrib.internal.litellm.patch import unpatch - __all__ = ["patch", "unpatch", "get_version"] \ No newline at end of file + __all__ = ["patch", "unpatch", "get_version"] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 9602cf7336c..fd448ca9491 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -16,4 +16,4 @@ def _set_base_span_tags( if provider is not None: span.set_tag_str("litellm.request.provider", provider) if model is not None: - span.set_tag_str("litellm.request.model", model) \ No newline at end of file + span.set_tag_str("litellm.request.model", model) diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 590b1986652..9f88e2f5921 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -12,6 +12,7 @@ from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr + def default_global_config(): return {} @@ -20,6 +21,7 @@ def default_global_config(): def ddtrace_global_config(): return {} + @pytest.fixture def ddtrace_config_litellm(): return {} diff --git a/tests/contrib/litellm/test_litellm.py 
b/tests/contrib/litellm/test_litellm.py index 05cedf8307e..4d83d1c2209 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -3,6 +3,7 @@ from tests.utils import override_global_config from tests.contrib.litellm.utils import get_cassette_name + def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): """ When the global config UST tags are set @@ -13,7 +14,7 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): with override_global_config(dict(service="test-svc", env="staging", version="1234")): cassette_name = "completion.yaml" with request_vcr.use_cassette(cassette_name): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -26,11 +27,12 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): assert span.get_tag("version") == "1234" assert span.get_tag("litellm.request.model") == "gpt-3.5-turbo" + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -38,11 +40,12 @@ def test_litellm_completion(litellm, request_vcr, stream, n): n=n, ) + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_acompletion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -50,7 +53,8 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): n=n, ) -@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) + +@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): @@ -61,7 +65,8 @@ def test_litellm_text_completion(litellm, request_vcr, stream, n): n=n, ) -@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) + +@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): @@ -72,6 +77,7 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): n=n, ) + @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") def 
test_litellm_completion_different_models(litellm, request_vcr, model): @@ -83,7 +89,7 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" aws_region_name = "us-east-1" with request_vcr.use_cassette(f"completion_{model}.yaml"): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model=model, messages=messages, @@ -92,4 +98,4 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, aws_region_name=aws_region_name, - ) \ No newline at end of file + ) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index f95503dce6c..62eb8fe6334 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -3,6 +3,7 @@ CASETTE_EXTENSION = ".yaml" + # VCR is used to capture and store network requests made to Anthropic. # This is done to avoid making real calls to the API which could introduce # flakiness and cost. @@ -16,8 +17,9 @@ def get_request_vcr(): ignore_localhost=True, ) + # Get the name of the cassette to use for a given test -# All LiteLLM requests that use Open AI get routed to the chat completions endpoint, +# All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n def get_cassette_name(stream, n): stream_suffix = "_stream" if stream else "" From b95d96f82dcc7ed421993c22399748cd55b6ff6e Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:57:39 -0400 Subject: [PATCH 11/61] fix requirements lock file --- lib-injection/sources/min_compatible_versions.csv | 2 ++ min_compatible_versions.csv | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib-injection/sources/min_compatible_versions.csv b/lib-injection/sources/min_compatible_versions.csv index ee7990e276a..24a7adb2052 100644 --- a/lib-injection/sources/min_compatible_versions.csv +++ b/lib-injection/sources/min_compatible_versions.csv @@ -67,6 +67,7 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 +google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 @@ -94,6 +95,7 @@ langchain-core,==0.1.52 langchain-openai,==0.1.6 langchain-pinecone,==0.1.0 langgraph,~=0.2.60 +litellm,0 logbook,~=1.0.0 loguru,~=0.4.0 lxml,0 diff --git a/min_compatible_versions.csv b/min_compatible_versions.csv index ee7990e276a..24a7adb2052 100644 --- a/min_compatible_versions.csv +++ b/min_compatible_versions.csv @@ -67,6 +67,7 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 +google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 @@ -94,6 +95,7 @@ langchain-core,==0.1.52 langchain-openai,==0.1.6 langchain-pinecone,==0.1.0 langgraph,~=0.2.60 +litellm,0 logbook,~=1.0.0 loguru,~=0.4.0 lxml,0 From 9f63ceeb0318257a3f13fa590562f95656e3a56b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 16:02:14 -0400 Subject: [PATCH 12/61] remove unnecessary bedrock credentials in tests --- tests/contrib/litellm/test_litellm.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 4d83d1c2209..6cb65fd69f6 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -81,13 
+81,6 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") def test_litellm_completion_different_models(litellm, request_vcr, model): - aws_access_key_id = "" - aws_secret_access_key = "" - aws_region_name = "" - if model == "bedrock/amazon.titan-text-lite-v1": - aws_access_key_id = "ASIAWYLNJGWWOJPUYN45" - aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" - aws_region_name = "us-east-1" with request_vcr.use_cassette(f"completion_{model}.yaml"): messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( @@ -95,7 +88,4 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): messages=messages, stream=False, n=1, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, ) From a066c99f2d0bfb98c6ccb1cf6d22d2af5c89cc44 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Sun, 23 Mar 2025 14:25:53 -0400 Subject: [PATCH 13/61] add documentation --- ddtrace/contrib/_litellm.py | 47 +++++++++++++++++++++++ ddtrace/contrib/internal/litellm/utils.py | 2 +- ddtrace/contrib/litellm/__init__.py | 14 ------- ddtrace/llmobs/_integrations/litellm.py | 5 +-- 4 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 ddtrace/contrib/_litellm.py delete mode 100644 ddtrace/contrib/litellm/__init__.py diff --git a/ddtrace/contrib/_litellm.py b/ddtrace/contrib/_litellm.py new file mode 100644 index 00000000000..026d11250ca --- /dev/null +++ b/ddtrace/contrib/_litellm.py @@ -0,0 +1,47 @@ +""" +The LiteLLM integration instruments the LiteLLM Python SDK's sync and async ``completion`` and ``text_completion`` methods. + +All traces submitted from the LiteLLM integration are tagged by: + +- ``service``, ``env``, ``version``: see the `Unified Service Tagging docs `_. +- ``litellm.request.model``: Model used in the request. This may be just the model name (e.g. ``gpt-3.5-turbo``) or the model name with the route defined (e.g. ``openai/gpt-3.5-turbo``). +- ``litellm.request.host``: Host where the request is sent (if specified). + + +Enabling +~~~~~~~~ + +The LiteLLM integration is enabled automatically when you use +:ref:`ddtrace-run` or :ref:`import ddtrace.auto`. + +Alternatively, use :func:`patch() ` to manually enable the LiteLLM integration:: + + from ddtrace import config, patch + + patch(litellm=True) + + +Global Configuration +~~~~~~~~~~~~~~~~~~~~ + +.. py:data:: ddtrace.config.litellm["service"] + + The service name reported by default for LiteLLM requests. + + Alternatively, you can set this option with the ``DD_SERVICE`` or ``DD_LITELLM_SERVICE`` environment + variables. 
+ + Default: ``DD_SERVICE`` + + +Instance Configuration +~~~~~~~~~~~~~~~~~~~~~~ + +To configure the LiteLLM integration on a per-instance basis use the +``Pin`` API:: + + import litellm + from ddtrace import Pin, config + + Pin.override(litellm, service="my-litellm-service") +""" # noqa: E501 \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f4290ae06a2..bd6ec1aa289 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,3 +1,3 @@ def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: - span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) + span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py deleted file mode 100644 index 0ca7e276331..00000000000 --- a/ddtrace/contrib/litellm/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# TODO: documentation - -from ddtrace.internal.utils.importlib import require_modules - - -required_modules = ["litellm"] - -with require_modules(required_modules) as missing_modules: - if not missing_modules: - from ddtrace.contrib.internal.litellm.patch import get_version - from ddtrace.contrib.internal.litellm.patch import patch - from ddtrace.contrib.internal.litellm.patch import unpatch - - __all__ = ["patch", "unpatch", "get_version"] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index fd448ca9491..4dba18be518 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -8,12 +8,9 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" - _provider_map = {} def _set_base_span_tags( - self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] + self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] ) -> None: - if provider is not None: - span.set_tag_str("litellm.request.provider", provider) if model is not None: span.set_tag_str("litellm.request.model", model) From 010f85e527dcac9b5f91dafbe66e63998b266632 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 12:46:23 -0400 Subject: [PATCH 14/61] add llmobs base span tag method for litellm --- ddtrace/contrib/internal/litellm/patch.py | 3 +-- ddtrace/llmobs/_integrations/litellm.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index de258c3080a..e35f48f0d4e 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -20,8 +20,7 @@ ) -def get_version(): - # type: () -> str +def get_version() -> str: try: return version("litellm") except Exception: diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 4dba18be518..7f823dea327 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -1,16 +1,39 @@ from typing import Any from typing import Dict +from typing import List from typing import Optional +from ddtrace.llmobs._constants import METRICS +from ddtrace.llmobs._constants import MODEL_NAME +from ddtrace.llmobs._constants import MODEL_PROVIDER +from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.trace import Span from 
ddtrace.llmobs._integrations.base import BaseLLMIntegration


 class LiteLLMIntegration(BaseLLMIntegration):
     _integration_name = "litellm"
+    _provider_map = {}

     def _set_base_span_tags(
         self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any]
     ) -> None:
         if model is not None:
             span.set_tag_str("litellm.request.model", model)
+
+    def _llmobs_set_tags(
+        self,
+        span: Span,
+        args: List[Any],
+        kwargs: Dict[str, Any],
+        response: Optional[Any] = None,
+        operation: str = "",
+    ) -> None:
+        model_name = span.get_tag("litellm.request.model")
+
+        # TODO: populate the provider map
+        model_provider = self._provider_map.get(model_name, "")
+
+        span._set_ctx_items(
+            {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider}
+        )
\ No newline at end of file

From 528a0ec0ae0be9691f17ee3b165d51590ca152ff Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 25 Mar 2025 16:34:40 -0400
Subject: [PATCH 15/61] add provider tagging and stream capturing support

---
 ddtrace/contrib/internal/litellm/patch.py |  74 ++++++++--
 ddtrace/contrib/internal/litellm/utils.py | 170 ++++++++++++++++++++++
 ddtrace/llmobs/_integrations/litellm.py   |   7 +-
 3 files changed, 239 insertions(+), 12 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index e35f48f0d4e..67e2af78897 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -9,6 +9,8 @@ from ddtrace import config
 from ddtrace.contrib.trace_utils import unwrap
 from ddtrace.contrib.trace_utils import with_traced_module
 from ddtrace.contrib.trace_utils import wrap
 from ddtrace.contrib.internal.litellm.utils import tag_request
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
 from ddtrace.llmobs._integrations import LiteLLMIntegration
 from ddtrace.trace import Pin
 from ddtrace.internal.utils import get_argument_value
@@ -16,7 +18,10 @@
 config._add(
     "litellm",
-    {},
+    {
+        "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)),
+        "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)),
+    },
 )
@@ -29,43 +34,92 @@ def get_version() -> str:
 def _create_span(litellm, pin, func, instance, args, kwargs):
     """Helper function to create and configure a traced span."""
-    integration = litellm._datadog_integration
     model = get_argument_value(args, kwargs, 0, "model", None)
+    integration = litellm._datadog_integration
+    base_url = kwargs.get("api_base", None)
     span = integration.trace(
         pin,
         "litellm.%s" % func.__name__,
         model=model,
-        submit_to_llmobs=False,
+        submit_to_llmobs=integration.should_submit_to_llmobs(base_url),
     )
     return span


 @with_traced_module
 def traced_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, False)
+
+@with_traced_module
+async def traced_acompletion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, False)
+
+@with_traced_module
+def traced_text_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, True)
+
+@with_traced_module
+async def traced_atext_completion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True)
+
+
+def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion):
+    integration = litellm._datadog_integration
span = _create_span(litellm, pin, func, instance, args, kwargs) + stream = kwargs.get("stream", False) tag_request(span, kwargs) + resp = None try: - return func(*args, **kwargs) + resp = func(*args, **kwargs) + if stream: + return TracedLiteLLMStream( + resp, integration, span, args, kwargs, is_completion + ) + return resp except Exception: span.set_exc_info(*sys.exc_info()) raise finally: + # streamed spans will be finished separately once the stream generator is exhausted + if span.error or not stream: + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() -@with_traced_module -async def traced_acompletion(litellm, pin, func, instance, args, kwargs): +async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): + integration = litellm._datadog_integration span = _create_span(litellm, pin, func, instance, args, kwargs) + stream = kwargs.get("stream", False) tag_request(span, kwargs) + resp = None try: - return await func(*args, **kwargs) + resp = await func(*args, **kwargs) + if stream: + return TracedLiteLLMAsyncStream( + resp, integration, span, args, kwargs, is_completion + ) + return resp except Exception: span.set_exc_info(*sys.exc_info()) raise finally: + # streamed spans will be finished separately once the stream generator is exhausted + if span.error or not stream: + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() +@with_traced_module +def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) + integration = litellm._datadog_integration + model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) + # Store the provider information in the integration + integration._provider_map[requested_model] = custom_llm_provider + integration._provider_map[model] = custom_llm_provider + return model, custom_llm_provider, dynamic_api_key, api_base + + def patch(): if getattr(litellm, "_datadog_patch", False): return @@ -78,8 +132,9 @@ def patch(): wrap("litellm", "completion", traced_completion(litellm)) wrap("litellm", "acompletion", traced_acompletion(litellm)) - wrap("litellm", "text_completion", traced_completion(litellm)) - wrap("litellm", "atext_completion", traced_acompletion(litellm)) + wrap("litellm", "text_completion", traced_text_completion(litellm)) + wrap("litellm", "atext_completion", traced_atext_completion(litellm)) + wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -92,5 +147,6 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") + unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index bd6ec1aa289..d16d33f5ddd 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,3 +1,173 @@ +import sys +from typing import Any +from typing import Dict +from typing import List + +from ddtrace.internal.logger import get_logger + +log = get_logger(__name__) + def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) + +class BaseTracedLiteLLMStream: + def __init__(self, generator, integration, span, args, kwargs, is_completion=False): + n = 
kwargs.get("n", 1) or 1 + self._generator = generator + self._dd_integration = integration + self._dd_span = span + self._args = args + self._kwargs = kwargs + self._streamed_chunks = [[] for _ in range(n)] + self._is_completion = is_completion + + +class TracedLiteLLMStream(BaseTracedLiteLLMStream): + def __enter__(self): + self._generator.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._generator.__exit__(exc_type, exc_val, exc_tb) + + def __iter__(self): + exception_raised = False + try: + for chunk in self._generator: + self._extract_token_chunk(chunk) + yield chunk + _loop_handler(chunk, self._streamed_chunks) + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + exception_raised = True + raise + finally: + if not exception_raised: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + +class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): + async def __aenter__(self): + await self._generator.__aenter__() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self._generator.__aexit__(exc_type, exc_val, exc_tb) + + async def __aiter__(self): + exception_raised = False + try: + async for chunk in self._generator: + yield chunk + _loop_handler(chunk, self._streamed_chunks) + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + exception_raised = True + raise + finally: + if not exception_raised: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + +def _loop_handler(chunk, streamed_chunks): + """Appends the chunk to the correct index in the streamed_chunks list. + + When handling a streamed chat/completion response, this function is called for each chunk in the streamed response. 
+ """ + for choice in chunk.choices: + streamed_chunks[choice.index].append(choice) + if getattr(chunk, "usage", None): + streamed_chunks[0].insert(0, chunk) + + +def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): + try: + if is_completion: + formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + else: + formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + operation = "completion" if is_completion else "chat" + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + except Exception: + log.warning("Error processing streamed completion/chat response.", exc_info=True) + + +def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" + if not streamed_chunks: + return {"text": ""} + completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} + if streamed_chunks[-1].finish_reason is not None: + completion["finish_reason"] = streamed_chunks[-1].finish_reason + if hasattr(streamed_chunks[0], "usage"): + completion["usage"] = streamed_chunks[0].usage + return completion + + +def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): + """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" + if function_call_chunk: + if not stored_tool_calls: + stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) + stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") + return + if not tool_call_chunk: + return + tool_call_idx = getattr(tool_call_chunk, "index", None) + tool_id = getattr(tool_call_chunk, "id", None) + tool_type = getattr(tool_call_chunk, "type", None) + function_call = getattr(tool_call_chunk, "function", None) + function_name = getattr(function_call, "name", "") + # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) + list_idx = next( + (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), + None, + ) + if list_idx is None: + stored_tool_calls.append( + {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} + ) + list_idx = -1 + stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") + + +def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a chat completion message dictionary from streamed chunks. 
+ The resulting message dictionary is of form: + {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} + """ + message = {"content": "", "tool_calls": []} + for chunk in streamed_chunks: + if getattr(chunk, "usage", None): + message["usage"] = chunk.usage + if not hasattr(chunk, "delta"): + continue + if getattr(chunk, "index", None) and not message.get("index"): + message["index"] = chunk.index + if getattr(chunk.delta, "role") and not message.get("role"): + message["role"] = chunk.delta.role + if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): + message["finish_reason"] = chunk.finish_reason + chunk_content = getattr(chunk.delta, "content", "") + if chunk_content: + message["content"] += chunk_content + continue + function_call = getattr(chunk.delta, "function_call", None) + if function_call: + _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) + tool_calls = getattr(chunk.delta, "tool_calls", None) + if not tool_calls: + continue + for tool_call in tool_calls: + _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) + if message["tool_calls"]: + message["tool_calls"].sort(key=lambda x: x.get("index", 0)) + else: + message.pop("tool_calls", None) + message["content"] = message["content"].strip() + return message diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 7f823dea327..c2f0db2c890 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -30,10 +30,11 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - - # TODO: populate the provider map model_provider = self._provider_map.get(model_name, "") span._set_ctx_items( {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider} - ) \ No newline at end of file + ) + + def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: + return base_url is None From 4b16a4c72b41d4b3f0b64c153d051bf7b4614121 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 17:38:54 -0400 Subject: [PATCH 16/61] attach usage metrics to llmobs spans --- ddtrace/contrib/internal/litellm/patch.py | 2 - ddtrace/contrib/internal/litellm/utils.py | 31 +++++++++ ddtrace/llmobs/_integrations/litellm.py | 28 +++++++- ddtrace/llmobs/_integrations/openai.py | 6 +- ddtrace/llmobs/_integrations/utils.py | 83 ++++++++++++++++++++++- 5 files changed, 141 insertions(+), 9 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 67e2af78897..fbd96f4a156 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,7 +83,6 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) - span.finish() async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -106,7 +105,6 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) - span.finish() @with_traced_module diff --git 
a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py
index d16d33f5ddd..51b36888dca 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -48,6 +48,23 @@ def __iter__(self):
                     self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
                 )
             self._dd_span.finish()
+
+    def _extract_token_chunk(self, chunk):
+        """Attempt to extract the token chunk (last chunk in the stream) from the streamed response."""
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            return
+        choice = choices[0]
+        if not getattr(choice, "finish_reason", None):
+            # Only the second-last chunk in the stream with token usage enabled will have finish_reason set
+            return
+        try:
+            # User isn't expecting last token chunk to be present since it's not part of the default streamed response,
+            # so we consume it and extract the token usage metadata before it reaches the user.
+            usage_chunk = self._generator.__next__()
+            self._streamed_chunks[0].insert(0, usage_chunk)
+        except (StopIteration, GeneratorExit):
+            return


 class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream):
     async def __aenter__(self):
@@ -61,6 +78,7 @@ async def __aiter__(self):
         exception_raised = False
         try:
             async for chunk in self._generator:
+                await self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -73,6 +91,19 @@ async def __aiter__(self):
                     self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
                 )
             self._dd_span.finish()
+
+    async def _extract_token_chunk(self, chunk):
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            return
+        choice = choices[0]
+        if not getattr(choice, "finish_reason", None):
+            return
+        try:
+            usage_chunk = await self._generator.__anext__()
+            self._streamed_chunks[0].insert(0, usage_chunk)
+        except (StopAsyncIteration, GeneratorExit):
+            return


 def _loop_handler(chunk, streamed_chunks):
     """Appends the chunk to the correct index in the streamed_chunks list.
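The two `_extract_token_chunk` helpers added above rely on a convention shared by OpenAI-style streaming APIs: when usage reporting is enabled on a stream, one trailing chunk carrying only token counts (with an empty `choices` list) arrives right after the chunk whose `finish_reason` is set. The wrappers consume that trailing chunk themselves, so callers see exactly the stream they asked for while the span still gets token metrics. A minimal, self-contained sketch of this pattern follows; the `Chunk`/`Choice` classes here are illustrative stand-ins, not litellm's real response types:

    from dataclasses import dataclass, field
    from typing import List, Optional


    @dataclass
    class Choice:
        text: str = ""
        finish_reason: Optional[str] = None


    @dataclass
    class Chunk:
        choices: List[Choice] = field(default_factory=list)
        usage: Optional[dict] = None


    def fake_stream():
        yield Chunk(choices=[Choice(text="Hello")])
        yield Chunk(choices=[Choice(text=" world", finish_reason="stop")])
        # Hidden final chunk: no choices, only token usage.
        yield Chunk(usage={"prompt_tokens": 5, "completion_tokens": 2})


    gen = fake_stream()
    seen_by_caller = []
    collected = []  # plays the role of streamed_chunks[0]
    for chunk in gen:
        # Mirror of _extract_token_chunk: once finish_reason appears, eagerly
        # pull the trailing usage chunk off the generator before the caller can.
        if chunk.choices and chunk.choices[0].finish_reason:
            usage_chunk = next(gen, None)
            if usage_chunk is not None:
                collected.insert(0, usage_chunk)
        seen_by_caller.append(chunk)
        collected.append(chunk)

    assert all(c.usage is None for c in seen_by_caller)
    assert collected[0].usage == {"prompt_tokens": 5, "completion_tokens": 2}

Inserting the usage chunk at index 0 of the first choice's chunk list is what lets the `_construct_completion_from_streamed_chunks` and `_construct_message_from_streamed_chunks` helpers above find `usage` on the first element once the stream is exhausted.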
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c2f0db2c890..f3ba73763cf 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,10 +3,12 @@ from typing import List from typing import Optional -from ddtrace.llmobs._constants import METRICS +from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY, METRICS, OUTPUT_TOKENS_METRIC_KEY, TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -32,9 +34,31 @@ def _llmobs_set_tags( model_name = span.get_tag("litellm.request.model") model_provider = self._provider_map.get(model_name, "") + # response format will match Open AI + if operation == "completion": + openai_set_meta_tags_from_completion(span, kwargs, response) + else: + openai_set_meta_tags_from_chat(span, kwargs, response) + + metrics = self._extract_llmobs_metrics(response) span._set_ctx_items( - {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider} + {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics} ) + @staticmethod + def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: + if isinstance(resp, list): + token_usage = _get_attr(resp[0], "usage", None) + else: + token_usage = _get_attr(resp, "usage", None) + if token_usage is not None: + prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) + completion_tokens = _get_attr(token_usage, "completion_tokens", 0) + return { + INPUT_TOKENS_METRIC_KEY: prompt_tokens, + OUTPUT_TOKENS_METRIC_KEY: completion_tokens, + TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, + } + def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: return base_url is None diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index 336cea8fde3..b0ae3fc8e17 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -20,7 +20,7 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document @@ -124,9 +124,9 @@ def _llmobs_set_tags( model_provider = "deepseek" if operation == "completion": - self._llmobs_set_meta_tags_from_completion(span, kwargs, response) + openai_set_meta_tags_from_completion(span, kwargs, response) elif operation == "chat": - self._llmobs_set_meta_tags_from_chat(span, kwargs, response) + openai_set_meta_tags_from_chat(span, kwargs, response) elif operation == "embedding": self._llmobs_set_meta_tags_from_embedding(span, kwargs, response) metrics = self._extract_llmobs_metrics_tags(span, response) diff --git a/ddtrace/llmobs/_integrations/utils.py 
b/ddtrace/llmobs/_integrations/utils.py index c5531deb2ad..a0e41082400 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -1,10 +1,12 @@ +import json import re -from typing import Optional +from typing import Any, Dict, Optional from typing import Tuple from typing import Union from urllib.parse import urlparse -from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY +from ddtrace._trace.span import Span +from ddtrace.llmobs._constants import INPUT_MESSAGES, INPUT_TOKENS_METRIC_KEY, METADATA, OUTPUT_MESSAGES from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._utils import _get_attr @@ -270,3 +272,80 @@ def get_messages_from_converse_content(role: str, content: list): if message: messages.append(message) return messages + + +def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None: + """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags.""" + prompt = kwargs.get("prompt", "") + if isinstance(prompt, str): + prompt = [prompt] + parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash")} + output_messages = [{"content": ""}] + if not span.error and completions: + choices = getattr(completions, "choices", completions) + output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] + span._set_ctx_items( + { + INPUT_MESSAGES: [{"content": str(p)} for p in prompt], + METADATA: parameters, + OUTPUT_MESSAGES: output_messages, + } + ) + +def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: + """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" + input_messages = [] + for m in kwargs.get("messages", []): + input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) + parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash")} + span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters}) + + if span.error or not messages: + span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) + return + if isinstance(messages, list): # streamed response + output_messages = [] + for streamed_message in messages: + message = {"content": streamed_message["content"], "role": streamed_message["role"]} + tool_calls = streamed_message.get("tool_calls", []) + if tool_calls: + message["tool_calls"] = [ + { + "name": tool_call.get("name", ""), + "arguments": json.loads(tool_call.get("arguments", "")), + "tool_id": tool_call.get("tool_id", ""), + "type": tool_call.get("type", ""), + } + for tool_call in tool_calls + ] + output_messages.append(message) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + return + choices = _get_attr(messages, "choices", []) + output_messages = [] + for idx, choice in enumerate(choices): + tool_calls_info = [] + choice_message = _get_attr(choice, "message", {}) + role = _get_attr(choice_message, "role", "") + content = _get_attr(choice_message, "content", "") or "" + function_call = _get_attr(choice_message, "function_call", None) + if function_call: + function_name = _get_attr(function_call, "name", "") + arguments = json.loads(_get_attr(function_call, "arguments", "")) + function_call_info = {"name": 
function_name, "arguments": arguments} + output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]}) + continue + tool_calls = _get_attr(choice_message, "tool_calls", []) or [] + for tool_call in tool_calls: + tool_call_info = { + "name": getattr(tool_call.function, "name", ""), + "arguments": json.loads(getattr(tool_call.function, "arguments", "")), + "tool_id": getattr(tool_call, "id", ""), + "type": getattr(tool_call, "type", ""), + } + tool_calls_info.append(tool_call_info) + if tool_calls_info: + output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) + continue + output_messages.append({"content": content, "role": role}) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) From 0a6b744d29d9d15652d68a788e303657a9ec0164 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 17:47:59 -0400 Subject: [PATCH 17/61] finish non streamed spans --- ddtrace/contrib/internal/litellm/patch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index fbd96f4a156..19cb3da51af 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,6 +83,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + span.finish() async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -105,6 +106,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + span.finish() @with_traced_module From 33aae75d2ce0ee1829b052f4059524bed8903250 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 09:51:57 -0400 Subject: [PATCH 18/61] use sample pc config --- ddtrace/contrib/internal/litellm/patch.py | 6 ++++-- ddtrace/contrib/internal/litellm/utils.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 19cb3da51af..798040020ec 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -82,7 +82,8 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() @@ -105,7 +106,8 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 51b36888dca..1f326105ffc 100644 --- 
a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -123,7 +123,8 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp else: formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] operation = "completion" if is_completion else "chat" - integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) From 184d728f8247d031bfe1b4046b712ef8c9cf3d9b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 10:04:02 -0400 Subject: [PATCH 19/61] move openai message parsing utils to shared utils file --- ddtrace/contrib/internal/litellm/utils.py | 81 +--------------------- ddtrace/contrib/internal/openai/utils.py | 82 +---------------------- ddtrace/llmobs/_integrations/utils.py | 77 ++++++++++++++++++++- 3 files changed, 82 insertions(+), 158 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 1f326105ffc..26803f1f5ab 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -4,6 +4,7 @@ from typing import List from ddtrace.internal.logger import get_logger +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks log = get_logger(__name__) @@ -119,87 +120,11 @@ def _loop_handler(chunk, streamed_chunks): def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): try: if is_completion: - formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] else: - formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] operation = "completion" if is_completion else "chat" if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) - - -def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" - if not streamed_chunks: - return {"text": ""} - completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} - if streamed_chunks[-1].finish_reason is not None: - completion["finish_reason"] = streamed_chunks[-1].finish_reason - if hasattr(streamed_chunks[0], "usage"): - completion["usage"] = streamed_chunks[0].usage - return completion - - -def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): - """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" - if function_call_chunk: - if not stored_tool_calls: - stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), 
"arguments": ""}) - stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") - return - if not tool_call_chunk: - return - tool_call_idx = getattr(tool_call_chunk, "index", None) - tool_id = getattr(tool_call_chunk, "id", None) - tool_type = getattr(tool_call_chunk, "type", None) - function_call = getattr(tool_call_chunk, "function", None) - function_name = getattr(function_call, "name", "") - # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) - list_idx = next( - (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), - None, - ) - if list_idx is None: - stored_tool_calls.append( - {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} - ) - list_idx = -1 - stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") - - -def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a chat completion message dictionary from streamed chunks. - The resulting message dictionary is of form: - {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} - """ - message = {"content": "", "tool_calls": []} - for chunk in streamed_chunks: - if getattr(chunk, "usage", None): - message["usage"] = chunk.usage - if not hasattr(chunk, "delta"): - continue - if getattr(chunk, "index", None) and not message.get("index"): - message["index"] = chunk.index - if getattr(chunk.delta, "role") and not message.get("role"): - message["role"] = chunk.delta.role - if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): - message["finish_reason"] = chunk.finish_reason - chunk_content = getattr(chunk.delta, "content", "") - if chunk_content: - message["content"] += chunk_content - continue - function_call = getattr(chunk.delta, "function_call", None) - if function_call: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) - tool_calls = getattr(chunk.delta, "tool_calls", None) - if not tool_calls: - continue - for tool_call in tool_calls: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) - if message["tool_calls"]: - message["tool_calls"].sort(key=lambda x: x.get("index", 0)) - else: - message.pop("tool_calls", None) - message["content"] = message["content"].strip() - return message diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index c421d57c74c..5f4f227054e 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,6 +6,7 @@ from typing import Generator from typing import List +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks import wrapt from ddtrace.internal.logger import get_logger @@ -265,9 +266,9 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp request_messages = kwargs.get("messages", None) try: if is_completion: - formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] else: - formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_message_from_streamed_chunks(choice) 
for choice in streamed_chunks] if integration.is_pc_sampled_span(span): _tag_streamed_response(integration, span, formatted_completions) _set_token_metrics(span, formatted_completions, prompts, request_messages, kwargs) @@ -276,83 +277,6 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) - -def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" - if not streamed_chunks: - return {"text": ""} - completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} - if streamed_chunks[-1].finish_reason is not None: - completion["finish_reason"] = streamed_chunks[-1].finish_reason - if hasattr(streamed_chunks[0], "usage"): - completion["usage"] = streamed_chunks[0].usage - return completion - - -def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): - """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" - if function_call_chunk: - if not stored_tool_calls: - stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) - stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") - return - if not tool_call_chunk: - return - tool_call_idx = getattr(tool_call_chunk, "index", None) - tool_id = getattr(tool_call_chunk, "id", None) - tool_type = getattr(tool_call_chunk, "type", None) - function_call = getattr(tool_call_chunk, "function", None) - function_name = getattr(function_call, "name", "") - # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) - list_idx = next( - (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), - None, - ) - if list_idx is None: - stored_tool_calls.append( - {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} - ) - list_idx = -1 - stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") - - -def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a chat completion message dictionary from streamed chunks. 
- The resulting message dictionary is of form: - {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} - """ - message = {"content": "", "tool_calls": []} - for chunk in streamed_chunks: - if getattr(chunk, "usage", None): - message["usage"] = chunk.usage - if not hasattr(chunk, "delta"): - continue - if getattr(chunk, "index", None) and not message.get("index"): - message["index"] = chunk.index - if getattr(chunk.delta, "role") and not message.get("role"): - message["role"] = chunk.delta.role - if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): - message["finish_reason"] = chunk.finish_reason - chunk_content = getattr(chunk.delta, "content", "") - if chunk_content: - message["content"] += chunk_content - continue - function_call = getattr(chunk.delta, "function_call", None) - if function_call: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) - tool_calls = getattr(chunk.delta, "tool_calls", None) - if not tool_calls: - continue - for tool_call in tool_calls: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) - if message["tool_calls"]: - message["tool_calls"].sort(key=lambda x: x.get("index", 0)) - else: - message.pop("tool_calls", None) - message["content"] = message["content"].strip() - return message - - def _tag_streamed_response(integration, span, completions_or_messages=None): """Tagging logic for streamed completions and chat completions.""" for idx, choice in enumerate(completions_or_messages): diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index a0e41082400..477c15bfdee 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -1,6 +1,6 @@ import json import re -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from typing import Tuple from typing import Union from urllib.parse import urlparse @@ -349,3 +349,78 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: continue output_messages.append({"content": content, "role": role}) span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + +def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" + if not streamed_chunks: + return {"text": ""} + completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} + if streamed_chunks[-1].finish_reason is not None: + completion["finish_reason"] = streamed_chunks[-1].finish_reason + if hasattr(streamed_chunks[0], "usage"): + completion["usage"] = streamed_chunks[0].usage + return completion + + +def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): + """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" + if function_call_chunk: + if not stored_tool_calls: + stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) + stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") + return + if not tool_call_chunk: + return + tool_call_idx = getattr(tool_call_chunk, "index", None) + tool_id = getattr(tool_call_chunk, "id", None) + tool_type = getattr(tool_call_chunk, "type", None) + function_call = getattr(tool_call_chunk, "function", None) + function_name = 
getattr(function_call, "name", "") + # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) + list_idx = next( + (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), + None, + ) + if list_idx is None: + stored_tool_calls.append( + {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} + ) + list_idx = -1 + stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") + + +def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a chat completion message dictionary from streamed chunks. + The resulting message dictionary is of form: + {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} + """ + message = {"content": "", "tool_calls": []} + for chunk in streamed_chunks: + if getattr(chunk, "usage", None): + message["usage"] = chunk.usage + if not hasattr(chunk, "delta"): + continue + if getattr(chunk, "index", None) and not message.get("index"): + message["index"] = chunk.index + if getattr(chunk.delta, "role") and not message.get("role"): + message["role"] = chunk.delta.role + if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): + message["finish_reason"] = chunk.finish_reason + chunk_content = getattr(chunk.delta, "content", "") + if chunk_content: + message["content"] += chunk_content + continue + function_call = getattr(chunk.delta, "function_call", None) + if function_call: + openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) + tool_calls = getattr(chunk.delta, "tool_calls", None) + if not tool_calls: + continue + for tool_call in tool_calls: + openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) + if message["tool_calls"]: + message["tool_calls"].sort(key=lambda x: x.get("index", 0)) + else: + message.pop("tool_calls", None) + message["content"] = message["content"].strip() + return message From a241160c5cd71279d5b5dac60a0b02d10dd6eccc Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 10:47:08 -0400 Subject: [PATCH 20/61] reuse role for litellm streamed multi choice responses --- ddtrace/llmobs/_integrations/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 477c15bfdee..855b0a829aa 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -304,9 +304,12 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) return if isinstance(messages, list): # streamed response + role = "" output_messages = [] for streamed_message in messages: - message = {"content": streamed_message["content"], "role": streamed_message["role"]} + # litellm roles appear only on the first choice, so store it to be used for all choices + role = streamed_message.get("role", "") or role + message = {"content": streamed_message.get("content", ""), "role": role} tool_calls = streamed_message.get("tool_calls", []) if tool_calls: message["tool_calls"] = [ From 2d2337a9f392dae9bf38bf5c5ef5b74e47c613e9 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 11:21:37 -0400 Subject: [PATCH 21/61] pass operation to litellm llmobs set tags --- ddtrace/contrib/internal/litellm/patch.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 798040020ec..1894e25d185 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,7 +83,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") span.finish() @@ -107,7 +107,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") span.finish() From 3891da90225d10797aead5a83cf6fd5cd8df5a22 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 13:27:37 -0400 Subject: [PATCH 22/61] wrap get_llm_provider in litellm sdk --- ddtrace/contrib/internal/litellm/patch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 1894e25d185..2ad1b2fdf10 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -137,6 +137,7 @@ def patch(): wrap("litellm", "text_completion", traced_text_completion(litellm)) wrap("litellm", "atext_completion", traced_atext_completion(litellm)) wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) + wrap("litellm", "litellm.main.get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -150,5 +151,6 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") - + unwrap(litellm.litellm.main, "get_llm_provider") + delattr(litellm, "_datadog_integration") From df8449ea05d03081f3de8bef6ebdd027c2dfd42f Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 08:34:36 -0400 Subject: [PATCH 23/61] update provider map to store parsed model name and provider --- ddtrace/contrib/internal/litellm/patch.py | 7 +++---- ddtrace/llmobs/_integrations/litellm.py | 5 +++-- ddtrace/llmobs/_llmobs.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 2ad1b2fdf10..c17bbcdd447 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -60,7 +60,7 @@ def traced_text_completion(litellm, pin, func, instance, args, kwargs): @with_traced_module async def traced_atext_completion(litellm, pin, func, instance, args, kwargs): - return await _traced_acompletion(litellm, pin, func, instance, args, kwargs) + return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True) def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -117,8 +117,7 @@ def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): integration = litellm._datadog_integration model, 
custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) # Store the provider information in the integration - integration._provider_map[requested_model] = custom_llm_provider - integration._provider_map[model] = custom_llm_provider + integration._model_map[requested_model] = (model, custom_llm_provider) return model, custom_llm_provider, dynamic_api_key, api_base @@ -152,5 +151,5 @@ def unpatch(): unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") unwrap(litellm.litellm.main, "get_llm_provider") - + delattr(litellm, "_datadog_integration") diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index f3ba73763cf..94eb1f2a579 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -15,7 +15,8 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" - _provider_map = {} + # maps requested model name to parsed model name and provider + _model_map = {} def _set_base_span_tags( self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] @@ -32,7 +33,7 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - model_provider = self._provider_map.get(model_name, "") + _, model_provider = self._model_map.get(model_name, (model_name, "")) # response format will match Open AI if operation == "completion": diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 61ead184924..c3f278cf6d1 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -85,6 +85,7 @@ "google_generativeai": "google_generativeai", "vertexai": "vertexai", "langgraph": "langgraph", + "litellm": "litellm", } From 122cf458f8f7a00fd24d6878703a2046297c1255 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 08:48:36 -0400 Subject: [PATCH 24/61] update model name based on model map --- ddtrace/llmobs/_integrations/litellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 94eb1f2a579..024720313f2 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -33,7 +33,7 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - _, model_provider = self._model_map.get(model_name, (model_name, "")) + model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # response format will match Open AI if operation == "completion": From 92670c5c15925df84786fd3ef69e3f2d6675e895 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 13:46:27 -0400 Subject: [PATCH 25/61] add llmobs test file and remove usage extraction logic --- ddtrace/contrib/internal/litellm/utils.py | 32 --- .../completion_stream_exclude_usage.yaml | 190 ++++++++++++++ ...stream_multiple_choices_exclude_usage.yaml | 242 ++++++++++++++++++ tests/contrib/litellm/conftest.py | 25 +- tests/contrib/litellm/test_litellm.py | 23 +- tests/contrib/litellm/test_litellm_llmobs.py | 47 ++++ tests/contrib/litellm/utils.py | 30 ++- 7 files changed, 548 insertions(+), 41 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml create mode 100644 tests/contrib/litellm/test_litellm_llmobs.py diff --git a/ddtrace/contrib/internal/litellm/utils.py 
b/ddtrace/contrib/internal/litellm/utils.py
index 26803f1f5ab..e854f814aa9 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -36,7 +36,6 @@ def __iter__(self):
         exception_raised = False
         try:
             for chunk in self._generator:
-                self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -49,23 +48,6 @@ def __iter__(self):
                 self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
             )
             self._dd_span.finish()
-
-    def _extract_token_chunk(self, chunk):
-        """Attempt to extract the token chunk (the last chunk in the stream) from the streamed response."""
-        choices = getattr(chunk, "choices", None)
-        if not choices:
-            return
-        choice = choices[0]
-        if not getattr(choice, "finish_reason", None):
-            # Only the second-to-last chunk in the stream has finish_reason set when token usage is enabled
-            return
-        try:
-            # The user is not expecting the last token chunk, since it is not part of the default streamed
-            # response, so we consume it and extract the token usage metadata before it reaches the user.
-            usage_chunk = self._generator.__next__()
-            self._streamed_chunks[0].insert(0, usage_chunk)
-        except (StopIteration, GeneratorExit):
-            return
 
 
 class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream):
     async def __aenter__(self):
@@ -79,7 +61,6 @@ async def __aiter__(self):
         exception_raised = False
         try:
             async for chunk in self._generator:
-                await self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -92,19 +73,6 @@ async def __aiter__(self):
                 self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
             )
             self._dd_span.finish()
-
-    async def _extract_token_chunk(self, chunk):
-        choices = getattr(chunk, "choices", None)
-        if not choices:
-            return
-        choice = choices[0]
-        if not getattr(choice, "finish_reason", None):
-            return
-        try:
-            usage_chunk = await self._generator.__anext__()
-            self._streamed_chunks[0].insert(0, usage_chunk)
-        except (StopAsyncIteration, GeneratorExit):
-            return
 
 
 def _loop_handler(chunk, streamed_chunks):
     """Appends the chunk to the correct index in the streamed_chunks list.
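The cassette files added below are recorded HTTP interactions that vcrpy replays during tests, which is how the suite exercises streamed completions without live OpenAI credentials. A sketch of how such a fixture is typically wired up; the directory, matcher, and filter settings here are plausible defaults rather than this repo's exact configuration:

    import vcr


    def get_request_vcr():
        # Replay-only: never hit the network in CI, and keep API keys out
        # of the recorded cassettes.
        return vcr.VCR(
            cassette_library_dir="tests/contrib/litellm/cassettes",
            record_mode="none",
            match_on=["path", "method"],
            filter_headers=["authorization"],
        )


    def test_completion_stream_without_usage(litellm):
        with get_request_vcr().use_cassette("completion_stream_exclude_usage.yaml"):
            resp = litellm.completion(
                model="gpt-3.5-turbo",
                messages=[{"content": "Hey, what is up?", "role": "user"}],
                stream=True,
                stream_options={"include_usage": False},
            )
            assert list(resp)  # chunks come from the cassette, not the API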
diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml new file mode 100644 index 00000000000..7f5f315dbd0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml @@ -0,0 +1,190 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + cookie: + - _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + 
here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + doing"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 926fe2d998864ce4-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 27 Mar 2025 15:22:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=CKfpgOZbNgeO_hZnZwDsP9MQL771OL.QGPQvL7sPRLM-1743088977-1.0.1.1-_AOAMiv0VN3eR0.0l1ZyAhvT8I.sKfG.FnBMJqIAMVU5fFpO4aETM8QMsSGgjjx2dyoOnQ9sOSa6vt2WO_I8dLE2qo4dNe7VwOTDw21Ujrw; + path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=D6vyj85I9udz_8Fd3dvOGdjJWNUTz5W_P_XpI71JrJw-1743088977188-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '178' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999994' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_601c6b7020f2cb6a2bb1fbd6d195dabc + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml new file mode 100644 index 00000000000..3b4b44429c0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml @@ -0,0 +1,242 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + 
x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + ready"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 926fe2dd98378f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 27 Mar 2025 15:22:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=tMtTAMSYbzl6Mz.ZmEx.t97SoHOEXO_PAGvnES4TErc-1743088977-1.0.1.1-9HXDCBRrHw.0632QNaKGFswPnd4Q7Gcf7tPaifQTEHGv.NOLfayXgXIeHlotH7TAOqyxUdp.KNZ2w43w08vOKnwATLE4VdXkeKJ05zjIvV8; + path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=fueXgERk0DK.0YI2CrP74Rvo77MpY9vRD4SXAqqK4S4-1743088977877-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '170' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999993' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_eb315d75e206dc7de5d075296b7b9b6c + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 9f88e2f5921..d12810564c4 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -1,5 +1,5 @@ import os -from typing import Generator +import mock import pytest from ddtrace.contrib.internal.litellm.patch import patch @@ -11,6 +11,7 @@ from tests.utils import override_env from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr +from ddtrace.llmobs import LLMObs def default_global_config(): @@ -27,6 +28,17 @@ def ddtrace_config_litellm(): return {} +@pytest.fixture() +def mock_llmobs_writer(): + patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") + try: + LLMObsSpanWriterMock = patcher.start() + m = mock.MagicMock() + LLMObsSpanWriterMock.return_value = m + yield m + finally: + patcher.stop() + @pytest.fixture def litellm(ddtrace_global_config, ddtrace_config_litellm): global_config = default_global_config() @@ -44,15 +56,22 @@ def litellm(ddtrace_global_config, 
ddtrace_config_litellm): yield litellm unpatch() - @pytest.fixture -def mock_tracer(litellm): +def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm, tracer=mock_tracer) pin.tracer._configure() + + if ddtrace_global_config.get("_llmobs_enabled", False): + # Have to disable and re-enable LLMObs to use to mock tracer. + LLMObs.disable() + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) + yield mock_tracer + LLMObs.disable() + @pytest.fixture def request_vcr(): diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 6cb65fd69f6..570c2d23251 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,12 +33,15 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - litellm.completion( + resp =litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @@ -46,36 +49,48 @@ def test_litellm_completion(litellm, request_vcr, stream, n): async def test_litellm_acompletion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - await litellm.acompletion( + resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + async for _ in resp: + pass + + + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - litellm.text_completion( + resp = litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - await litellm.atext_completion( + resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py new file mode 100644 index 00000000000..811b6343d5e --- /dev/null +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -0,0 +1,47 @@ +import pytest + +from tests.contrib.litellm.utils import get_cassette_name, consume_stream +from tests.llmobs._utils import _expected_llmobs_llm_span_event + + +@pytest.mark.parametrize( + "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] +) +class TestLLMObsLiteLLM: + # TODO: need to behind the scenes extract token usage from the stream so that it can still be reported 
even if not returned in the response + @pytest.mark.parametrize("stream,n,include_usage", [(True, 1, True), (True, 2, True), (False, 1, True), (False, 2, True), (True, 1, False), (True, 2, False), (False, 1, False), (False, 2, False)]) + def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages = [{"content": message.message.content, "role": message.message.role} for message in resp.choices] + token_metrics = { + "input_tokens": resp.usage.prompt_tokens, + "output_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 62eb8fe6334..55684b9f602 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -21,7 +21,33 @@ def get_request_vcr(): # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n): +def get_cassette_name(stream, n, include_usage=True): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" - return "completion" + stream_suffix + choice_suffix + CASETTE_EXTENSION + # include_usage only affects streamed responses + if stream and not include_usage: + usage_suffix = "_exclude_usage" + else: + usage_suffix = "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION + + +def consume_stream(resp, n): + output_messages = [{"content": "", "role": ""} for _ in range(n)] + token_metrics = {} + role = None + for chunk in resp: + for choice in chunk["choices"]: + content = choice["delta"]["content"] or "" + output_messages[choice.index]["content"] += content + if not output_messages[choice.index]["role"]: + role = choice["delta"]["role"] or role + output_messages[choice.index]["role"] = role + + if "usage" in chunk: + token_metrics = { + "input_tokens": chunk["usage"]["prompt_tokens"], + "output_tokens": chunk["usage"]["completion_tokens"], + "total_tokens": chunk["usage"]["total_tokens"], + } + return output_messages, token_metrics From 60830c4ed05427efd752a67fc97e19d8b75a89a6 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 28 Mar 2025 11:06:47 -0400 Subject: [PATCH 26/61] add more tests --- tests/contrib/litellm/test_litellm_llmobs.py | 154 +++++++++++++++++-- tests/contrib/litellm/utils.py | 55 +++++-- 2 files changed, 189 insertions(+), 20 deletions(-) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py 
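A note on the stream helpers this commit refactors: with n > 1, OpenAI-style streams interleave chunks across choices, so output has to be accumulated per choice.index (and usage captured from the final chunk) before it can be compared with the expected LLMObs span event. A minimal, self-contained sketch of that demultiplexing, assuming chunks shaped like OpenAI chat-completion deltas; the function name and dict shapes below are illustrative rather than part of the integration:

# Sketch: demultiplex an OpenAI-style chat stream with n choices.
# Assumes chunks like {"choices": [{"index": 0, "delta": {...}}], "usage": {...}}.
def aggregate_chunks(chunks, n):
    messages = [{"role": "", "content": ""} for _ in range(n)]
    usage = {}
    for chunk in chunks:
        for choice in chunk.get("choices", []):
            idx = choice.get("index", 0)
            delta = choice.get("delta") or {}
            # The role arrives only on the first delta of each choice.
            messages[idx]["role"] = messages[idx]["role"] or (delta.get("role") or "")
            messages[idx]["content"] += delta.get("content") or ""
        # With stream_options={"include_usage": True}, usage rides on the last chunk.
        if chunk.get("usage"):
            usage = {
                "input_tokens": chunk["usage"]["prompt_tokens"],
                "output_tokens": chunk["usage"]["completion_tokens"],
                "total_tokens": chunk["usage"]["total_tokens"],
            }
    return messages, usage

In the two-choice cassettes above, chunks alternate between index 0 and index 1, so each accumulator ends with its own complete copy of the reply.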
index 811b6343d5e..e64c5975c28 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -1,6 +1,6 @@ import pytest -from tests.contrib.litellm.utils import get_cassette_name, consume_stream +from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response from tests.llmobs._utils import _expected_llmobs_llm_span_event @@ -8,8 +8,19 @@ "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] ) class TestLLMObsLiteLLM: - # TODO: need to behind the scenes extract token usage from the stream so that it can still be reported even if not returned in the response - @pytest.mark.parametrize("stream,n,include_usage", [(True, 1, True), (True, 2, True), (False, 1, True), (False, 2, True), (True, 1, False), (True, 2, False), (False, 1, False), (False, 2, False)]) + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -23,12 +34,51 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, if stream: output_messages, token_metrics = consume_stream(resp, n) else: - output_messages = [{"content": message.message.content, "role": message.message.role} for message in resp.choices] - token_metrics = { - "input_tokens": resp.usage.prompt_tokens, - "output_tokens": resp.usage.completion_tokens, - "total_tokens": resp.usage.total_tokens, - } + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + span = mock_tracer.pop_traces()[0][0] assert mock_llmobs_writer.enqueue.call_count == 1 mock_llmobs_writer.enqueue.assert_called_with( @@ -44,4 +94,90 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, 
True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = litellm.text_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + async def test_atext_completion( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = await litellm.atext_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 55684b9f602..508d6a1fd3e 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -32,22 +32,55 @@ def get_cassette_name(stream, n, include_usage=True): return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION -def consume_stream(resp, n): - output_messages = [{"content": "", "role": ""} for _ in range(n)] +def consume_stream(resp, n, is_completion=False): + output_messages = [{"content": ""} for _ in range(n)] token_metrics = {} role = None for chunk in resp: - for choice in chunk["choices"]: - content = choice["delta"]["content"] or "" - output_messages[choice.index]["content"] += content - if not output_messages[choice.index]["role"]: - role = choice["delta"]["role"] or role - output_messages[choice.index]["role"] = role - - if "usage" in chunk: - token_metrics = { + role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + return output_messages, token_metrics + + +async def 
async_consume_stream(resp, n, is_completion=False): + output_messages = [{"content": ""} for _ in range(n)] + token_metrics = {} + role = None + async for chunk in resp: + role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + return output_messages, token_metrics + + +def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion=False): + for choice in chunk["choices"]: + content = choice["text"] if is_completion else choice["delta"]["content"] + content = content or "" + output_messages[choice.index]["content"] += content + if "role" not in output_messages[choice.index] and (choice.get("delta", {}).get("role") or role): + role = choice.get("delta", {}).get("role") or role + output_messages[choice.index]["role"] = role + + if "usage" in chunk and chunk["usage"]: + token_metrics.update( + { "input_tokens": chunk["usage"]["prompt_tokens"], "output_tokens": chunk["usage"]["completion_tokens"], "total_tokens": chunk["usage"]["total_tokens"], } + ) + + return role + + +def parse_response(resp, is_completion=False): + output_messages = [] + for choice in resp.choices: + message = {"content": choice.text if is_completion else choice.message.content} + if choice.get("role", None) or choice.get("message", {}).get("role", None): + message["role"] = choice["role"] if is_completion else choice["message"]["role"] + output_messages.append(message) + token_metrics = { + "input_tokens": resp.usage.prompt_tokens, + "output_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } return output_messages, token_metrics From f4d76d74e7f7a3f3a33799b86afb1a621b39a685 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 28 Mar 2025 11:20:40 -0400 Subject: [PATCH 27/61] run black --- ddtrace/contrib/_litellm.py | 2 +- ddtrace/contrib/internal/litellm/patch.py | 19 +++++++++++-------- ddtrace/contrib/internal/litellm/utils.py | 21 +++++++++++++++++---- ddtrace/contrib/internal/openai/utils.py | 14 +++++++++++--- ddtrace/llmobs/_integrations/litellm.py | 17 ++++++++++++----- ddtrace/llmobs/_integrations/openai.py | 6 +++++- ddtrace/llmobs/_integrations/utils.py | 16 ++++++++++++---- tests/contrib/litellm/conftest.py | 2 ++ tests/contrib/litellm/test_litellm.py | 5 +---- 9 files changed, 72 insertions(+), 30 deletions(-) diff --git a/ddtrace/contrib/_litellm.py b/ddtrace/contrib/_litellm.py index 026d11250ca..70d4038a173 100644 --- a/ddtrace/contrib/_litellm.py +++ b/ddtrace/contrib/_litellm.py @@ -44,4 +44,4 @@ from ddtrace import Pin, config Pin.override(litellm, service="my-litellm-service") -""" # noqa: E501 \ No newline at end of file +""" # noqa: E501 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index c17bbcdd447..74515b8e829 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -50,14 +50,17 @@ def _create_span(litellm, pin, func, instance, args, kwargs): def traced_completion(litellm, pin, func, instance, args, kwargs): return _traced_completion(litellm, pin, func, instance, args, kwargs, False) + @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, False) + @with_traced_module def traced_text_completion(litellm, pin, func, instance, args, kwargs): return _traced_completion(litellm, pin, func, instance, args, kwargs, True) + @with_traced_module async def traced_atext_completion(litellm, pin, 
func, instance, args, kwargs): return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True) @@ -72,9 +75,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion try: resp = func(*args, **kwargs) if stream: - return TracedLiteLLMStream( - resp, integration, span, args, kwargs, is_completion - ) + return TracedLiteLLMStream(resp, integration, span, args, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -83,7 +84,9 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") + integration.llmobs_set_tags( + span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" + ) span.finish() @@ -96,9 +99,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com try: resp = await func(*args, **kwargs) if stream: - return TracedLiteLLMAsyncStream( - resp, integration, span, args, kwargs, is_completion - ) + return TracedLiteLLMAsyncStream(resp, integration, span, args, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -107,7 +108,9 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") + integration.llmobs_set_tags( + span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" + ) span.finish() diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index e854f814aa9..37fdcf01b55 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -4,14 +4,19 @@ from typing import List from ddtrace.internal.logger import get_logger -from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import ( + openai_construct_completion_from_streamed_chunks, + openai_construct_message_from_streamed_chunks, +) log = get_logger(__name__) + def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) + class BaseTracedLiteLLMStream: def __init__(self, generator, integration, span, args, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 @@ -49,6 +54,7 @@ def __iter__(self): ) self._dd_span.finish() + class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): await self._generator.__aenter__() @@ -74,6 +80,7 @@ async def __aiter__(self): ) self._dd_span.finish() + def _loop_handler(chunk, streamed_chunks): """Appends the chunk to the correct index in the streamed_chunks list. 
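As the "streamed spans will be finished separately" comments above indicate, the traced stream wrappers all follow one pattern: wrap the provider's generator, buffer chunks while the caller iterates, and defer span.finish() plus LLMObs tagging until the stream is exhausted. A minimal sketch of that deferred-finish pattern, with a duck-typed span and a hypothetical on_finished callback standing in for _process_finished_stream:

# Sketch: finish the span only once the wrapped generator is exhausted.
class TracedStream:
    def __init__(self, generator, span, on_finished):
        self._generator = generator
        self._span = span
        self._on_finished = on_finished  # called with the buffered chunks

    def __iter__(self):
        chunks = []
        try:
            for chunk in self._generator:
                chunks.append(chunk)
                yield chunk
        finally:
            # Runs on normal exhaustion, an early break, or a consumer error.
            self._on_finished(self._span, chunks)
            self._span.finish()

The real classes additionally split chunks into per-choice buckets (via _loop_handler) so multi-choice streams can be reassembled before tagging.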
@@ -88,11 +95,17 @@ def _loop_handler(chunk, streamed_chunks): def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): try: if is_completion: - formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks + ] else: - formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks + ] operation = "completion" if is_completion else "chat" if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + integration.llmobs_set_tags( + span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation + ) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index 5f4f227054e..ddf0eb37b49 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,7 +6,10 @@ from typing import Generator from typing import List -from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import ( + openai_construct_completion_from_streamed_chunks, + openai_construct_message_from_streamed_chunks, +) import wrapt from ddtrace.internal.logger import get_logger @@ -266,9 +269,13 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp request_messages = kwargs.get("messages", None) try: if is_completion: - formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks + ] else: - formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks + ] if integration.is_pc_sampled_span(span): _tag_streamed_response(integration, span, formatted_completions) _set_token_metrics(span, formatted_completions, prompts, request_messages, kwargs) @@ -277,6 +284,7 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) + def _tag_streamed_response(integration, span, completions_or_messages=None): """Tagging logic for streamed completions and chat completions.""" for idx, choice in enumerate(completions_or_messages): diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 024720313f2..c15c7836e4a 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,11 +3,20 @@ from typing import List from typing import Optional -from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY, METRICS, OUTPUT_TOKENS_METRIC_KEY, TOTAL_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import ( + INPUT_TOKENS_METRIC_KEY, + METRICS, + OUTPUT_TOKENS_METRIC_KEY, + TOTAL_TOKENS_METRIC_KEY, +) from 
ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._integrations.utils import ( + get_llmobs_metrics_tags, + openai_set_meta_tags_from_chat, + openai_set_meta_tags_from_completion, +) from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -18,9 +27,7 @@ class LiteLLMIntegration(BaseLLMIntegration): # maps requested model name to parsed model name and provider _model_map = {} - def _set_base_span_tags( - self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] - ) -> None: + def _set_base_span_tags(self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any]) -> None: if model is not None: span.set_tag_str("litellm.request.model", model) diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index b0ae3fc8e17..6c8464bf2a3 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -20,7 +20,11 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._integrations.utils import ( + get_llmobs_metrics_tags, + openai_set_meta_tags_from_chat, + openai_set_meta_tags_from_completion, +) from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 855b0a829aa..cb7e022f802 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -279,7 +279,9 @@ def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], com prompt = kwargs.get("prompt", "") if isinstance(prompt, str): prompt = [prompt] - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash")} + parameters = { + k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash") + } output_messages = [{"content": ""}] if not span.error and completions: choices = getattr(completions, "choices", completions) @@ -292,23 +294,28 @@ def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], com } ) + def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" input_messages = [] for m in kwargs.get("messages", []): input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash")} + parameters = { + k: v + for k, v in kwargs.items() + if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash") + } span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters}) 
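The streamed branch just below also has to handle tool calls, whose JSON arguments arrive as string fragments spread across many chunks (the _with_tools cassettes added later in this series stream fragments like '{"', 'location', '":"', 'San'). A standalone illustration of reassembling them, assuming OpenAI-style chunk dicts; rebuild_tool_calls is a hypothetical helper, not the integration's API:

import json

# Sketch: rebuild one choice's streamed tool calls from delta fragments.
# Arguments are valid JSON only after every fragment has been concatenated.
def rebuild_tool_calls(chunks, choice_index=0):
    calls = {}  # tool-call index -> {"name": ..., "arguments": ...}
    for chunk in chunks:
        for choice in chunk.get("choices", []):
            if choice.get("index", 0) != choice_index:
                continue
            for tc in (choice.get("delta") or {}).get("tool_calls") or []:
                entry = calls.setdefault(tc.get("index", 0), {"name": "", "arguments": ""})
                fn = tc.get("function") or {}
                entry["name"] = entry["name"] or (fn.get("name") or "")
                entry["arguments"] += fn.get("arguments") or ""
    return {i: {"name": c["name"], "arguments": json.loads(c["arguments"])} for i, c in calls.items()}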
if span.error or not messages: span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) return if isinstance(messages, list): # streamed response - role = "" + role = "" output_messages = [] for streamed_message in messages: # litellm roles appear only on the first choice, so store it to be used for all choices - role = streamed_message.get("role", "") or role + role = streamed_message.get("role", "") or role message = {"content": streamed_message.get("content", ""), "role": role} tool_calls = streamed_message.get("tool_calls", []) if tool_calls: @@ -353,6 +360,7 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: output_messages.append({"content": content, "role": role}) span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" if not streamed_chunks: diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index d12810564c4..9bc9e4fa887 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -39,6 +39,7 @@ def mock_llmobs_writer(): finally: patcher.stop() + @pytest.fixture def litellm(ddtrace_global_config, ddtrace_config_litellm): global_config = default_global_config() @@ -56,6 +57,7 @@ def litellm(ddtrace_global_config, ddtrace_config_litellm): yield litellm unpatch() + @pytest.fixture def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 570c2d23251..a6c94383811 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,7 +33,7 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - resp =litellm.completion( + resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, @@ -60,9 +60,6 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): pass - - - @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): From a4a2b24a70b3ad816f80a0a7b0f3f7966e36284c Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Sun, 30 Mar 2025 18:32:41 -0400 Subject: [PATCH 28/61] add tool call tests --- .riot/requirements/45c12de.txt | 25 +-- .riot/requirements/e05a904.txt | 25 +-- .riot/requirements/e8c8851.txt | 25 +-- .riot/requirements/f30dfc2.txt | 25 +-- ...ompletion_multiple_choices_with_tools.yaml | 107 ++++++++++ ...etion_stream_exclude_usage_with_tools.yaml | 130 +++++++++++++ ...iple_choices_exclude_usage_with_tools.yaml | 162 ++++++++++++++++ ...on_stream_multiple_choices_with_tools.yaml | 183 ++++++++++++++++++ .../completion_stream_with_tools.yaml | 151 +++++++++++++++ .../cassettes/completion_with_tools.yaml | 110 +++++++++++ tests/contrib/litellm/test_litellm_llmobs.py | 103 +++++----- tests/contrib/litellm/utils.py | 74 ++++++- 12 files changed, 1011 insertions(+), 109 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml create mode 
100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_with_tools.yaml diff --git a/.riot/requirements/45c12de.txt b/.riot/requirements/45c12de.txt index c2da32fca1e..3f0c6ae7839 100644 --- a/.riot/requirements/45c12de.txt +++ b/.riot/requirements/45c12de.txt @@ -10,8 +10,8 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -35,30 +35,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -67,7 +67,8 @@ sortedcontainers==2.4.0 tiktoken==0.9.0 tokenizers==0.21.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/e05a904.txt b/.riot/requirements/e05a904.txt index 48afc8fdf4a..e46108467ec 100644 --- a/.riot/requirements/e05a904.txt +++ b/.riot/requirements/e05a904.txt @@ -11,8 +11,8 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -37,30 +37,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -70,7 +70,8 @@ tiktoken==0.9.0 tokenizers==0.21.1 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/e8c8851.txt b/.riot/requirements/e8c8851.txt index a209020993c..55dd71876c7 100644 --- a/.riot/requirements/e8c8851.txt +++ b/.riot/requirements/e8c8851.txt @@ -10,8 +10,8 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 
+botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -35,30 +35,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -67,7 +67,8 @@ sortedcontainers==2.4.0 tiktoken==0.9.0 tokenizers==0.21.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/f30dfc2.txt b/.riot/requirements/f30dfc2.txt index c9092e0225c..0f57ac61e7a 100644 --- a/.riot/requirements/f30dfc2.txt +++ b/.riot/requirements/f30dfc2.txt @@ -11,8 +11,8 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -37,30 +37,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -70,7 +70,8 @@ tiktoken==0.9.0 tokenizers==0.21.1 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==1.26.20 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml new file mode 100644 index 00000000000..ebbbb24714e --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '487' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//7FTLbtswELzrK4g924Fkx4/olhRJkNZ12iatkdaBQFNrmTVFEiTVNjD8 + 74UoV5IfBXrooYfqIBA73Nnl7JCbgBDgKcQE2Io6lmvRvbod3X/U4cVlev368eZ81FsjhvcXt0/X + w+kUOmWGWnxF5n5lnTGVa4GOK1nBzCB1WLJGo/N+NI76o9ADuUpRlGmZdt3+2aDrCrNQ3TDqDXaZ + K8UZWojJl4AQQjb+X/YoU/wBMfE8PpKjtTRDiOtNhIBRoowAtZZbR6WDTgMyJR3Ksm1ZCNECnFIi + YVSIpnD1bVrrRigqRPL+avbm3fhusFZTFn64fzsYDqOUPdhWvYr6RfuGloVktUAtvI7HB8UIAUlz + n5uhS1hhDEqXfEfqVmgOaAgBarIiR+nKI8BmDkIxWhLPIZ7DA5XkxlDJuGWqQ15dzmELewzb4NT6 + uSWSwWVhqThWj0qpnK/l5XveIdt6UkJl2qiFPUiFJZfcrhKD1HoB2nMIWhRHJoj+FRPcjacLrj+H + T3z4ic/EYzRDOplM/pvgb5og2LUAxd6oQRuVa5c4tUZfdDyoSKF5jhqwP96BTjkqmnjU63dO0CUp + Osq9FWr3McpWmDapzVNEi5SrFtA273E3p7ir43OZ/Ql9AzCG2mGaaIMpZ/snbrYZLF/r322rRfYN + g0XzjTNMHEdTjiPFJS1EdYXAvliHebLkMkOjDa/vUbANfgIAAP//AwBATheKSwYAAA== + headers: + CF-RAY: + - 9278b28d69eb3ba6-BOS + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 28 Mar 2025 17:02:51 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '540' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7f2afa7681587d4ae31ca5d9d75824f0 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..515680c5d04 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml @@ -0,0 +1,130 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given 
location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ty5BH4ChPTiw8GnzCSqhxhoP","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b292789f3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:51 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '281' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_78940dfd1e163cd37e49e666383b7944 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..fda11e5011a --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml @@ -0,0 +1,162 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b2960dbd3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:52 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '406' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7e8b09694a1029b3eb2fecf93deef4a3 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml new file mode 100644 index 00000000000..a68aabafd1d --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml @@ -0,0 +1,183 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":43,"total_tokens":128,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28799743ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:50 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=E5.J70d433QZA9Yb..SoyuL46jC1xpIxn4pnxkMjOWc-1743181370-1.0.1.1-bQRVFg.zcyoLYbcsK6DabkiL3ZaPDY.X.mSq2T37uuxnG9X7_mV50crYaQ8tZJdqTZAxOMLe2RYv8mB5jn6GmqPhSgI41BBm4DMMl4lW8FY; + path=/; expires=Fri, 28-Mar-25 17:32:50 GMT; domain=.api.openai.com; HttpOnly; 
+ Secure; SameSite=None + - _cfuvid=wXfAZSge17hyjNvaLx1PDXNyLNpOX59UJ.sov3vRs0U-1743181370249-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '586' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e2c3786bb1e2c88f639d2f20e45a9e88 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml new file mode 100644 index 00000000000..a14ca18675e --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml @@ -0,0 +1,151 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_HuRWIamjJM7bLsbCamjSgf8e","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":24,"total_tokens":109,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28278948f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:49 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eBHeBQfyYm2koe.GVJdLen4F5mzqqi7jmZT_YvXZOLk-1743181369-1.0.1.1-LT_wEU5NDwWbrNU7lyULsUd_ptgtackPCBbB6I8i.4_taWWP57cHdMtWDz1rfhzKB9f_pKfJzxijQ_Z27_P6iLDT1hf4ioC2b0otZHD3c4o; + path=/; expires=Fri, 28-Mar-25 17:32:49 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=x0zurymIN4SzRny8DSr5RMnqvVD_AwW_LNnMmnuYuRg-1743181369158-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '295' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_941396c1f446305a0aed13f33a158719 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_with_tools.yaml new file mode 100644 index 00000000000..78f08f3a0af --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_with_tools.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '487' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFNNj9owEL3nV1hzhlUoIGhusOqu1FIJqQekllVk7CHx4tiRPaFdIf57 + FYdNAkul5hBZ8/zevPnwKWIMlISEgcg5iaLUw+Xz7Msi36xkWcnHldLrxXoZfzt8nW2mPxcwqBl2 + 94qC3lkPwhalRlLWNLBwyAlr1dFsMh7N49k4DkBhJeqalpU0HD9Mh1S5nR3Go0/TCzO3SqCHhP2K + GGPsFP61RyPxDyQs6IRIgd7zDCFpLzEGzuo6Atx75YkbgkEHCmsITW3bVFr3ALJWp4Jr3SVuvlPv + 3DWKa52K9fhwHMvv89flLBOb4zE+ztUkX/XyNdJvZTC0r4xoG9TD23hyk4wxMLwI3AwpFZVzaCj9 + jZxydDcyjAF3WVWgoboEOG1BW8Fr4S0kW/jBDXty3AjlhR2wx8UWznClcI7unV96TXK4rzzXH7vH + jbEUcoX2vVyQczspbbPS2Z2/ocJeGeXz1CH3oQH9OUTvRoIFqK5GDaWzRUkp2QOGpPNpIwrdJnbg + 6PMFJEtc9+LxZHBHLpVIXIVVaLdPcJGj7KjdFvJKKtsDol7pH93c027KVyb7H/kOEAJLQpmWDqUS + 1xV31xzWD/Vf19omB8Pg0R2VwJQUunocEve80s0TAv/mCYt0r0yGrnSqfUfROfoLAAD//wMASyVc + NkYEAAA= + headers: + CF-RAY: + - 9278a2ee88a28ff6-BOS + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 28 Mar 2025 16:52:11 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + path=/; expires=Fri, 28-Mar-25 17:22:11 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '397' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f08846e9b273c1b121279f2a187948dc + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index e64c5975c28..c76b63075f2 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -1,26 +1,26 @@ import pytest -from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response +from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools from tests.llmobs._utils import _expected_llmobs_llm_span_event @pytest.mark.parametrize( "ddtrace_global_config", 
[dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] ) +@pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], +) class TestLLMObsLiteLLM: - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -51,19 +51,41 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) + def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + if stream and n > 1: + pytest.skip("Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977") + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): + messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + tools=tools, + tool_choice="auto", + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}, "tool_choice": "auto"}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -94,19 +116,6 @@ async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_ ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
@@ -137,19 +146,6 @@ def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tr ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) async def test_atext_completion( self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage ): @@ -181,3 +177,4 @@ async def test_atext_completion( tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) ) + diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 508d6a1fd3e..26bbc755a13 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -1,5 +1,6 @@ import vcr import os +import json CASETTE_EXTENSION = ".yaml" @@ -21,7 +22,7 @@ def get_request_vcr(): # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n, include_usage=True): +def get_cassette_name(stream, n, include_usage=True, tools=False): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" # include_usage only affects streamed responses @@ -29,24 +30,27 @@ def get_cassette_name(stream, n, include_usage=True): usage_suffix = "_exclude_usage" else: usage_suffix = "" - return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION + tools_suffix = "_with_tools" if tools else "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + CASETTE_EXTENSION def consume_stream(resp, n, is_completion=False): - output_messages = [{"content": ""} for _ in range(n)] + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] token_metrics = {} role = None for chunk in resp: - role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics async def async_consume_stream(resp, n, is_completion=False): - output_messages = [{"content": ""} for _ in range(n)] + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] token_metrics = {} role = None async for chunk in resp: - role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -58,6 +62,16 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co if "role" not in output_messages[choice.index] and (choice.get("delta", {}).get("role") or role): role = choice.get("delta", {}).get("role") or role output_messages[choice.index]["role"] = role + if choice.get("delta", {}).get("tool_calls", []): + tool_calls_chunk = choice["delta"]["tool_calls"] + for tool_call in tool_calls_chunk: + while tool_call.index >= len(output_messages[choice.index]["tool_calls"]): + output_messages[choice.index]["tool_calls"].append({}) + arguments = output_messages[choice.index]["tool_calls"][tool_call.index].get("arguments", "") + 
output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) or tool_call.function.name
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = arguments + tool_call.function.arguments
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["type"] = tool_call.type
 
     if "usage" in chunk and chunk["usage"]:
         token_metrics.update(
@@ -68,15 +82,38 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co
             }
         )
 
-    return role
+    return output_messages, token_metrics, role
+
+
+def parse_tool_calls(output_messages):
+    # Parse each tool call's accumulated arguments; drop the tool_calls key from messages that have none
+    for message in output_messages:
+        if message["tool_calls"]:
+            for tool_call in message["tool_calls"]:
+                if "arguments" in tool_call:
+                    tool_call["arguments"] = json.loads(tool_call["arguments"])
+        else:
+            del message["tool_calls"]
+    return output_messages
 
 
 def parse_response(resp, is_completion=False):
     output_messages = []
     for choice in resp.choices:
-        message = {"content": choice.text if is_completion else choice.message.content}
+        content = choice.text if is_completion else choice.message.content
+        message = {"content": content or ""}
         if choice.get("role", None) or choice.get("message", {}).get("role", None):
             message["role"] = choice["role"] if is_completion else choice["message"]["role"]
+        tool_calls = choice.get("message", {}).get("tool_calls", [])
+        if tool_calls:
+            message["tool_calls"] = []
+            for tool_call in tool_calls:
+                message["tool_calls"].append({
+                    "name": tool_call["function"]["name"],
+                    "arguments": json.loads(tool_call["function"]["arguments"]),
+                    "tool_id": tool_call["id"],
+                    "type": tool_call["type"]
+                })
         output_messages.append(message)
     token_metrics = {
         "input_tokens": resp.usage.prompt_tokens,
@@ -84,3 +121,24 @@
         "total_tokens": resp.usage.total_tokens,
     }
     return output_messages, token_metrics
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } +] \ No newline at end of file From 64f48a1ac7a3a9518b0a374573fb37731c60dd24 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Mon, 31 Mar 2025 15:54:51 -0400 Subject: [PATCH 29/61] add test for case where integrations are enabled --- ddtrace/contrib/internal/litellm/patch.py | 3 +- ddtrace/llmobs/_integrations/litellm.py | 19 ++++++- tests/contrib/litellm/conftest.py | 5 +- tests/contrib/litellm/test_litellm_llmobs.py | 58 +++++++++++++++++++- 4 files changed, 77 insertions(+), 8 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 74515b8e829..c69697519b8 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -36,12 +36,11 @@ def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" model = get_argument_value(args, kwargs, 0, "model", None) integration = litellm._datadog_integration - base_url = kwargs.get("api_base", None) span = integration.trace( pin, "litellm.%s" % func.__name__, model=model, - submit_to_llmobs=integration.should_submit_to_llmobs(base_url), + submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs), ) return span diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c15c7836e4a..65d1e12bc3b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,6 +3,7 @@ from typing import List from typing import Optional +import ddtrace from ddtrace.llmobs._constants import ( INPUT_TOKENS_METRIC_KEY, METRICS, @@ -13,7 +14,6 @@ from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._integrations.utils import ( - get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion, ) @@ -68,5 +68,18 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, } - def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: - return base_url is None + def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: + """ + Span should be NOT submitted to LLMObs if: + - base_url is not None + - model provider is Open AI or Azure AND request is not being streamed AND Open AI integration is enabled + """ + base_url = kwargs.get("api_base", None) + if base_url is not None: + return False + stream = kwargs.get("stream", False) + model_lower = model.lower() if model else "" + # model provider is unknown until request completes; therefore, this is a best effort attempt to check if model provider is Open AI or Azure + if ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) and not stream and "openai" in ddtrace._monkey._get_patched_modules(): + return False + return True diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 9bc9e4fa887..1ca4294bd80 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -66,9 +66,10 @@ def mock_tracer(litellm, ddtrace_global_config): pin.tracer._configure() if ddtrace_global_config.get("_llmobs_enabled", False): - # Have to disable and re-enable LLMObs to use to mock tracer. + # Have to disable and re-enable LLMObs to use the mock tracer. 
         LLMObs.disable()
-        LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False)
+        enable_integrations = ddtrace_global_config.get("_integrations_enabled", False)
+        LLMObs.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations)
 
     yield mock_tracer
 
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index c76b63075f2..16f3e61f3a9 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,7 +1,10 @@
+from ddtrace._trace.pin import Pin
+from ddtrace.llmobs._llmobs import LLMObs
 import pytest
 
 from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from tests.utils import DummyTracer
 
 
 @pytest.mark.parametrize(
@@ -84,7 +87,6 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m
                 tags={"ml_app": "", "service": "tests.contrib.litellm"},
             )
         )
-
     async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
        with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -178,3 +180,57 @@ async def test_atext_completion(
             )
         )
 
+    def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
+        if stream:
+            pytest.skip("Streamed OpenAI requests lead to unfinished spans; skip them for now")
+        with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)):
+            LLMObs.disable()
+
+            LLMObs.enable(integrations_enabled=True)
+            mock_tracer = DummyTracer()
+            import litellm
+            import openai
+
+            pin = Pin.get_from(litellm)
+            pin._override(litellm, tracer=mock_tracer)
+            pin._override(openai, tracer=mock_tracer)
+
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": include_usage},
+            )
+            LLMObs.disable()
+            if stream:
+                output_messages, token_metrics = consume_stream(resp, n)
+            else:
+                output_messages, token_metrics = parse_response(resp)
+
+            openai_span = mock_tracer.pop_traces()[0][1]
+            # remove the parent span since the LiteLLM request span will not be submitted to LLMObs
+            openai_span._parent = None
+            assert mock_llmobs_writer.enqueue.call_count == 1
+            mock_llmobs_writer.enqueue.assert_called_with(
+                _expected_llmobs_llm_span_event(
+                    openai_span,
+                    model_name="gpt-3.5-turbo-0125",
+                    model_provider="openai",
+                    input_messages=messages,
+                    output_messages=output_messages,
+                    metadata={
+                        "n": n,
+                        "extra_body": {},
+                        "timeout": 600.0,
+                        "extra_headers": {
+                            "X-Stainless-Raw-Response": "true"
+                        }
+                    },
+                    token_metrics=token_metrics,
+                    tags={"ml_app": "", "service": "tests.contrib.litellm"},
+                )
+            )
+
+    
\ No newline at end of file

From 4dd467c7cb6f909c7e93f0ccf3202c25f3eea077 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Mon, 31 Mar 2025 16:40:17 -0400
Subject: [PATCH 30/61] add tests for proxy requests

---
 .../completion_multiple_choices_proxy.yaml    | 133 ++++++++++
 .../litellm/cassettes/completion_proxy.yaml   | 132 ++++++++++
 ...completion_stream_exclude_usage_proxy.yaml | 144 +++++++++++
 ..._multiple_choices_exclude_usage_proxy.yaml | 240 ++++++++++++++++++
 ...pletion_stream_multiple_choices_proxy.yaml | 186 ++++++++++++++
 .../cassettes/completion_stream_proxy.yaml    | 154 +++++++++++
 tests/contrib/litellm/conftest.py             |   4 +
tests/contrib/litellm/test_litellm_llmobs.py | 19 ++ tests/contrib/litellm/utils.py | 10 +- 9 files changed, 1017 insertions(+), 5 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_proxy.yaml diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml new file mode 100644 index 00000000000..7641313c6a3 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: '{"id":"chatcmpl-BHGBewTYXrkQYXQ5DUtzsz7lL7gjy","created":1743453498,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not + much, just here to help you with anything you need. How can I assist you today?","role":"assistant","tool_calls":null,"function_call":null}},{"finish_reason":"stop","index":1,"message":{"content":"Not + much, just here to chat and help with anything you need. 
How can I assist + you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":43,"prompt_tokens":13,"total_tokens":56,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}' + headers: + content-length: + - '855' + content-type: + - application/json + date: + - Mon, 31 Mar 2025 20:38:18 GMT + llm_provider-access-control-expose-headers: + - X-Request-ID + llm_provider-alt-svc: + - h3=":443"; ma=86400 + llm_provider-cf-cache-status: + - DYNAMIC + llm_provider-cf-ray: + - 9292a64f29e7c989-IAD + llm_provider-connection: + - keep-alive + llm_provider-content-encoding: + - gzip + llm_provider-content-type: + - application/json + llm_provider-date: + - Mon, 31 Mar 2025 20:38:19 GMT + llm_provider-openai-organization: + - datadog-4 + llm_provider-openai-processing-ms: + - '420' + llm_provider-openai-version: + - '2020-10-01' + llm_provider-server: + - cloudflare + llm_provider-strict-transport-security: + - max-age=31536000; includeSubDomains; preload + llm_provider-transfer-encoding: + - chunked + llm_provider-x-content-type-options: + - nosniff + llm_provider-x-ratelimit-limit-requests: + - '15000' + llm_provider-x-ratelimit-limit-tokens: + - '2000000' + llm_provider-x-ratelimit-remaining-requests: + - '14999' + llm_provider-x-ratelimit-remaining-tokens: + - '1999993' + llm_provider-x-ratelimit-reset-requests: + - 4ms + llm_provider-x-ratelimit-reset-tokens: + - 0s + llm_provider-x-request-id: + - req_743e3e93e074d74f8c2dcdaff378a836 + server: + - uvicorn + x-litellm-attempted-fallbacks: + - '0' + x-litellm-attempted-retries: + - '0' + x-litellm-call-id: + - 7c65af0a-51fe-4b5b-8491-d52aeed495c6 + x-litellm-key-spend: + - '0.0' + x-litellm-model-api-base: + - https://api.openai.com + x-litellm-model-group: + - gpt-3.5-turbo + x-litellm-model-id: + - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753 + x-litellm-overhead-duration-ms: + - '1.125' + x-litellm-response-cost: + - '7.099999999999999e-05' + x-litellm-response-duration-ms: + - '558.881' + x-litellm-version: + - 1.63.11 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999993' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_proxy.yaml b/tests/contrib/litellm/cassettes/completion_proxy.yaml new file mode 100644 index 00000000000..15e7ea403f3 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_proxy.yaml @@ -0,0 +1,132 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 
'{"id":"chatcmpl-BHGAf6WA7lmIL9yuwftXYqc5kADAy","created":1743453437,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not + much, just here to help with any questions or tasks you may have. How can + I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":24,"prompt_tokens":13,"total_tokens":37,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}' + headers: + content-length: + - '663' + content-type: + - application/json + date: + - Mon, 31 Mar 2025 20:37:16 GMT + llm_provider-access-control-expose-headers: + - X-Request-ID + llm_provider-alt-svc: + - h3=":443"; ma=86400 + llm_provider-cf-cache-status: + - DYNAMIC + llm_provider-cf-ray: + - 9292a4d20e95057d-IAD + llm_provider-connection: + - keep-alive + llm_provider-content-encoding: + - gzip + llm_provider-content-type: + - application/json + llm_provider-date: + - Mon, 31 Mar 2025 20:37:18 GMT + llm_provider-openai-organization: + - datadog-4 + llm_provider-openai-processing-ms: + - '406' + llm_provider-openai-version: + - '2020-10-01' + llm_provider-server: + - cloudflare + llm_provider-strict-transport-security: + - max-age=31536000; includeSubDomains; preload + llm_provider-transfer-encoding: + - chunked + llm_provider-x-content-type-options: + - nosniff + llm_provider-x-ratelimit-limit-requests: + - '15000' + llm_provider-x-ratelimit-limit-tokens: + - '2000000' + llm_provider-x-ratelimit-remaining-requests: + - '14999' + llm_provider-x-ratelimit-remaining-tokens: + - '1999994' + llm_provider-x-ratelimit-reset-requests: + - 4ms + llm_provider-x-ratelimit-reset-tokens: + - 0s + llm_provider-x-request-id: + - req_6fdefc3db6a6e5b77dae976930efe649 + server: + - uvicorn + x-litellm-attempted-fallbacks: + - '0' + x-litellm-attempted-retries: + - '0' + x-litellm-call-id: + - 30958a62-d9f7-47e6-8971-2b58852f2976 + x-litellm-key-spend: + - '0.0' + x-litellm-model-api-base: + - https://api.openai.com + x-litellm-model-group: + - gpt-3.5-turbo + x-litellm-model-id: + - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753 + x-litellm-overhead-duration-ms: + - '1.424' + x-litellm-response-cost: + - '4.25e-05' + x-litellm-response-duration-ms: + - '611.3' + x-litellm-version: + - 1.63.11 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999994' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml new file mode 100644 index 00000000000..39cf74be7cb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml @@ -0,0 +1,144 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + 
x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:18 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - a5a87fc0-874f-4432-b608-91b437b91fb2 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml new file mode 100644 index 00000000000..7cface6a716 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml @@ -0,0 +1,240 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + 
and"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + any"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + questions"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + or"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + tasks"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + may"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + have"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + 
today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:19 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - b4f152d1-5074-4fb3-a79d-ad0529fa5aa1 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml new file mode 100644 index 00000000000..479f5817089 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml @@ -0,0 +1,186 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + 
+ data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":27,"prompt_tokens":13,"total_tokens":40}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:17 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 7396b03c-8ab2-4593-8e46-a3e1285bd4d4 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml new file mode 100644 index 00000000000..32e111c1775 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml @@ -0,0 +1,154 @@ 
+interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + and"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":22,"prompt_tokens":13,"total_tokens":35,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:16 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 
8b484c9d-0eae-4d95-8b0e-fe1bdb114b9a
+      x-litellm-key-spend:
+      - '0.0'
+      x-litellm-version:
+      - 1.63.11
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 1ca4294bd80..c94314d8902 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -79,3 +79,7 @@ def mock_tracer(litellm, ddtrace_global_config):
 @pytest.fixture
 def request_vcr():
     return get_request_vcr()
+
+@pytest.fixture
+def request_vcr_include_localhost():
+    return get_request_vcr(ignore_localhost=False)
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 16f3e61f3a9..a7023042caf 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -232,5 +232,24 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs
                 tags={"ml_app": "", "service": "tests.contrib.litellm"},
             )
         )
+
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)):
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": include_usage},
+                api_base="http://0.0.0.0:4000",
+            )
+            if stream:
+                consume_stream(resp, n)
+
+        # client-side requests made to the proxy are not submitted to LLMObs
+        assert mock_llmobs_writer.enqueue.call_count == 0
+
+
\ No newline at end of file
diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py
index 26bbc755a13..cd5eadb97b2 100644
--- a/tests/contrib/litellm/utils.py
+++ b/tests/contrib/litellm/utils.py
@@ -8,21 +8,20 @@
 # VCR is used to capture and store network requests made to Anthropic.
 # This is done to avoid making real calls to the API which could introduce
 # flakiness and cost.
-def get_request_vcr(): +def get_request_vcr(ignore_localhost=True): return vcr.VCR( cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"), record_mode="once", match_on=["path"], filter_headers=["authorization", "x-api-key", "api-key"], - # Ignore requests to the agent - ignore_localhost=True, + ignore_localhost=ignore_localhost, ) # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n, include_usage=True, tools=False): +def get_cassette_name(stream, n, include_usage=True, tools=False, proxy=False): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" # include_usage only affects streamed responses @@ -31,7 +30,8 @@ def get_cassette_name(stream, n, include_usage=True, tools=False): else: usage_suffix = "" tools_suffix = "_with_tools" if tools else "" - return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + CASETTE_EXTENSION + proxy_suffix = "_proxy" if proxy else "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + proxy_suffix + CASETTE_EXTENSION def consume_stream(resp, n, is_completion=False): From f0b8a721afc615928d097cb14b259cb1d800835b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 1 Apr 2025 10:03:04 -0400 Subject: [PATCH 31/61] add litellm tests to suitespec --- tests/llmobs/suitespec.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/llmobs/suitespec.yml b/tests/llmobs/suitespec.yml index a9d3da34dc5..40ba83b535a 100644 --- a/tests/llmobs/suitespec.yml +++ b/tests/llmobs/suitespec.yml @@ -12,6 +12,9 @@ components: langchain: - ddtrace/contrib/_langchain.py - ddtrace/contrib/internal/langchain/* + litellm: + - ddtrace/contrib/_litellm.py + - ddtrace/contrib/internal/litellm/* llmobs: - ddtrace/llmobs/* openai: @@ -73,6 +76,18 @@ suites: - tests/snapshots/tests.contrib.langchain.* runner: riot snapshot: true + litellm: + paths: + - '@bootstrap' + - '@core' + - '@tracing' + - '@contrib' + - '@litellm' + - '@llmobs' + - tests/contrib/litellm/* + - tests/snapshots/tests.contrib.litellm.* + runner: riot + snapshot: true llmobs: paths: - '@bootstrap' From 8d5d24b77a8fe0cd90458e1566cb5667b782359a Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 10:17:12 -0400 Subject: [PATCH 32/61] consume streams for apm tests --- tests/contrib/litellm/test_litellm.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 5b1c787ddba..43a9a4d4036 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,12 +33,16 @@ def test_litellm_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion"): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - litellm.completion( + resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + for _ in resp: + pass + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @@ -46,36 +50,44 @@ async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, strea with 
snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - await litellm.acompletion( + resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) - - + if stream: + async for _ in resp: + pass + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) def test_litellm_text_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - litellm.text_completion( + resp = litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) async def test_litellm_atext_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - await litellm.atext_completion( + resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("model", ["command-r", "anthropic/claude-3-5-sonnet-20240620"]) From 377b40d27c021f6ab6060ce6e3e378f3d5cef3bd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 10:26:38 -0400 Subject: [PATCH 33/61] remove unnecessary snapshot files and riot requirements --- .riot/requirements/45c12de.txt | 76 ------------ .riot/requirements/e05a904.txt | 79 ------------- .riot/requirements/e8c8851.txt | 76 ------------ .riot/requirements/f30dfc2.txt | 79 ------------- ddtrace/contrib/internal/litellm/patch.py | 1 - ddtrace/contrib/internal/litellm/utils.py | 5 - .../sources/min_compatible_versions.csv | 1 - min_compatible_versions.csv | 1 - .../claude-3-5-sonnet-20240620.yaml | 86 -------------- .../completion_vertex_ai/gemini-pro.yaml | 110 ------------------ tests/contrib/litellm/test_litellm_llmobs.py | 2 - ...llm.test_litellm_acompletion[False-1].json | 27 ----- ...llm.test_litellm_acompletion[False-2].json | 27 ----- ...ellm.test_litellm_acompletion[True-2].json | 27 ----- ...est_litellm_atext_completion[False-1].json | 27 ----- ...est_litellm_atext_completion[False-2].json | 27 ----- ...test_litellm_atext_completion[True-2].json | 27 ----- ...ellm.test_litellm_completion[False-1].json | 27 ----- ...ellm.test_litellm_completion[False-2].json | 27 ----- ...tellm.test_litellm_completion[True-2].json | 27 ----- ...t_litellm_completion_different_models.json | 27 ----- ...anthropic_claude-3-5-sonnet-20240620].json | 27 ----- ...test_litellm_text_completion[False-1].json | 27 ----- ...test_litellm_text_completion[False-2].json | 27 ----- ....test_litellm_text_completion[True-2].json | 27 ----- 25 files changed, 894 deletions(-) delete mode 100644 .riot/requirements/45c12de.txt delete mode 100644 .riot/requirements/e05a904.txt delete mode 100644 .riot/requirements/e8c8851.txt delete mode 100644 .riot/requirements/f30dfc2.txt delete mode 100644 tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml delete mode 100644 
tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json diff --git a/.riot/requirements/45c12de.txt b/.riot/requirements/45c12de.txt deleted file mode 100644 index 3f0c6ae7839..00000000000 --- a/.riot/requirements/45c12de.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/45c12de.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/e05a904.txt b/.riot/requirements/e05a904.txt deleted file mode 100644 index e46108467ec..00000000000 --- a/.riot/requirements/e05a904.txt +++ /dev/null @@ -1,79 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following 
command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/e05a904.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -async-timeout==5.0.1 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tomli==2.2.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/e8c8851.txt b/.riot/requirements/e8c8851.txt deleted file mode 100644 index 55dd71876c7..00000000000 --- a/.riot/requirements/e8c8851.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/e8c8851.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/f30dfc2.txt b/.riot/requirements/f30dfc2.txt deleted file mode 100644 index 0f57ac61e7a..00000000000 --- a/.riot/requirements/f30dfc2.txt +++ /dev/null @@ -1,79 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/f30dfc2.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 
-aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -async-timeout==5.0.1 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tomli==2.2.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==1.26.20 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index c00d4c819f5..9aa7b77d89c 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -7,7 +7,6 @@ from ddtrace.contrib.trace_utils import unwrap from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap -from ddtrace.contrib.internal.litellm.utils import tag_request from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream from ddtrace.llmobs._integrations import LiteLLMIntegration diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 37fdcf01b55..9f813458e57 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -12,11 +12,6 @@ log = get_logger(__name__) -def tag_request(span, kwargs): - if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: - span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) - - class BaseTracedLiteLLMStream: def __init__(self, generator, integration, span, args, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 diff --git a/lib-injection/sources/min_compatible_versions.csv b/lib-injection/sources/min_compatible_versions.csv index 7617732a2e2..97aa880036f 100644 --- a/lib-injection/sources/min_compatible_versions.csv +++ b/lib-injection/sources/min_compatible_versions.csv @@ -67,7 +67,6 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 -google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 diff --git a/min_compatible_versions.csv b/min_compatible_versions.csv index 7617732a2e2..97aa880036f 100644 --- a/min_compatible_versions.csv +++ b/min_compatible_versions.csv @@ -67,7 +67,6 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 -google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 diff --git 
a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml deleted file mode 100644 index a719ac0af83..00000000000 --- a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml +++ /dev/null @@ -1,86 +0,0 @@ -interactions: -- request: - body: '{"model": "claude-3-5-sonnet-20240620", "messages": [{"role": "user", "content": - [{"type": "text", "text": "Hey, what is up?"}]}], "max_tokens": 4096}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - anthropic-version: - - '2023-06-01' - connection: - - keep-alive - content-length: - - '150' - content-type: - - application/json - host: - - api.anthropic.com - user-agent: - - litellm/1.63.12 - method: POST - uri: https://api.anthropic.com/v1/messages - response: - body: - string: !!binary | - H4sIAAAAAAAAA2RQTW/UQAz9K8aXvcyibNoCygWVU5dyQagnhCJ3xiSjztpp7Gm7Wu1/RwlUAnGy - 9L70nk+YE3Z4sKFvdrd37Xhf33+gT5+/yXT7ZUdPX9s7DOjHiRcVm9HAGHDWsgBkls1JHAMeNHHB - DmOhmnh7sb3amoqwb9umvWzetQ0GjCrO4th9P72GOr8s9vV0eMOl6Bu4NiCB6z0UkqHSwLDGB9hD - Utk4jPTEMPFsKlSAXyaeM0tkA53hJ3PJMliA++qw3xxg5JnBFUYuExy1wnP2EUiO8FjZPKusRtcp - R1sEmwQlP6yelC1Ws7dwo88QSWAPv3evQa6Jjh/x/COguU79zGQq2CFL6r3Ogn8I48e6NMROaikB - 6/rK7oRZpuq96wOLYbe7CBgpjtzHmWlp1v8raF75mSn9z2n1v5HLq/P5FwAAAP//AwAsXlFX5AEA - AA== - headers: - CF-RAY: - - 923fd79c4a4a7cfc-EWR - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Fri, 21 Mar 2025 19:26:40 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Robots-Tag: - - none - anthropic-organization-id: - - 0280e0cf-573a-4392-b276-1b73319958fb - anthropic-ratelimit-input-tokens-limit: - - '20000' - anthropic-ratelimit-input-tokens-remaining: - - '20000' - anthropic-ratelimit-input-tokens-reset: - - '2025-03-21T19:26:40Z' - anthropic-ratelimit-output-tokens-limit: - - '4000' - anthropic-ratelimit-output-tokens-remaining: - - '4000' - anthropic-ratelimit-output-tokens-reset: - - '2025-03-21T19:26:40Z' - anthropic-ratelimit-requests-limit: - - '5' - anthropic-ratelimit-requests-remaining: - - '4' - anthropic-ratelimit-requests-reset: - - '2025-03-21T19:26:52Z' - anthropic-ratelimit-tokens-limit: - - '24000' - anthropic-ratelimit-tokens-remaining: - - '24000' - anthropic-ratelimit-tokens-reset: - - '2025-03-21T19:26:40Z' - cf-cache-status: - - DYNAMIC - request-id: - - req_01RRDNDcX3wjQFEMkLiTep47 - via: - - 1.1 google - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml deleted file mode 100644 index 5e712cce0bb..00000000000 --- a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml +++ /dev/null @@ -1,110 +0,0 @@ -interactions: -- request: - body: 
assertion=eyJ0eXAiOiAiSldUIiwgImFsZyI6ICJSUzI1NiIsICJraWQiOiAiZjc0ODI1ZGMzZDE4ZWU2YTY5Y2I2YTE0NmQ5OGUxNTg4YTM5YWU3YyJ9.eyJpYXQiOiAxNzQyNTg1NTY0LCAiZXhwIjogMTc0MjU4OTE2NCwgImlzcyI6ICJsbG1vYnMtdGVzdEBkYXRhZG9nLXNhbmRib3guaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLCAiYXVkIjogImh0dHBzOi8vb2F1dGgyLmdvb2dsZWFwaXMuY29tL3Rva2VuIiwgInNjb3BlIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL2F1dGgvY2xvdWQtcGxhdGZvcm0ifQ.ZivXu-4DxUTH_3JzIngRTswHdKmxiPR_yFl7T7o7C7FSrp4zf0cHa-fid8jMBiwzOz0ooBny11AezGE0w5b15NvhbrQq3HDYdoHXGooo9yBOnhez7v5EaP8iMfpkcp0EW8DUdSUrs2-y9rYT67rA6KxxWcdQLPFyk15ka-FC3f1BsdF_c0CdoPfKEG0mpBj5OHvmwjE3L5GP-2OLgx75B9loCFs3npkEa74YfCJ5OZXHUAPgONXC9VxiXf7__Secb-sDqZLKnGi2HSwaTZJ7TWkLyVufp71IMWpYaExI9Qw2IPPok3h-tCRJjljjJ1kfFy4N0AZCv1STT3p7w8jxyA&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '801' - Content-Type: - - application/x-www-form-urlencoded - User-Agent: - - python-requests/2.32.3 - x-goog-api-client: - - gl-python/3.11.10 auth/2.38.0 auth-request-type/at cred-type/sa - method: POST - uri: https://oauth2.googleapis.com/token - response: - body: - string: '{"access_token":"ya29.c.c0ASRK0GZU0bfQUP0GYf4fuNe4BhHzumcFny6u68QFuxIsG64_oIXfd5scBCRKnTn4AavchTw0iXipb8jUGWZnRC-3IuQPmizs1oQAAWKL90jbwCqpwefQVEMkoPi6Sp1qs0RJjm3gjX_KVPPZlleAEHtWA-lxhbPkP56KRmOHaPWIC6z019UAO4wAuylihJSAq0QheNEW42e2E9NA6MAaCfMgARAvvrNhoaJ2NvvFPYTc_B4Ii8J-fdweojRHAn115d6k4LV0hDqdMeuuDmycrZPjXb6_DTRCYbXrbEjuSlHAAYLFqXaq1q1Uv_rzTt5yaGN8fQig3SZ0b6kQ959wq_6MWRrIlip3UnO3kAjMl7HKp8cZUS2l7sLAN385CjUXmxoq36By_bvYQRmavYvqlIxei6d7mym5I6Fnon1xb69jl2c4Ykatuey3yF2I195zhf1Q_nStxax5ikZoa8gztfzUQO8uz7wXt5zawkgYvvg_y4fZsU4J2gjb44encv0oaFlvw-uXS4WI-_meXXVp-vgexUvZaS6yXO31lhyiVe60BzFsanMMQu4UyJycm2bo4pfdOe6fg3uXZS2fvv8-551p9iWcixYswr-o6h_0FrucJQ0yp-wQ791mYaMx7q7a90iSaJ1s4OqeB_Mw4kl5nscbI-xjb1YVMMYd07MfMwS5fg-0Y-7lv2tYqJbk31hq_npBnvgX-96iupo6y2ZiXebMgIgBRgZxoqvQtsgBM4Jn2Wntjebw7Sy5UIqZOaleqz1nZjeXX7bw2VQkqrgFaXF-RdaBoxJF46Us0hhkRSdIkQMMQmbOf692lo6Sjwvc2ZkziieWp1-am1uaezpamYnckZy0g8eQoZY_S6dgMpMtxcS9Jinf5c2qmMxvIwQdzx3hjt3lsedhre3mhfQvf7ypS-j5g1JVxhyxRaw01jF7tycWp4ecbuVccwwWati9M-qpQ254Mtx89v1Rt85Xh7U66lu12cJc0oktJ7nX766QlZSsx-qf1ri","expires_in":3599,"token_type":"Bearer"}' - headers: - Alt-Svc: - - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 - Content-Encoding: - - gzip - Content-Type: - - application/json; charset=UTF-8 - Date: - - Fri, 21 Mar 2025 19:32:45 GMT - Server: - - scaffolding on HTTPServer2 - Transfer-Encoding: - - chunked - Vary: - - Origin - - X-Origin - - Referer - X-Content-Type-Options: - - nosniff - X-Frame-Options: - - SAMEORIGIN - X-XSS-Protection: - - '0' - status: - code: 200 - message: OK -- request: - body: '{"contents":[{"role":"user","parts":[{"text":"Hey, what is up?"}]}],"generationConfig":{"candidate_count":1}}' - headers: - accept: - - '*/*' - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '109' - content-type: - - application/json - host: - - us-central1-aiplatform.googleapis.com - user-agent: - - litellm/1.63.12 - method: POST - uri: https://us-central1-aiplatform.googleapis.com/v1/projects/datadog-sandbox/locations/us-central1/publishers/google/models/gemini-pro:generateContent - response: - body: - string: !!binary | - H4sIAAAAAAAC/7VVXW/aMBR951d4edkLIMf5IOGlYtSCTBQQSbvSqUKGuMFbiFHstEUV/312AjRM - 6sNUlofo5n4c33MdH781ADBWJItZTCQVRhf8VB4A3sq3jvFM0kyqwNGlnDlPqfIYGx7T1Gi+B7Yk - 
l+8g1fNWs1WKpK8azRiyL2DMJdgUq3UT/CqEBGuSJSxLAC8kYBmQawpiljBJUvDC8zRughfCpM6Q - HCwpWNN0+1SkbTDkL4AsddmOF1fgx5rIrwIkvATLNJby5xWIqo3J7sqodbU/2Y8Ha3/kZDyxjIn1 - jBLBM912GE2mJ8aGIE9U7mZEN3XOu85aDVjShOe7kndvdrPo9yI8mMzmi6EyFuEU4/6wNkc9yZwv - yZKlTJZVYzwYBYPg2wh/mBaueK53BbahBVHHsi3vLFXQZ5of4MomQnyHZ0E0X3wEfqyoISPo+y70 - 7VPavvmvlK974wGeTW7DRX8yjvA4uiBx3zNt27Euz9uGyLMQtD7BW331wvDmwoShpdv6Dxtt+h3k - Ou4nCIf4/rY3Gs0X+H46CvrBBXkj3/aR6V3+BzehCTs2Qu+0D9bj6cyT52TEE92WPvEtVeO5jqlO - hoNcz3N8r5qZLiyLjEKQhN5QSZTEkpOQamKbrYz4b5r1eVFKrFstUhPks7CFDnHJlSaehw6jqKOK - a7UmS+u6VNNwpd3kOPsI39c3R+HXmzpqYqM2h79bvNBaFjpfrHH476qb5o7mglUynNCNEuaW2YYt - xbgFISoxjVVOVU8R25T3E4LIaUGrhczI9LsW6tpO24KO7ZsPVXpOxZZnggaxTreWnfjBn4ZYuP2N - H0xzu/Pd6wVGY9/4A9Z/lKYoBwAA - headers: - Alt-Svc: - - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 - Content-Encoding: - - gzip - Content-Type: - - application/json; charset=UTF-8 - Date: - - Fri, 21 Mar 2025 19:32:46 GMT - Server: - - scaffolding on HTTPServer2 - Transfer-Encoding: - - chunked - Vary: - - Origin - - X-Origin - - Referer - X-Content-Type-Options: - - nosniff - X-Frame-Options: - - SAMEORIGIN - X-XSS-Protection: - - '0' - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 0e08d5af1ca..143842b701a 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -181,8 +181,6 @@ async def test_atext_completion( ) def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): - if stream: - pytest.skip("Streamed Open AI requests will lead to unfinished spans; therefore, skip them for now") with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): LLMObs.disable() diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json deleted file mode 100644 index 214afe91718..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11473000, - "start": 1742580260689536000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json deleted file mode 100644 index c0e47e63442..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" 
- }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11410000, - "start": 1742580260717377000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json deleted file mode 100644 index 2edd0a58339..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 10910000, - "start": 1742580260660336000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json deleted file mode 100644 index 4823265c91e..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11645000, - "start": 1742580260892670000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json deleted file mode 100644 index 82aa9e0797b..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11638000, - "start": 1742580260921802000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json deleted file mode 100644 index 019e6861dd0..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - 
"service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 10966000, - "start": 1742580260866220000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json deleted file mode 100644 index 70b4b725406..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 64700000, - "start": 1742580260523466000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json deleted file mode 100644 index 70100a1b6fe..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6278000, - "start": 1742580260607014000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json deleted file mode 100644 index 9f99831ec22..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6885000, - "start": 1742580260495830000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json 
b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json deleted file mode 100644 index 3f4326ea8f5..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddbedc00000000", - "language": "python", - "litellm.request.model": "vertex_ai/gemini-pro", - "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 4385 - }, - "duration": 1211993000, - "start": 1742585564858520000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json deleted file mode 100644 index 852a1cdfd4a..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddbede00000000", - "language": "python", - "litellm.request.model": "anthropic/claude-3-5-sonnet-20240620", - "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 4385 - }, - "duration": 11249000, - "start": 1742585566097702000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json deleted file mode 100644 index 3639ac22839..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 7357000, - "start": 1742580260791261000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json deleted file mode 100644 index cfa5ca52417..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, 
- "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6527000, - "start": 1742580260815275000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json deleted file mode 100644 index e68a5d04c10..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 5879000, - "start": 1742580260771604000 - }]] From d4185d8cab2d16ff7d9ed8341f026f9259f058ff Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 11:35:36 -0400 Subject: [PATCH 34/61] fix merge conflict bug for openai span linking --- ddtrace/llmobs/_integrations/openai.py | 96 -------------------------- ddtrace/llmobs/_integrations/utils.py | 30 ++++++-- 2 files changed, 26 insertions(+), 100 deletions(-) diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index aa07ff0aff6..c913282a2e5 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -142,102 +142,6 @@ def _llmobs_set_tags( {SPAN_KIND: span_kind, MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics} ) - @staticmethod - def _llmobs_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None: - """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags.""" - prompt = kwargs.get("prompt", "") - if isinstance(prompt, str): - prompt = [prompt] - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt")} - output_messages = [{"content": ""}] - if not span.error and completions: - choices = getattr(completions, "choices", completions) - output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] - span._set_ctx_items( - { - INPUT_MESSAGES: [{"content": str(p)} for p in prompt], - METADATA: parameters, - OUTPUT_MESSAGES: output_messages, - } - ) - - @staticmethod - def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: - """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" - input_messages = [] - for m in kwargs.get("messages", []): - tool_call_id = m.get("tool_call_id") - if tool_call_id: - core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span)) - input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions")} - span._set_ctx_items({INPUT_MESSAGES: 
input_messages, METADATA: parameters}) - - if span.error or not messages: - span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) - return - if isinstance(messages, list): # streamed response - output_messages = [] - for streamed_message in messages: - message = {"content": streamed_message["content"], "role": streamed_message["role"]} - tool_calls = streamed_message.get("tool_calls", []) - if tool_calls: - message["tool_calls"] = [ - { - "name": tool_call.get("name", ""), - "arguments": json.loads(tool_call.get("arguments", "")), - "tool_id": tool_call.get("tool_id", ""), - "type": tool_call.get("type", ""), - } - for tool_call in tool_calls - ] - output_messages.append(message) - span._set_ctx_item(OUTPUT_MESSAGES, output_messages) - return - choices = _get_attr(messages, "choices", []) - output_messages = [] - for idx, choice in enumerate(choices): - tool_calls_info = [] - choice_message = _get_attr(choice, "message", {}) - role = _get_attr(choice_message, "role", "") - content = _get_attr(choice_message, "content", "") or "" - function_call = _get_attr(choice_message, "function_call", None) - if function_call: - function_name = _get_attr(function_call, "name", "") - arguments = json.loads(_get_attr(function_call, "arguments", "")) - function_call_info = {"name": function_name, "arguments": arguments} - output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]}) - continue - tool_calls = _get_attr(choice_message, "tool_calls", []) or [] - for tool_call in tool_calls: - tool_args = getattr(tool_call.function, "arguments", "") - tool_name = getattr(tool_call.function, "name", "") - tool_id = getattr(tool_call, "id", "") - tool_call_info = { - "name": tool_name, - "arguments": json.loads(tool_args), - "tool_id": tool_id, - "type": "function", - } - tool_calls_info.append(tool_call_info) - core.dispatch( - DISPATCH_ON_LLM_TOOL_CHOICE, - ( - tool_id, - tool_name, - tool_args, - { - "trace_id": format_trace_id(span.trace_id), - "span_id": str(span.span_id), - }, - ), - ) - if tool_calls_info: - output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) - continue - output_messages.append({"content": content, "role": role}) - span._set_ctx_item(OUTPUT_MESSAGES, output_messages) - @staticmethod def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], resp: Any) -> None: """Extract prompt tags from an embedding and set them as temporary "_ml_obs.meta.*" tags.""" diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 0f1bbb94239..06367d2e677 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -10,6 +10,10 @@ from typing import Tuple from typing import Union from urllib.parse import urlparse +from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE +from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED +from ddtrace.internal import core +from ddtrace.internal.utils.formats import format_trace_id from ddtrace.internal.logger import get_logger from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG @@ -310,6 +314,9 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" input_messages = [] for m in kwargs.get("messages", []): + tool_call_id = m.get("tool_call_id") + if tool_call_id: + core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span)) 
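+        # messages that carry a tool_call_id are tool results; the dispatch above lets
+        # LLMObs link this span back to the LLM span that originally chose the tool call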
input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) parameters = { k: v @@ -358,13 +365,28 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: continue tool_calls = _get_attr(choice_message, "tool_calls", []) or [] for tool_call in tool_calls: + tool_args = getattr(tool_call.function, "arguments", "") + tool_name = getattr(tool_call.function, "name", "") + tool_id = getattr(tool_call, "id", "") tool_call_info = { - "name": getattr(tool_call.function, "name", ""), - "arguments": json.loads(getattr(tool_call.function, "arguments", "")), - "tool_id": getattr(tool_call, "id", ""), - "type": getattr(tool_call, "type", ""), + "name": tool_name, + "arguments": json.loads(tool_args), + "tool_id": tool_id, + "type": "function", } tool_calls_info.append(tool_call_info) + core.dispatch( + DISPATCH_ON_LLM_TOOL_CHOICE, + ( + tool_id, + tool_name, + tool_args, + { + "trace_id": format_trace_id(span.trace_id), + "span_id": str(span.span_id), + }, + ), + ) if tool_calls_info: output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) continue From 48e57c3138cb6aa943bf5564ddc1b2f0c879a4c0 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 12:56:24 -0400 Subject: [PATCH 35/61] fix integrations enabled test --- tests/contrib/litellm/test_litellm_llmobs.py | 49 +++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 143842b701a..c287caaa32b 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -201,35 +201,50 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs n=n, stream_options={"include_usage": include_usage}, ) - LLMObs.disable() if stream: output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) + + LLMObs.disable() - openai_span = mock_tracer.pop_traces()[0][1] - # remove parent span since LiteLLM request span will not be submitted to LLMObs - openai_span._parent = None + spans = mock_tracer.pop_traces() + # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request + if stream: + span = spans[0][0] + metadata = { + "stream": stream, + "n": n, + "stream_options": { + "include_usage": include_usage + } + } + model_name = "gpt-3.5-turbo" + else: + span = spans[0][1] + # remove parent span since LiteLLM request span will not be submitted to LLMObs + span._parent = None + metadata = { + "n": n, + "extra_body": {}, + "timeout": 600.0, + "extra_headers": { + "X-Stainless-Raw-Response": "true" + } + } + model_name = "gpt-3.5-turbo-0125" assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - openai_span, - model_name="gpt-3.5-turbo-0125", + expected_event = _expected_llmobs_llm_span_event( + span, + model_name=model_name, model_provider="openai", input_messages=messages, output_messages=output_messages, - metadata={ - "n": n, - "extra_body": {}, - "timeout": 600.0, - "extra_headers": { - "X-Stainless-Raw-Response": "true" - } - }, + metadata=metadata, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - ) + mock_llmobs_writer.enqueue.assert_called_with(expected_event) def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, 
stream, n, include_usage): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): From 252f0927cfb592bf01578239510fbd18cf50f96b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:34:12 -0400 Subject: [PATCH 36/61] add next and anext methods to traced stream classes --- ddtrace/contrib/internal/litellm/patch.py | 2 +- ddtrace/contrib/internal/litellm/utils.py | 33 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 9aa7b77d89c..823e8f12d5b 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -119,7 +119,7 @@ def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): requested_model = get_argument_value(args, kwargs, 0, "model", None) integration = litellm._datadog_integration model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) - # Store the provider information in the integration + # store the model name and provider in the integration integration._model_map[requested_model] = (model, custom_llm_provider) return model, custom_llm_provider, dynamic_api_key, api_base diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 9f813458e57..f4d234b8ae9 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -49,6 +49,23 @@ def __iter__(self): ) self._dd_span.finish() + def __next__(self): + try: + chunk = self._generator.__next__() + _loop_handler(chunk, self._streamed_chunks) + return chunk + except StopIteration: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + raise + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + self._dd_span.finish() + raise + + class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): @@ -74,6 +91,22 @@ async def __aiter__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() + + async def __anext__(self): + try: + chunk = await self._generator.__anext__() + _loop_handler(chunk, self._streamed_chunks) + return chunk + except StopAsyncIteration: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + raise + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + self._dd_span.finish() + raise def _loop_handler(chunk, streamed_chunks): From 07685a83df07c05c7b3dc34f70d8d6a6e0a97718 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:55:22 -0400 Subject: [PATCH 37/61] remove unnecessary config stuff --- ddtrace/llmobs/_integrations/litellm.py | 3 ++- tests/contrib/litellm/conftest.py | 25 +++++++------------- tests/contrib/litellm/test_litellm_llmobs.py | 4 ---- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 5f0a1c93f80..c32a4aa937b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -44,9 +44,10 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") + # get resolved model name and provider model_name, model_provider = self._model_map.get(model_name, 
(model_name, "")) - # response format will match Open AI + # use Open AI helpers since response format will match Open AI if operation == "completion": openai_set_meta_tags_from_completion(span, kwargs, response) else: diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 8b8e615d93d..205d0a65bd8 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -6,7 +6,6 @@ from ddtrace.contrib.internal.litellm.patch import unpatch from tests.utils import DummyTracer from tests.utils import DummyWriter -from tests.utils import override_config from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr from ddtrace.llmobs import LLMObs @@ -21,11 +20,6 @@ def ddtrace_global_config(): return {} -@pytest.fixture -def ddtrace_config_litellm(): - return {} - - @pytest.fixture() def mock_llmobs_writer(): patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") @@ -39,19 +33,18 @@ def mock_llmobs_writer(): @pytest.fixture -def litellm(ddtrace_global_config, ddtrace_config_litellm, monkeypatch): +def litellm(ddtrace_global_config, monkeypatch): global_config = default_global_config() global_config.update(ddtrace_global_config) with override_global_config(global_config): - with override_config("litellm", ddtrace_config_litellm): - monkeypatch.setenv("OPENAI_API_KEY", "") - monkeypatch.setenv("ANTHROPIC_API_KEY", "") - monkeypatch.setenv("COHERE_API_KEY", "") - patch() - import litellm - - yield litellm - unpatch() + monkeypatch.setenv("OPENAI_API_KEY", "") + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + monkeypatch.setenv("COHERE_API_KEY", "") + patch() + import litellm + + yield litellm + unpatch() @pytest.fixture diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index c287caaa32b..8e39f5f3dba 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -262,7 +262,3 @@ def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llm # client side requests made to the proxy are not submitted to LLMObs assert mock_llmobs_writer.enqueue.call_count == 0 - - - - \ No newline at end of file From c273b52525d56fdeb114a764c560518e268c9fe1 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:59:10 -0400 Subject: [PATCH 38/61] add release note --- releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml diff --git a/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml new file mode 100644 index 00000000000..5c88002a4a4 --- /dev/null +++ b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + LLM Observability: Adds support to automatically submit LiteLLM SDK requests to LLM Observability. 
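\ No newline at end of file

A minimal usage sketch of the behavior this release note describes, assuming DD_API_KEY and an OpenAI API key are set in the environment; the ml_app value and model below are placeholders:

import litellm

from ddtrace.llmobs import LLMObs

# enabling LLM Observability patches litellm.completion, litellm.acompletion,
# litellm.text_completion, and litellm.atext_completion
LLMObs.enable(ml_app="my-ml-app")

# this request is traced as a litellm.request span and submitted to LLM Observability
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hey, what is up?", "role": "user"}],
)
print(resp.choices[0].message.content)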
From 5ee0f038a5c393735690267c222486d34530a3b4 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 15:29:07 -0400 Subject: [PATCH 39/61] fix errored-out streamed requests not setting llmobs tags properly --- ddtrace/contrib/internal/litellm/patch.py | 4 +-- ddtrace/contrib/internal/litellm/utils.py | 18 ++++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 823e8f12d5b..e5c2ce52296 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -73,7 +73,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion raise finally: # streamed spans will be finished separately once the stream generator is exhausted - if span.error or not stream: + if not stream: if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags( span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" @@ -106,7 +106,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com raise finally: # streamed spans will be finished separately once the stream generator is exhausted - if span.error or not stream: + if not stream: if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags( span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" ) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f4d234b8ae9..e33e3bf5726 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -33,20 +33,17 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._generator.__exit__(exc_type, exc_val, exc_tb) def __iter__(self): - exception_raised = False try: for chunk in self._generator: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: self._dd_span.set_exc_info(*sys.exc_info()) - exception_raised = True raise finally: - if not exception_raised: - _process_finished_stream( - self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion - ) + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) self._dd_span.finish() def __next__(self): @@ -76,20 +73,17 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): await self._generator.__aexit__(exc_type, exc_val, exc_tb) async def __aiter__(self): - exception_raised = False try: async for chunk in self._generator: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: self._dd_span.set_exc_info(*sys.exc_info()) - exception_raised = True raise finally: - if not exception_raised: - _process_finished_stream( - self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion - ) + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) self._dd_span.finish() async def __anext__(self):
From a5cdfa5e1bf38f9e09c459d8897759b7cccd06ca Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 15:43:16 -0400 Subject: [PATCH 40/61] run black --- ddtrace/contrib/internal/litellm/utils.py | 3 +- ddtrace/llmobs/_integrations/litellm.py | 6 +- ddtrace/llmobs/_integrations/utils.py | 2 + tests/contrib/litellm/conftest.py | 1 + tests/contrib/litellm/test_litellm.py | 4 +- tests/contrib/litellm/test_litellm_llmobs.py | 69 ++++++++++++-------- tests/contrib/litellm/utils.py | 36 ++++++---- 7 files changed, 77 insertions(+), 44 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index e33e3bf5726..f9c4463190d 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -61,7 +61,6 @@ def __next__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() - class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): @@ -85,7 +84,7 @@ async def __aiter__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() - + async def __anext__(self): try: chunk = await self._generator.__anext__() diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c32a4aa937b..0eea298f87d 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -85,6 +85,10 @@ def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, stream = kwargs.get("stream", False) model_lower = model.lower() if model else "" # model provider is unknown until the request completes; this is a best-effort check for OpenAI or Azure if ( - ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) and not stream and "openai" in ddtrace._monkey._get_patched_modules(): + ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) + and not stream + and "openai" in ddtrace._monkey._get_patched_modules() + ): return False return True diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 06367d2e677..c1d600a9713 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -468,6 +468,8 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> message.pop("tool_calls", None) message["content"] = message["content"].strip() return message + + class OaiSpanAdapter: """Adapter for Oai Agents SDK Span objects that the llmobs integration code will use.
This is to consolidate the code where we access oai library types which provides a clear starting point for diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 205d0a65bd8..2e846ddd5e7 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -69,6 +69,7 @@ def mock_tracer(litellm, ddtrace_global_config): def request_vcr(): return get_request_vcr() + @pytest.fixture def request_vcr_include_localhost(): return get_request_vcr(ignore_localhost=False) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 43a9a4d4036..5ddee4173e6 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -44,7 +44,6 @@ def test_litellm_completion(litellm, snapshot_context, request_vcr, stream, n): pass - @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): @@ -59,7 +58,8 @@ async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, strea if stream: async for _ in resp: pass - + + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) def test_litellm_text_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 8e39f5f3dba..c68fd103223 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -8,7 +8,15 @@ @pytest.mark.parametrize( - "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="", _dd_api_key="")] + "ddtrace_global_config", + [ + dict( + _llmobs_enabled=True, + _llmobs_sample_rate=1.0, + _llmobs_ml_app="", + _dd_api_key="", + ) + ], ) @pytest.mark.parametrize( "stream,n,include_usage", @@ -54,9 +62,13 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) - def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion_with_tools( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): if stream and n > 1: - pytest.skip("Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977") + pytest.skip( + "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" + ) with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] resp = litellm.completion( @@ -72,7 +84,7 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) - + span = mock_tracer.pop_traces()[0][0] assert mock_llmobs_writer.enqueue.call_count == 1 mock_llmobs_writer.enqueue.assert_called_with( @@ -82,7 +94,12 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m model_provider="openai", 
input_messages=messages, output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}, "tool_choice": "auto"}, + metadata={ + "stream": stream, + "n": n, + "stream_options": {"include_usage": include_usage}, + "tool_choice": "auto", + }, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) @@ -180,7 +197,9 @@ async def test_atext_completion( ) ) - def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion_integrations_enabled( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): LLMObs.disable() @@ -205,20 +224,14 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) - + LLMObs.disable() spans = mock_tracer.pop_traces() # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: span = spans[0][0] - metadata = { - "stream": stream, - "n": n, - "stream_options": { - "include_usage": include_usage - } - } + metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}} model_name = "gpt-3.5-turbo" else: span = spans[0][1] @@ -228,25 +241,25 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs "n": n, "extra_body": {}, "timeout": 600.0, - "extra_headers": { - "X-Stainless-Raw-Response": "true" - } + "extra_headers": {"X-Stainless-Raw-Response": "true"}, } model_name = "gpt-3.5-turbo-0125" assert mock_llmobs_writer.enqueue.call_count == 1 expected_event = _expected_llmobs_llm_span_event( - span, - model_name=model_name, - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata=metadata, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + span, + model_name=model_name, + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata=metadata, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) mock_llmobs_writer.enqueue.assert_called_with(expected_event) - - def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + + def test_completion_proxy( + self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index f41e1098a50..a9b6309770b 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -39,7 +39,9 @@ def consume_stream(resp, n, is_completion=False): token_metrics = {} role = None for chunk in resp: - output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -49,7 +51,9 @@ async def 
async_consume_stream(resp, n, is_completion=False): token_metrics = {} role = None async for chunk in resp: - output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -68,9 +72,16 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co while tool_call.index >= len(output_messages[choice.index]["tool_calls"]): output_messages[choice.index]["tool_calls"].append({}) arguments = output_messages[choice.index]["tool_calls"][tool_call.index].get("arguments", "") - output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) or tool_call.function.name - output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = arguments + tool_call.function.arguments - output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id + output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) + or tool_call.function.name + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = ( + arguments + tool_call.function.arguments + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id + ) output_messages[choice.index]["tool_calls"][tool_call.index]["type"] = tool_call.type if "usage" in chunk and chunk["usage"]: @@ -108,12 +119,14 @@ def parse_response(resp, is_completion=False): if tool_calls: message["tool_calls"] = [] for tool_call in tool_calls: - message["tool_calls"].append({ - "name": tool_call["function"]["name"], - "arguments": json.loads(tool_call["function"]["arguments"]), - "tool_id": tool_call["id"], - "type": tool_call["type"] - }) + message["tool_calls"].append( + { + "name": tool_call["function"]["name"], + "arguments": json.loads(tool_call["function"]["arguments"]), + "tool_id": tool_call["id"], + "type": tool_call["type"], + } + ) output_messages.append(message) token_metrics = { "input_tokens": resp.usage.prompt_tokens, @@ -122,6 +135,7 @@ def parse_response(resp, is_completion=False): } return output_messages, token_metrics + tools = [ { "type": "function", From 1474258b57696b43941deb321e3ae718165ada80 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 11:41:15 -0400 Subject: [PATCH 41/61] style fixes --- ddtrace/contrib/internal/litellm/patch.py | 12 ++------ ddtrace/contrib/internal/litellm/utils.py | 3 +- ddtrace/contrib/internal/openai/utils.py | 6 ++-- ddtrace/llmobs/_integrations/litellm.py | 29 +++++++++----------- ddtrace/llmobs/_integrations/openai.py | 8 ++---- tests/contrib/litellm/conftest.py | 1 - tests/contrib/litellm/test_litellm_llmobs.py | 6 +++- 7 files changed, 27 insertions(+), 38 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index e5c2ce52296..8275fc0d049 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -14,13 +14,7 @@ from ddtrace.internal.utils import get_argument_value 
-config._add( - "litellm", - { - "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), - "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), - }, -) +config._add("litellm", {}) def get_version() -> str: @@ -66,7 +60,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion try: resp = func(*args, **kwargs) if stream: - return TracedLiteLLMStream(resp, integration, span, args, kwargs, is_completion) + return TracedLiteLLMStream(resp, integration, span, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -99,7 +93,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com try: resp = await func(*args, **kwargs) if stream: - return TracedLiteLLMAsyncStream(resp, integration, span, args, kwargs, is_completion) + return TracedLiteLLMAsyncStream(resp, integration, span, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f9c4463190d..1f36a8331f2 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -13,12 +13,11 @@ class BaseTracedLiteLLMStream: - def __init__(self, generator, integration, span, args, kwargs, is_completion=False): + def __init__(self, generator, integration, span, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 self._generator = generator self._dd_integration = integration self._dd_span = span - self._args = args self._kwargs = kwargs self._streamed_chunks = [[] for _ in range(n)] self._is_completion = is_completion diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index ddf0eb37b49..58d3075cbbc 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,10 +6,8 @@ from typing import Generator from typing import List -from ddtrace.llmobs._integrations.utils import ( - openai_construct_completion_from_streamed_chunks, - openai_construct_message_from_streamed_chunks, -) +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks import wrapt from ddtrace.internal.logger import get_logger diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 0eea298f87d..e262dca6d3b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -2,21 +2,19 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple import ddtrace -from ddtrace.llmobs._constants import ( - INPUT_TOKENS_METRIC_KEY, - METRICS, - OUTPUT_TOKENS_METRIC_KEY, - TOTAL_TOKENS_METRIC_KEY, -) +from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import METRICS from ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._integrations.utils import ( - openai_set_meta_tags_from_chat, - openai_set_meta_tags_from_completion, -) +from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_chat +from 
ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_completion +from ddtrace.llmobs._llmobs import LLMObs from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -25,7 +23,7 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" # maps requested model name to parsed model name and provider - _model_map = {} + _model_map: Dict[str, Tuple[str, str]] = {} def _set_base_span_tags( self, span: Span, model: Optional[str] = None, host: Optional[str] = None, **kwargs: Dict[str, Any] @@ -44,7 +42,6 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - # get resolved model name and provider model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # use Open AI helpers since response format will match Open AI if operation == "completion": openai_set_meta_tags_from_completion(span, kwargs, response) else: @@ -76,8 +73,8 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: """ Span should NOT be submitted to LLMObs if: - - base_url is not None - - model provider is Open AI or Azure AND request is not being streamed AND Open AI integration is enabled + - base_url is not None: the request targets a proxy, and the downstream LLM request will be captured instead + - the request is not streamed, the model provider is OpenAI or Azure OpenAI, and the OpenAI integration is enabled: the request will be captured by the OpenAI integration instead """ base_url = kwargs.get("api_base", None) if base_url is not None: @@ -86,9 +83,9 @@ def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, model_lower = model.lower() if model else "" # model provider is unknown until the request completes; this is a best-effort check for OpenAI or Azure if ( - ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) + any(prefix in model_lower for prefix in ("gpt", "openai", "azure")) and not stream - and "openai" in ddtrace._monkey._get_patched_modules() + and LLMObs._integration_is_enabled("openai") ): return False return True diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index c913282a2e5..87e6a100e9d 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -24,11 +24,9 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import ( - get_llmobs_metrics_tags, - openai_set_meta_tags_from_chat, - openai_set_meta_tags_from_completion, -) +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags +from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_chat +from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_completion from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 2e846ddd5e7..3d4d300acd3 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -52,7 +52,6 @@ def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm,
tracer=mock_tracer) - pin.tracer.configure() if ddtrace_global_config.get("_llmobs_enabled", False): # Have to disable and re-enable LLMObs to use the mock tracer. diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index c68fd103223..baac526c941 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -2,7 +2,11 @@ from ddtrace.llmobs._llmobs import LLMObs import pytest -from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools +from tests.contrib.litellm.utils import async_consume_stream +from tests.contrib.litellm.utils import get_cassette_name +from tests.contrib.litellm.utils import consume_stream +from tests.contrib.litellm.utils import parse_response +from tests.contrib.litellm.utils import tools from tests.llmobs._utils import _expected_llmobs_llm_span_event from tests.utils import DummyTracer From c9ba90a6461e39f50972bfe3007566857b45fc35 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 12:02:13 -0400 Subject: [PATCH 42/61] use wrapt.ObjectProxy for traced streams --- ddtrace/contrib/internal/litellm/patch.py | 4 ++-- ddtrace/contrib/internal/litellm/utils.py | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 8275fc0d049..02157c1690d 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -133,7 +133,7 @@ def patch(): wrap("litellm", "text_completion", traced_text_completion(litellm)) wrap("litellm", "atext_completion", traced_atext_completion(litellm)) wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) - wrap("litellm", "litellm.main.get_llm_provider", traced_get_llm_provider(litellm)) + wrap("litellm", "main.get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -147,6 +147,6 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") - unwrap(litellm.litellm.main, "get_llm_provider") + unwrap(litellm.main, "get_llm_provider") delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 1f36a8331f2..59c94b7d192 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -2,6 +2,7 @@ from typing import Any from typing import Dict from typing import List +import wrapt from ddtrace.internal.logger import get_logger from ddtrace.llmobs._integrations.utils import ( @@ -12,10 +13,10 @@ log = get_logger(__name__) -class BaseTracedLiteLLMStream: - def __init__(self, generator, integration, span, kwargs, is_completion=False): +class BaseTracedLiteLLMStream(wrapt.ObjectProxy): + def __init__(self, wrapped, integration, span, kwargs, is_completion=False): + super().__init__(wrapped) n = kwargs.get("n", 1) or 1 - self._generator = generator self._dd_integration = integration self._dd_span = span self._kwargs = kwargs @@ -25,15 +26,15 @@ def __init__(self, generator, integration, span, kwargs, is_completion=False): class TracedLiteLLMStream(BaseTracedLiteLLMStream): def __enter__(self): - self._generator.__enter__() + self.__wrapped__.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): - self._generator.__exit__(exc_type, exc_val, exc_tb) + self.__wrapped__.__exit__(exc_type, 
exc_val, exc_tb) def __iter__(self): try: - for chunk in self._generator: + for chunk in self.__wrapped__: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: @@ -47,7 +48,7 @@ def __iter__(self): def __next__(self): try: - chunk = self._generator.__next__() + chunk = self.__wrapped__.__next__() _loop_handler(chunk, self._streamed_chunks) return chunk except StopIteration: @@ -64,15 +65,15 @@ def __next__(self): class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): - await self._generator.__aenter__() + await self.__wrapped__.__aenter__() return self async def __aexit__(self, exc_type, exc_val, exc_tb): - await self._generator.__aexit__(exc_type, exc_val, exc_tb) + await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb) async def __aiter__(self): try: - async for chunk in self._generator: + async for chunk in self.__wrapped__: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: @@ -86,7 +87,7 @@ async def __aiter__(self): async def __anext__(self): try: - chunk = await self._generator.__anext__() + chunk = await self.__wrapped__.__anext__() _loop_handler(chunk, self._streamed_chunks) return chunk except StopAsyncIteration: From b2e8f59c41cd03c701d2daf436c8777ce00cd382 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 12:27:22 -0400 Subject: [PATCH 43/61] small improvements to model name and metric extraction --- ddtrace/llmobs/_integrations/litellm.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index e262dca6d3b..b40df3a1b18 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -5,6 +5,7 @@ from typing import Tuple import ddtrace +from ddtrace.internal.utils import get_argument_value from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY @@ -41,7 +42,7 @@ def _llmobs_set_tags( response: Optional[Any] = None, operation: str = "", ) -> None: - model_name = span.get_tag("litellm.request.model") + model_name = get_argument_value(args, kwargs, 0, "model", None) model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # use Open AI helpers since response format will match Open AI @@ -57,18 +58,21 @@ def _llmobs_set_tags( @staticmethod def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: + if not resp: + return {} if isinstance(resp, list): token_usage = _get_attr(resp[0], "usage", None) else: token_usage = _get_attr(resp, "usage", None) - if token_usage is not None: - prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) - completion_tokens = _get_attr(token_usage, "completion_tokens", 0) - return { - INPUT_TOKENS_METRIC_KEY: prompt_tokens, - OUTPUT_TOKENS_METRIC_KEY: completion_tokens, - TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, - } + if token_usage is None: + return {} + prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) + completion_tokens = _get_attr(token_usage, "completion_tokens", 0) + return { + INPUT_TOKENS_METRIC_KEY: prompt_tokens, + OUTPUT_TOKENS_METRIC_KEY: completion_tokens, + TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, + } def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: """ From 4b149210f176eceafde8789859c8e16e255fec90 Mon Sep 17 00:00:00 2001 From: Nicole 
Cybul Date: Thu, 10 Apr 2025 14:09:29 -0400 Subject: [PATCH 44/61] cleaned up configuration for litellm tests --- tests/contrib/litellm/conftest.py | 59 ++++--- tests/contrib/litellm/test_litellm_llmobs.py | 161 ++++++++----------- 2 files changed, 100 insertions(+), 120 deletions(-) diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 3d4d300acd3..12311e95de4 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -1,15 +1,21 @@ -import mock - +from ddtrace.llmobs._writer import LLMObsSpanWriter import pytest from ddtrace.contrib.internal.litellm.patch import patch from ddtrace.trace import Pin from ddtrace.contrib.internal.litellm.patch import unpatch from tests.utils import DummyTracer -from tests.utils import DummyWriter from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr -from ddtrace.llmobs import LLMObs +from ddtrace.llmobs import LLMObs as llmobs_service +from ddtrace.llmobs._constants import AGENTLESS_BASE_URL + +class TestLLMObsSpanWriter(LLMObsSpanWriter): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.events = [] + def enqueue(self, event): + self.events.append(event) def default_global_config(): return {} @@ -19,17 +25,15 @@ def default_global_config(): def ddtrace_global_config(): return {} +@pytest.fixture +def llmobs_span_writer(): + agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com") + yield TestLLMObsSpanWriter(is_agentless=True, agentless_url=agentless_url, interval=1.0, timeout=1.0) -@pytest.fixture() -def mock_llmobs_writer(): - patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") - try: - LLMObsSpanWriterMock = patcher.start() - m = mock.MagicMock() - LLMObsSpanWriterMock.return_value = m - yield m - finally: - patcher.stop() + +@pytest.fixture +def llmobs_events(litellm_llmobs, llmobs_span_writer): + return llmobs_span_writer.events @pytest.fixture @@ -46,23 +50,28 @@ def litellm(ddtrace_global_config, monkeypatch): yield litellm unpatch() +@pytest.fixture +def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_config, monkeypatch): + llmobs_service.disable() + with override_global_config( + { + "_llmobs_ml_app": "", + "_dd_api_key": "", + } + ): + enable_integrations = ddtrace_global_config.get("_llmobs_integrations_enabled", False) + llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations) + llmobs_service._instance._llmobs_span_writer = llmobs_span_writer + yield llmobs_service + llmobs_service.disable() @pytest.fixture -def mock_tracer(litellm, ddtrace_global_config): +def mock_tracer(litellm): + mock_tracer = DummyTracer() pin = Pin.get_from(litellm) - mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm, tracer=mock_tracer) - - if ddtrace_global_config.get("_llmobs_enabled", False): - # Have to disable and re-enable LLMObs to use the mock tracer. 
- LLMObs.disable() - enable_integrations = ddtrace_global_config.get("_integrations_enabled", False) - LLMObs.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations) - yield mock_tracer - LLMObs.disable() - @pytest.fixture def request_vcr(): diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index baac526c941..258cfab86d5 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -11,17 +11,6 @@ from tests.utils import DummyTracer -@pytest.mark.parametrize( - "ddtrace_global_config", - [ - dict( - _llmobs_enabled=True, - _llmobs_sample_rate=1.0, - _llmobs_ml_app="", - _dd_api_key="", - ) - ], -) @pytest.mark.parametrize( "stream,n,include_usage", [ @@ -36,7 +25,7 @@ ], ) class TestLLMObsLiteLLM: - def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( @@ -52,22 +41,20 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) def test_completion_with_tools( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): if stream and n > 1: pytest.skip( @@ -90,26 +77,24 @@ def test_completion_with_tools( output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={ - "stream": stream, - "n": n, - "stream_options": {"include_usage": include_usage}, - "tool_choice": "auto", - }, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={ + "stream": stream, + "n": n, + "stream_options": {"include_usage": include_usage}, + "tool_choice": "auto", + }, + token_metrics=token_metrics, + tags={"ml_app": "", "service": 
"tests.contrib.litellm"}, ) - async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = await litellm.acompletion( @@ -125,21 +110,19 @@ async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_ output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" resp = litellm.text_completion( @@ -155,22 +138,20 @@ def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tr output_messages, token_metrics = parse_response(resp, is_completion=True) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=[{"content": prompt}], - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) async def test_atext_completion( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
@@ -187,33 +168,25 @@ async def test_atext_completion( output_messages, token_metrics = parse_response(resp, is_completion=True) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=[{"content": prompt}], - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) + @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)]) def test_completion_integrations_enabled( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): - LLMObs.disable() - - LLMObs.enable(integrations_enabled=True) - mock_tracer = DummyTracer() - import litellm import openai - pin = Pin.get_from(litellm) - pin._override(litellm, tracer=mock_tracer) pin._override(openai, tracer=mock_tracer) messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -229,8 +202,6 @@ def test_completion_integrations_enabled( else: output_messages, token_metrics = parse_response(resp) - LLMObs.disable() - spans = mock_tracer.pop_traces() # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: @@ -248,7 +219,7 @@ def test_completion_integrations_enabled( "extra_headers": {"X-Stainless-Raw-Response": "true"}, } model_name = "gpt-3.5-turbo-0125" - assert mock_llmobs_writer.enqueue.call_count == 1 + assert len(llmobs_events) == 1 expected_event = _expected_llmobs_llm_span_event( span, model_name=model_name, @@ -259,10 +230,10 @@ def test_completion_integrations_enabled( token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - mock_llmobs_writer.enqueue.assert_called_with(expected_event) + assert llmobs_events[0] == expected_event def test_completion_proxy( - self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -278,4 +249,4 @@ def test_completion_proxy( consume_stream(resp, n) # client side requests made to the proxy are not submitted to LLMObs - assert mock_llmobs_writer.enqueue.call_count == 0 + assert len(llmobs_events) == 0 From 73417dbfc8504fc3bc5ee2ed12e3c84160e5e75f Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 14:17:49 -0400 Subject: [PATCH 45/61] style fixes --- tests/contrib/litellm/conftest.py | 7 ++++++- tests/contrib/litellm/test_litellm_llmobs.py | 9 +++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git 
a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 12311e95de4..9415c45d682 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -9,6 +9,7 @@ from ddtrace.llmobs import LLMObs as llmobs_service from ddtrace.llmobs._constants import AGENTLESS_BASE_URL + class TestLLMObsSpanWriter(LLMObsSpanWriter): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -17,6 +18,7 @@ def __init__(self, *args, **kwargs): def enqueue(self, event): self.events.append(event) + def default_global_config(): return {} @@ -25,6 +27,7 @@ def default_global_config(): def ddtrace_global_config(): return {} + @pytest.fixture def llmobs_span_writer(): agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com") @@ -50,8 +53,9 @@ def litellm(ddtrace_global_config, monkeypatch): yield litellm unpatch() + @pytest.fixture -def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_config, monkeypatch): +def litellm_llmobs(mock_tracer, llmobs_span_writer, ddtrace_global_config): llmobs_service.disable() with override_global_config( { @@ -65,6 +69,7 @@ def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_confi yield llmobs_service llmobs_service.disable() + @pytest.fixture def mock_tracer(litellm): mock_tracer = DummyTracer() diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 258cfab86d5..167cd0b0428 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -53,9 +53,7 @@ def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stre tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_completion_with_tools( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage - ): + def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): if stream and n > 1: pytest.skip( "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" @@ -150,9 +148,7 @@ def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_atext_completion( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage - ): + async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
resp = await litellm.atext_completion( @@ -186,6 +182,7 @@ def test_completion_integrations_enabled( ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): import openai + pin = Pin.get_from(litellm) pin._override(openai, tracer=mock_tracer) From f163063ec938ca2813eb5aeae978ec87fd7c897e Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 14:32:59 -0400 Subject: [PATCH 46/61] make test specifically for excluding usage --- ...completion_stream_exclude_usage_proxy.yaml | 144 ----------- ...etion_stream_exclude_usage_with_tools.yaml | 130 ---------- ..._multiple_choices_exclude_usage_proxy.yaml | 240 ------------------ ...iple_choices_exclude_usage_with_tools.yaml | 162 ------------ tests/contrib/litellm/test_litellm_llmobs.py | 97 ++++--- 5 files changed, 61 insertions(+), 712 deletions(-) delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml deleted file mode 100644 index 39cf74be7cb..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml +++ /dev/null @@ -1,144 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '144' - content-type: - - application/json - host: - - 0.0.0.0:4000 - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: http://0.0.0.0:4000/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - anything"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - need"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - 
today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} - - - data: [DONE] - - - ' - headers: - content-type: - - text/event-stream; charset=utf-8 - date: - - Mon, 31 Mar 2025 20:38:18 GMT - server: - - uvicorn - transfer-encoding: - - chunked - x-litellm-call-id: - - a5a87fc0-874f-4432-b608-91b437b91fb2 - x-litellm-key-spend: - - '0.0' - x-litellm-version: - - 1.63.11 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml deleted file mode 100644 index 515680c5d04..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml +++ /dev/null @@ -1,130 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get - the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The - city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '542' - content-type: - - application/json - cookie: - - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ty5BH4ChPTiw8GnzCSqhxhoP","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 9278b292789f3ba6-BOS - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Fri, 28 Mar 2025 17:02:51 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '281' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - 
x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999985' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_78940dfd1e163cd37e49e666383b7944 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml deleted file mode 100644 index 7cface6a716..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml +++ /dev/null @@ -1,240 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '144' - content-type: - - application/json - host: - - 0.0.0.0:4000 - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: http://0.0.0.0:4000/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - chat"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - and"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - any"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - anything"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - questions"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - 
you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - or"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - need"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - tasks"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - may"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - have"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} - - - data: [DONE] - - - ' - headers: - content-type: - - text/event-stream; charset=utf-8 - date: - - Mon, 31 Mar 2025 20:38:19 GMT - server: - - uvicorn - transfer-encoding: - - chunked - x-litellm-call-id: - - b4f152d1-5074-4fb3-a79d-ad0529fa5aa1 - x-litellm-key-spend: - - '0.0' - x-litellm-version: - - 1.63.11 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml deleted file mode 100644 index fda11e5011a..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml +++ /dev/null @@ -1,162 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get - the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The - city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '542' - content-type: - - application/json - cookie: - - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 9278b2960dbd3ba6-BOS - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Fri, 28 Mar 2025 17:02:52 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '406' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999985' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_7e8b09694a1029b3eb2fecf93deef4a3 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 167cd0b0428..2d715c7b21b 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -12,28 +12,24 @@ @pytest.mark.parametrize( - "stream,n,include_usage", + "stream,n", [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), + (True, 1), + (True, 2), + (False, 1), + (False, 2), ], ) class TestLLMObsLiteLLM: - def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n) @@ -48,24 +44,53 @@ def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stre model_provider="openai", input_messages=messages, output_messages=output_messages, - 
metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + def test_completion_exclude_usage(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n, False)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": False}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": False}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + + def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): if stream and n > 1: pytest.skip( "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" ) - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): + with request_vcr.use_cassette(get_cassette_name(stream, n, tools=True)): messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, tools=tools, tool_choice="auto", ) @@ -85,22 +110,22 @@ def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_t metadata={ "stream": stream, "n": n, - "stream_options": {"include_usage": include_usage}, + "stream_options": {"include_usage": True}, "tool_choice": "auto", }, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = await async_consume_stream(resp, n) @@ -115,20 +140,20 @@ async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_trace model_provider="openai", input_messages=messages, output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": 
"tests.contrib.litellm"}, ) - def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): prompt = "Hey, what is up?" resp = litellm.text_completion( model="gpt-3.5-turbo", prompt=prompt, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n, is_completion=True) @@ -143,20 +168,20 @@ def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, model_provider="openai", input_messages=[{"content": prompt}], output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): prompt = "Hey, what is up?" resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt=prompt, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = await async_consume_stream(resp, n, is_completion=True) @@ -171,16 +196,16 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_ model_provider="openai", input_messages=[{"content": prompt}], output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)]) def test_completion_integrations_enabled( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n ): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + with request_vcr.use_cassette(get_cassette_name(stream, n)): import openai pin = Pin.get_from(litellm) @@ -192,7 +217,7 @@ def test_completion_integrations_enabled( messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n) @@ -203,7 +228,7 @@ def test_completion_integrations_enabled( # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: span = spans[0][0] - metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}} + metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}} model_name = "gpt-3.5-turbo" else: span = spans[0][1] @@ -230,16 +255,16 @@ def test_completion_integrations_enabled( assert llmobs_events[0] == expected_event def test_completion_proxy( - self, 
From 59bc4d4dd45a89023a036df4b08c0482e59c1cfa Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 14:59:23 -0400
Subject: [PATCH 47/61] style and type fixes

---
 ddtrace/contrib/internal/litellm/patch.py    | 4 ++--
 ddtrace/llmobs/_integrations/litellm.py      | 5 ++---
 ddtrace/llmobs/_integrations/utils.py        | 2 +-
 tests/contrib/litellm/test_litellm_llmobs.py | 9 ++-------
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index 02157c1690d..541faef1bbe 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -53,7 +53,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs),
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
     stream = kwargs.get("stream", False)
     resp = None
@@ -86,7 +86,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs),
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
     stream = kwargs.get("stream", False)
     resp = None
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py
index b40df3a1b18..cad49cd1cb9 100644
--- a/ddtrace/llmobs/_integrations/litellm.py
+++ b/ddtrace/llmobs/_integrations/litellm.py
@@ -4,7 +4,6 @@ from typing import Optional
 from typing import Tuple

-import ddtrace
 from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
@@ -42,7 +41,7 @@ def _llmobs_set_tags(
         response: Optional[Any] = None,
         operation: str = "",
     ) -> None:
-        model_name = get_argument_value(args, kwargs, 0, "model", None)
+        model_name = get_argument_value(args, kwargs, 0, "model", False) or ""
         model_name, model_provider = self._model_map.get(model_name, (model_name, ""))

         # use Open AI helpers since response format will match Open AI
@@ -74,7 +73,7 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]:
             TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
         }

-    def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool:
+    def should_submit_to_llmobs(self, kwargs: Dict[str, Any], model: Optional[str] = None) -> bool:
         """
         Span should NOT be submitted to LLMObs if:
         - base_url is not None: is a proxy request and we will capture the LLM request downstream
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index c1d600a9713..dcd8edd24ba 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -433,7 +433,7 @@ def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_
         stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "")


-def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]:
+def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, Any]:
     """Constructs a chat completion message dictionary from streamed chunks.

     The resulting message dictionary is of form:
     {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 2d715c7b21b..eaa87a49bc8 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -77,7 +77,6 @@ def test_completion_exclude_usage(self, litellm, request_vcr, llmobs_events, moc
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )

-
     def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         if stream and n > 1:
             pytest.skip(
@@ -202,9 +201,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

@@ -254,9 +251,7 @@ def test_completion_integrations_enabled(
             )
             assert llmobs_events[0] == expected_event

-    def test_completion_proxy(
-        self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
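Note: after this patch, should_submit_to_llmobs takes the request kwargs as its required first argument and the model as an optional second, matching the call sites in patch.py. A hypothetical call under the new order (the kwarg values here are invented for illustration):

    # kwargs first (required), model second (optional)
    integration.should_submit_to_llmobs({"stream": True, "api_base": None}, "gpt-3.5-turbo")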
From 65297a3157ef756e033d447289ada2c525c8e740 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 16:09:52 -0400
Subject: [PATCH 48/61] add typing for message variable

---
 ddtrace/llmobs/_integrations/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index dcd8edd24ba..464480bd643 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -438,7 +438,7 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) ->
     The resulting message dictionary is of form:
     {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
     """
-    message = {"content": "", "tool_calls": []}
+    message: Dict[str, Any] = {"content": "", "tool_calls": []}
     for chunk in streamed_chunks:
         if getattr(chunk, "usage", None):
             message["usage"] = chunk.usage
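Note: the Dict[str, Any] annotations added in patches 47 and 48 reflect the actual shape of the accumulated message, which mixes value types. Per the docstring above, a fully populated message looks roughly like this (the field values are invented):

    message = {
        "content": "Hello!",        # str, concatenated from chunk deltas
        "role": "assistant",        # str
        "tool_calls": [],           # list of tool-call dicts
        "finish_reason": "stop",    # str
    }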
From dd01f975f145e44f57a41eaf25698664beffb623 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 16:36:37 -0400
Subject: [PATCH 49/61] ruff fixes

---
 ddtrace/contrib/internal/litellm/patch.py    |  7 +++----
 ddtrace/contrib/internal/litellm/utils.py    | 11 ++++-------
 ddtrace/contrib/internal/openai/utils.py     |  7 ++-----
 ddtrace/llmobs/_integrations/litellm.py      | 12 +++++++-----
 ddtrace/llmobs/_integrations/openai.py       |  9 +--------
 ddtrace/llmobs/_integrations/utils.py        | 15 +++++++++------
 tests/contrib/litellm/conftest.py            | 11 ++++++-----
 tests/contrib/litellm/test_litellm_llmobs.py |  6 ++----
 tests/contrib/litellm/utils.py               |  6 ++++--
 9 files changed, 38 insertions(+), 46 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index 541faef1bbe..6aed01be783 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -1,17 +1,16 @@
-import os
 import sys

 import litellm

 from ddtrace import config
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
 from ddtrace.contrib.trace_utils import unwrap
 from ddtrace.contrib.trace_utils import with_traced_module
 from ddtrace.contrib.trace_utils import wrap
-from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
-from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
+from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._integrations import LiteLLMIntegration
 from ddtrace.trace import Pin
-from ddtrace.internal.utils import get_argument_value

 config._add("litellm", {})
diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py
index 59c94b7d192..11b996891a4 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -1,14 +1,11 @@
 import sys
-from typing import Any
-from typing import Dict
-from typing import List
+
 import wrapt

 from ddtrace.internal.logger import get_logger
-from ddtrace.llmobs._integrations.utils import (
-    openai_construct_completion_from_streamed_chunks,
-    openai_construct_message_from_streamed_chunks,
-)
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
+

 log = get_logger(__name__)
diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py
index 58d3075cbbc..dca02cb8ed9 100644
--- a/ddtrace/contrib/internal/openai/utils.py
+++ b/ddtrace/contrib/internal/openai/utils.py
@@ -1,16 +1,13 @@
 import re
 import sys
-from typing import Any
 from typing import AsyncGenerator
-from typing import Dict
 from typing import Generator
-from typing import List

-from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
-from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 import wrapt

 from ddtrace.internal.logger import get_logger
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 from ddtrace.llmobs._utils import _get_attr
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py
index cad49cd1cb9..80107500afe 100644
--- a/ddtrace/llmobs/_integrations/litellm.py
+++ b/ddtrace/llmobs/_integrations/litellm.py
@@ -6,18 +6,18 @@
 from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
-from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
-from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
 from ddtrace.llmobs._constants import MODEL_PROVIDER
+from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import SPAN_KIND
+from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
+from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_chat
 from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_completion
 from ddtrace.llmobs._llmobs import LLMObs
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.trace import Span
-from ddtrace.llmobs._integrations.base import BaseLLMIntegration


 class LiteLLMIntegration(BaseLLMIntegration):
@@ -77,14 +77,16 @@ def should_submit_to_llmobs(self, kwargs: Dict[str, Any], model: Optional[str] =
         """
         Span should NOT be submitted to LLMObs if:
         - base_url is not None: is a proxy request and we will capture the LLM request downstream
-        - non-streamed request and model provider is OpenAI/AzureOpenAI and the OpenAI integration is enabled: this request will be captured in the OpenAI integration instead
+        - non-streamed request and model provider is OpenAI/AzureOpenAI and the OpenAI integration
+          is enabled: this request will be captured in the OpenAI integration instead
         """
         base_url = kwargs.get("api_base", None)
         if base_url is not None:
             return False
         stream = kwargs.get("stream", False)
         model_lower = model.lower() if model else ""
-        # model provider is unknown until request completes; therefore, this is a best effort attempt to check if model provider is Open AI or Azure
+        # model provider is unknown until request completes; therefore, this is a best effort attempt to check
+        # if model provider is OpenAI or Azure
         if (
             any(prefix in model_lower for prefix in ("gpt", "openai", "azure"))
             and not stream
diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py
index 87e6a100e9d..aebe8ae3207 100644
--- a/ddtrace/llmobs/_integrations/openai.py
+++ b/ddtrace/llmobs/_integrations/openai.py
@@ -1,33 +1,26 @@
-import json
 from typing import Any
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Tuple

-from ddtrace.internal import core
 from ddtrace.internal.constants import COMPONENT
-from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.internal.utils.version import parse_version
-from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
-from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
-from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
 from ddtrace.llmobs._constants import MODEL_PROVIDER
-from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import OUTPUT_VALUE
 from ddtrace.llmobs._constants import SPAN_KIND
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
+from ddtrace.llmobs._integrations.utils import is_openai_default_base_url
 from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_chat
 from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_completion
-from ddtrace.llmobs._integrations.utils import is_openai_default_base_url
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs.utils import Document
 from ddtrace.trace import Pin
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index 464480bd643..104b9c63f7c 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -10,15 +10,18 @@
 from typing import Tuple
 from typing import Union
 from urllib.parse import urlparse

-from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
-from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
-from ddtrace.internal import core
-from ddtrace.internal.utils.formats import format_trace_id
+from ddtrace._trace.span import Span
+from ddtrace.internal import core
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import format_trace_id
+from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
+from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+from ddtrace.llmobs._constants import INPUT_MESSAGES
+from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
-from ddtrace._trace.span import Span
-from ddtrace.llmobs._constants import INPUT_MESSAGES, INPUT_TOKENS_METRIC_KEY, METADATA, OUTPUT_MESSAGES
+from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._utils import _get_attr
diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 9415c45d682..7d6d7684914 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -1,13 +1,14 @@
-from ddtrace.llmobs._writer import LLMObsSpanWriter
 import pytest
+
 from ddtrace.contrib.internal.litellm.patch import patch
-from ddtrace.trace import Pin
 from ddtrace.contrib.internal.litellm.patch import unpatch
-from tests.utils import DummyTracer
-from tests.utils import override_global_config
-from tests.contrib.litellm.utils import get_request_vcr
 from ddtrace.llmobs import LLMObs as llmobs_service
 from ddtrace.llmobs._constants import AGENTLESS_BASE_URL
+from ddtrace.llmobs._writer import LLMObsSpanWriter
+from ddtrace.trace import Pin
+from tests.contrib.litellm.utils import get_request_vcr
+from tests.utils import DummyTracer
+from tests.utils import override_global_config


 class TestLLMObsSpanWriter(LLMObsSpanWriter):
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index eaa87a49bc8..764886d981a 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,14 +1,12 @@
-from ddtrace._trace.pin import Pin
-from ddtrace.llmobs._llmobs import LLMObs
 import pytest

+from ddtrace._trace.pin import Pin
 from tests.contrib.litellm.utils import async_consume_stream
-from tests.contrib.litellm.utils import get_cassette_name
 from tests.contrib.litellm.utils import consume_stream
+from tests.contrib.litellm.utils import get_cassette_name
 from tests.contrib.litellm.utils import parse_response
 from tests.contrib.litellm.utils import tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
-from tests.utils import DummyTracer


 @pytest.mark.parametrize(
diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py
index a9b6309770b..1779f5dfda9 100644
---
 a/tests/contrib/litellm/utils.py
+++ b/tests/contrib/litellm/utils.py
@@ -1,6 +1,8 @@
-import vcr
-import os
 import json
+import os
+
+import vcr
+

 CASETTE_EXTENSION = ".yaml"
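Note: with patch 49 applied, the submission gate in LiteLLMIntegration encodes the dedup rules spelled out in its docstring. Hypothetical calls and the outcomes the docstring implies — the exact return values depend on which integrations are enabled, so treat this as a sketch, not a spec:

    integration.should_submit_to_llmobs({"api_base": "http://0.0.0.0:4000"}, "gpt-3.5-turbo")
    # -> False: proxy request; the downstream LLM call is captured instead
    integration.should_submit_to_llmobs({"stream": False}, "gpt-3.5-turbo")
    # -> False when the OpenAI integration is enabled: OpenAI owns the span
    integration.should_submit_to_llmobs({"stream": True}, "gpt-3.5-turbo")
    # -> True: streamed requests are captured by LiteLLM itself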
From 72a6fe5f29840843aa327cfee75bf03a02ee400c Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Fri, 11 Apr 2025 13:25:46 -0400
Subject: [PATCH 50/61] add mock tracer to openai pin instead of litellm

---
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 764886d981a..5e75cb00be2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -203,7 +203,7 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_even
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

-            pin = Pin.get_from(litellm)
+            pin = Pin.get_from(openai)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]

From 5b2e62e368e06e22fb595dc42d45554e959fdac2 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Sun, 20 Apr 2025 17:55:31 +0200
Subject: [PATCH 51/61] add argument for parametrized config

---
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 5e75cb00be2..631a0f2cf45 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,7 +199,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
+    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

From a8ec73551e02cc2f027b978ae37c645140defc03 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Sun, 20 Apr 2025 18:28:09 +0200
Subject: [PATCH 52/61] run black

---
 tests/contrib/litellm/test_litellm_llmobs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 631a0f2cf45..331c2051c2d 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,7 +199,9 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config):
+    def test_completion_integrations_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config
+    ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai
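Note: patches 50-52 all adjust the same wiring: the test tracer must be attached to the library whose spans the test asserts on. The Pin pattern they rely on, sketched here with the fixtures from conftest.py standing in for real objects:

    import openai
    from ddtrace.trace import Pin
    from tests.utils import DummyTracer

    mock_tracer = DummyTracer()
    pin = Pin.get_from(openai)                 # Pin placed on the module when the integration patched it
    pin._override(openai, tracer=mock_tracer)  # reroute spans so the test can pop and inspect them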
From bcfbd4ec1d972a5fef6c1caa38e31d51a472a0c5 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Mon, 21 Apr 2025 15:46:39 +0200
Subject: [PATCH 53/61] patch openai manually

---
 tests/contrib/litellm/conftest.py            | 5 ++---
 tests/contrib/litellm/test_litellm_llmobs.py | 7 ++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 7d6d7684914..8cc6417386e 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -56,7 +56,7 @@ def litellm(ddtrace_global_config, monkeypatch):


 @pytest.fixture
-def litellm_llmobs(mock_tracer, llmobs_span_writer, ddtrace_global_config):
+def litellm_llmobs(mock_tracer, llmobs_span_writer):
     llmobs_service.disable()
     with override_global_config(
         {
@@ -64,8 +64,7 @@ def litellm_llmobs(mock_tracer, llmobs_span_writer):
             "_dd_api_key": "",
         }
     ):
-        enable_integrations = ddtrace_global_config.get("_llmobs_integrations_enabled", False)
-        llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations)
+        llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=False)
         llmobs_service._instance._llmobs_span_writer = llmobs_span_writer
         yield llmobs_service
     llmobs_service.disable()
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 331c2051c2d..a7b6c7b9a3f 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,6 +1,7 @@
 import pytest

 from ddtrace._trace.pin import Pin
+from ddtrace._monkey import patch
 from tests.contrib.litellm.utils import async_consume_stream
 from tests.contrib.litellm.utils import consume_stream
@@ -198,11 +199,11 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_openai_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
             import openai

             pin = Pin.get_from(openai)
From 22b052de58a1cba41801e6e5459a95637ce46fb4 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 13:54:45 +0200
Subject: [PATCH 54/61] try moving flaky test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 49 ++++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a7b6c7b9a3f..17757c56dce 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,8 +199,34 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": True},
+                api_base="http://0.0.0.0:4000",
+            )
+            if stream:
+                consume_stream(resp, n)
+
+            # client side requests made to the proxy are not submitted to LLMObs
+            assert len(llmobs_events) == 0
+
+@pytest.mark.parametrize(
+    "stream,n",
+    [
+        (True, 1),
+        (True, 2),
+        (False, 1),
+        (False, 2),
-    ],
+)
+def test_completion_openai_enabled(
     litellm, request_vcr, llmobs_events, mock_tracer, stream, n
 ):
     with request_vcr.use_cassette(get_cassette_name(stream, n)):
         patch(openai=True)
@@ -250,21 +276,4 @@ def test_completion_openai_enabled(
             token_metrics=token_metrics,
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )
-        assert llmobs_events[0] == expected_event
-
-    def test_completion_proxy(
-        self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n
-    ):
-        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
-            messages = [{"content": "Hey, what is up?", "role": "user"}]
-            resp = litellm.completion(
-                model="gpt-3.5-turbo",
-                messages=messages,
-                stream=stream,
-                n=n,
-                stream_options={"include_usage": True},
-                api_base="http://0.0.0.0:4000",
-            )
-            if stream:
-                consume_stream(resp, n)
-
-            # client side requests made to the proxy are not submitted to LLMObs
-            assert len(llmobs_events) == 0
+        assert llmobs_events[0] == expected_event
\ No newline at end of file
From e5c18f03d9d815b0d5b3c5d586a93d6ebb4099ce Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 14:12:39 +0200
Subject: [PATCH 55/61] simplify flaky openai enabled test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 79 ++++++--------------
 1 file changed, 21 insertions(+), 58 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 17757c56dce..a081bd5daa2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,8 +199,16 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
-        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
+    def test_completion_openai_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    ):
+        with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
+            import openai
+
+            pin = Pin.get_from(openai)
+            pin._override(openai, tracer=mock_tracer)
+
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
                 model="gpt-3.5-turbo",
@@ -208,33 +216,16 @@ def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_e
                 stream=stream,
                 n=n,
                 stream_options={"include_usage": True},
-                api_base="http://0.0.0.0:4000",
             )
             if stream:
-                consume_stream(resp, n)
-
-        # client side requests made to the proxy are not submitted to LLMObs
-        assert len(llmobs_events) == 0
-
-@pytest.mark.parametrize(
-    "stream,n",
-    [
-        (True, 1),
-        (True, 2),
-        (False, 1),
-        (False, 2),
-    ],
-)
-def test_completion_openai_enabled(
-    litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-):
-    with request_vcr.use_cassette(get_cassette_name(stream, n)):
-        patch(openai=True)
-        import openai
-
-        pin = Pin.get_from(openai)
-        pin._override(openai, tracer=mock_tracer)
+                for _ in resp:
+                    pass

+            assert len(llmobs_events) == 1
+            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
+
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
@@ -242,38 +233,10 @@ def test_completion_openai_enabled(
                 stream=stream,
                 n=n,
                 stream_options={"include_usage": True},
+                api_base="http://0.0.0.0:4000",
             )
             if stream:
-                output_messages, token_metrics = consume_stream(resp, n)
-            else:
-                output_messages, token_metrics = parse_response(resp)
+                consume_stream(resp, n)

-            spans = mock_tracer.pop_traces()
-            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
-            if stream:
-                span = spans[0][0]
-                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
-                model_name = "gpt-3.5-turbo"
-            else:
-                span = spans[0][1]
-                # remove parent span since LiteLLM request span will not be submitted to LLMObs
-                span._parent = None
-                metadata = {
-                    "n": n,
-                    "extra_body": {},
-                    "timeout": 600.0,
-                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
-                }
-                model_name = "gpt-3.5-turbo-0125"
-            assert len(llmobs_events) == 1
-            expected_event = _expected_llmobs_llm_span_event(
-                span,
-                model_name=model_name,
-                model_provider="openai",
-                input_messages=messages,
-                output_messages=output_messages,
-                metadata=metadata,
-                token_metrics=token_metrics,
-                tags={"ml_app": "", "service": "tests.contrib.litellm"},
-            )
-            assert llmobs_events[0] == expected_event
+            # client side requests made to the proxy are not submitted to LLMObs
+            assert len(llmobs_events) == 0
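Note: the single-line assertion introduced in patch 55 needs the conditional parenthesized (as shown above) so it compares against a name rather than asserting a truthy string when streaming. It reads the same long-hand; both forms below are equivalent:

    expected_name = "litellm.request" if stream else "OpenAI.createChatCompletion"
    assert llmobs_events[0]["name"] == expected_name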
From 80ee6c676fd67678a630a6874417ca1d89c2172f Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 18:50:58 +0200
Subject: [PATCH 56/61] revert to passing test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 54 ++++++++++++++++----
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a081bd5daa2..1114c285382 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -11,12 +11,16 @@
 @pytest.mark.parametrize(
-    "stream,n",
+    "stream,n, include_usage",
     [
-        (True, 1),
-        (True, 2),
-        (False, 1),
-        (False, 2),
+        (True, 1, True),
+        (True, 2, True),
+        (False, 1, True),
+        (False, 2, True),
+        (True, 1, False),
+        (True, 2, False),
+        (False, 1, False),
+        (False, 2, False),
     ],
 )
 class TestLLMObsLiteLLM:
@@ -203,14 +203,14 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
+    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
+    def test_completion_integrations_enabled(
         self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
     ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
-            patch(openai=True)
             import openai

-            pin = Pin.get_from(openai)
+            pin = Pin.get_from(litellm)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -218,11 +222,39 @@ def test_completion_integrations_enabled(
                 stream_options={"include_usage": True},
             )
             if stream:
-                for _ in resp:
-                    pass
+                output_messages, token_metrics = consume_stream(resp, n)
+            else:
+                output_messages, token_metrics = parse_response(resp)

+            spans = mock_tracer.pop_traces()
+            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
+            if stream:
+                span = spans[0][0]
+                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
+                model_name = "gpt-3.5-turbo"
+            else:
+                span = spans[0][1]
+                # remove parent span since LiteLLM request span will not be submitted to LLMObs
+                span._parent = None
+                metadata = {
+                    "n": n,
+                    "extra_body": {},
+                    "timeout": 600.0,
+                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
+                }
+                model_name = "gpt-3.5-turbo-0125"
             assert len(llmobs_events) == 1
+            expected_event = _expected_llmobs_llm_span_event(
+                span,
+                model_name=model_name,
+                model_provider="openai",
+                input_messages=messages,
+                output_messages=output_messages,
+                metadata=metadata,
-            token_metrics=token_metrics,
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )
-            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
+                token_metrics=token_metrics,
+                tags={"ml_app": "", "service": "tests.contrib.litellm"},
+            )
+            assert llmobs_events[0] == expected_event

     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(

From 7813adabf06c2630908f3be59718f599a93e300b Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 19:03:16 +0200
Subject: [PATCH 57/61] Revert "revert to passing test"

This reverts commit 80ee6c676fd67678a630a6874417ca1d89c2172f.
---
 tests/contrib/litellm/test_litellm_llmobs.py | 54 ++++----------------
 1 file changed, 11 insertions(+), 43 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 1114c285382..a081bd5daa2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -11,16 +11,12 @@
 @pytest.mark.parametrize(
-    "stream,n, include_usage",
+    "stream,n",
     [
-        (True, 1, True),
-        (True, 2, True),
-        (False, 1, True),
-        (False, 2, True),
-        (True, 1, False),
-        (True, 2, False),
-        (False, 1, False),
-        (False, 2, False),
+        (True, 1),
+        (True, 2),
+        (False, 1),
+        (False, 2),
     ],
 )
 class TestLLMObsLiteLLM:
@@ -203,14 +203,14 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
+    def test_completion_openai_enabled(
         self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
     ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
             import openai

-            pin = Pin.get_from(litellm)
+            pin = Pin.get_from(openai)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -222,39 +218,11 @@ def test_completion_openai_enabled(
                 stream_options={"include_usage": True},
             )
             if stream:
-                output_messages, token_metrics = consume_stream(resp, n)
-            else:
-                output_messages, token_metrics = parse_response(resp)
+                for _ in resp:
+                    pass

-            spans = mock_tracer.pop_traces()
-            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
-            if stream:
-                span = spans[0][0]
-                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
-                model_name = "gpt-3.5-turbo"
-            else:
-                span = spans[0][1]
-                # remove parent span since LiteLLM request span will not be submitted to LLMObs
-                span._parent = None
-                metadata = {
-                    "n": n,
-                    "extra_body": {},
-                    "timeout": 600.0,
-                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
-                }
-                model_name = "gpt-3.5-turbo-0125"
             assert len(llmobs_events) == 1
-            expected_event = _expected_llmobs_llm_span_event(
-                span,
-                model_name=model_name,
-                model_provider="openai",
-                input_messages=messages,
-                output_messages=output_messages,
-                metadata=metadata,
-                token_metrics=token_metrics,
-                tags={"ml_app": "", "service": "tests.contrib.litellm"},
-            )
-            assert llmobs_events[0] == expected_event
+            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")

     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):

From efefbb2097ad8096247bf4908a417b8bd5469b44 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Wed, 23 Apr 2025 13:25:54 +0200
Subject: [PATCH 58/61] use common test llmobs span writer

---
 tests/contrib/litellm/conftest.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 8cc6417386e..e89039adbbf 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -3,21 +3,11 @@
 from ddtrace.contrib.internal.litellm.patch import patch
 from ddtrace.contrib.internal.litellm.patch import unpatch
 from ddtrace.llmobs import LLMObs as llmobs_service
-from ddtrace.llmobs._constants import AGENTLESS_BASE_URL
-from ddtrace.llmobs._writer import LLMObsSpanWriter
 from ddtrace.trace import Pin
 from tests.contrib.litellm.utils import get_request_vcr
 from tests.utils import DummyTracer
 from tests.utils import override_global_config
-
-
-class TestLLMObsSpanWriter(LLMObsSpanWriter):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.events = []
-
-    def enqueue(self, event):
-        self.events.append(event)
+from tests.llmobs._utils import TestLLMObsSpanWriter


 def default_global_config():
@@ -31,8 +21,7 @@ def ddtrace_global_config():

 @pytest.fixture
 def llmobs_span_writer():
-    agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com")
-    yield TestLLMObsSpanWriter(is_agentless=True, agentless_url=agentless_url, interval=1.0, timeout=1.0)
+    yield TestLLMObsSpanWriter(is_agentless=True, interval=1.0, timeout=1.0)


 @pytest.fixture
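Note: the local TestLLMObsSpanWriter removed from conftest.py in patch 58 captured events in memory instead of shipping them, and the shared helper in tests/llmobs/_utils.py presumably follows the same idea. The removed class, for reference:

    class TestLLMObsSpanWriter(LLMObsSpanWriter):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.events = []

        def enqueue(self, event):
            self.events.append(event)  # captured for the llmobs_events fixture to assert on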
From 83b68fced34bb74e325906e4362358f1a4f62b5a Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 11:08:30 +0200
Subject: [PATCH 59/61] run black

---
 tests/contrib/litellm/test_litellm_llmobs.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a081bd5daa2..5e64fc33e91 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,9 +199,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             patch(openai=True)
             import openai
@@ -223,7 +221,7 @@ def test_completion_openai_enabled(
             assert len(llmobs_events) == 1
             assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
-
+
     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]

From e982630fdcdcc9bc713e14520e0878b2ebf16911 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 11:13:35 +0200
Subject: [PATCH 60/61] run ruff

---
 tests/contrib/litellm/conftest.py            | 2 +-
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index e89039adbbf..bd7be20fd48 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -5,9 +5,9 @@
 from ddtrace.llmobs import LLMObs as llmobs_service
 from ddtrace.trace import Pin
 from tests.contrib.litellm.utils import get_request_vcr
+from tests.llmobs._utils import TestLLMObsSpanWriter
 from tests.utils import DummyTracer
 from tests.utils import override_global_config
-from tests.llmobs._utils import TestLLMObsSpanWriter


 def default_global_config():
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 5e64fc33e91..d68597730b7 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,7 +1,7 @@
 import pytest

-from ddtrace._trace.pin import Pin
 from ddtrace._monkey import patch
+from ddtrace._trace.pin import Pin
 from tests.contrib.litellm.utils import async_consume_stream
 from tests.contrib.litellm.utils import consume_stream

From a6d8500a22aee82d2b523029bdb32b29758b2e9a Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 16:57:31 +0200
Subject: [PATCH 61/61] manually override tracer for litellm and openai

---
 tests/contrib/litellm/test_litellm_llmobs.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index d68597730b7..587b7837eaa 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -8,6 +8,7 @@
 from tests.contrib.litellm.utils import parse_response
 from tests.contrib.litellm.utils import tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from ddtrace.llmobs._llmobs import LLMObs


 @pytest.mark.parametrize(
@@ -199,13 +200,18 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
+    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_span_writer, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             patch(openai=True)
+            LLMObs.enable(integrations_enabled=True, _tracer=mock_tracer)
+            LLMObs._instance._llmobs_span_writer = llmobs_span_writer
             import openai
+            import litellm

-            pin = Pin.get_from(openai)
-            pin._override(openai, tracer=mock_tracer)
+            litellm_pin = Pin.get_from(litellm)
+            litellm_pin._override(litellm, tracer=mock_tracer)
+            openai_pin = Pin.get_from(openai)
+            openai_pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
@@ -218,6 +224,7 @@ def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mo
             if stream:
                 for _ in resp:
                     pass
+            LLMObs.disable()

             assert len(llmobs_events) == 1
             assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
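Note: condensed for orientation, the final shape of the flaky test's setup after patch 61 — LLMObs is enabled with integrations on, its writer is swapped for the capturing one, and both libraries' Pins point at the same test tracer before the request is made (the loop below is a condensation of the four Pin lines in the diff above):

    patch(openai=True)
    LLMObs.enable(integrations_enabled=True, _tracer=mock_tracer)
    LLMObs._instance._llmobs_span_writer = llmobs_span_writer
    for lib in (litellm, openai):
        Pin.get_from(lib)._override(lib, tracer=mock_tracer)
    # ... issue the request, drain the stream if needed ...
    LLMObs.disable()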