From 69d97098fa798a5dfb9aa6e5c46bb0d10bf6419a Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 19 Mar 2025 14:01:18 -0400 Subject: [PATCH 01/61] add litellm apm integration --- ddtrace/_monkey.py | 1 + ddtrace/contrib/internal/litellm/patch.py | 95 +++++++++++++++++++++++ ddtrace/contrib/internal/litellm/utils.py | 7 ++ ddtrace/contrib/litellm/__init__.py | 14 ++++ ddtrace/llmobs/_integrations/__init__.py | 2 + ddtrace/llmobs/_integrations/litellm.py | 18 +++++ ddtrace/settings/_config.py | 1 + 7 files changed, 138 insertions(+) create mode 100644 ddtrace/contrib/internal/litellm/patch.py create mode 100644 ddtrace/contrib/internal/litellm/utils.py create mode 100644 ddtrace/contrib/litellm/__init__.py create mode 100644 ddtrace/llmobs/_integrations/litellm.py diff --git a/ddtrace/_monkey.py b/ddtrace/_monkey.py index 4f04ec8fca2..0ac4d56d672 100644 --- a/ddtrace/_monkey.py +++ b/ddtrace/_monkey.py @@ -51,6 +51,7 @@ "httpx": True, "kafka": True, "langgraph": False, + "litellm": True, "mongoengine": True, "mysql": True, "mysqldb": True, diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py new file mode 100644 index 00000000000..7ffa141551d --- /dev/null +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -0,0 +1,95 @@ +import os +import sys + +import litellm + +from ddtrace import config +from ddtrace.contrib.trace_utils import unwrap +from ddtrace.contrib.trace_utils import with_traced_module +from ddtrace.contrib.trace_utils import wrap +from ddtrace.contrib.internal.litellm.utils import get_provider +from ddtrace.llmobs._integrations import LiteLLMIntegration +from ddtrace.trace import Pin +from ddtrace.internal.utils import get_argument_value + + +config._add( + "litellm", + { + "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), + "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), + }, +) + + +def get_version(): + # type: () -> str + return getattr(litellm, "__version__", "") + + +def _create_span(litellm, pin, func, instance, args, kwargs): + """Helper function to create and configure a traced span.""" + integration = litellm._datadog_integration + model = get_argument_value(args, kwargs, 0, "model", None) + span = integration.trace( + pin, + "litellm.%s" % func.__name__, + model=model, + provider=get_provider(model), + submit_to_llmobs=False, + ) + return span + + +@with_traced_module +def traced_completion(litellm, pin, func, instance, args, kwargs): + span = _create_span(litellm, pin, func, instance, args, kwargs) + try: + return func(*args, **kwargs) + except Exception: + span.set_exc_info(*sys.exc_info()) + raise + finally: + span.finish() + + +@with_traced_module +async def traced_acompletion(litellm, pin, func, instance, args, kwargs): + span = _create_span(litellm, pin, func, instance, args, kwargs) + try: + return await func(*args, **kwargs) + except Exception: + span.set_exc_info(*sys.exc_info()) + raise + finally: + span.finish() + + +def patch(): + if getattr(litellm, "_datadog_patch", False): + return + + litellm._datadog_patch = True + + Pin().onto(litellm) + integration = LiteLLMIntegration(integration_config=config.litellm) + litellm._datadog_integration = integration + + wrap("litellm", "completion", traced_completion(litellm)) + wrap("litellm", "acompletion", traced_acompletion(litellm)) + wrap("litellm", "text_completion", traced_completion(litellm)) + wrap("litellm", "atext_completion", traced_acompletion(litellm)) + + +def unpatch(): + if 
not getattr(litellm, "_datadog_patch", False): + return + + litellm._datadog_patch = False + + unwrap("litellm", "completion", traced_completion(litellm)) + unwrap("litellm", "acompletion", traced_acompletion(litellm)) + unwrap("litellm", "text_completion", traced_completion(litellm)) + unwrap("litellm", "atext_completion", traced_acompletion(litellm)) + + delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py new file mode 100644 index 00000000000..dd6d766ad2b --- /dev/null +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -0,0 +1,7 @@ +# TODO: temporary since we may want to intercept get_llm_provider response +def get_provider(model): + parsed_model = model.split("/") + if len(parsed_model) == 2: + return parsed_model[0] + else: + return "" diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py new file mode 100644 index 00000000000..af284d63775 --- /dev/null +++ b/ddtrace/contrib/litellm/__init__.py @@ -0,0 +1,14 @@ +# TODO: documentation + +from ddtrace.internal.utils.importlib import require_modules + + +required_modules = ["litellm"] + +with require_modules(required_modules) as missing_modules: + if not missing_modules: + from ddtrace.contrib.internal.litellm.patch import get_version + from ddtrace.contrib.internal.litellm.patch import patch + from ddtrace.contrib.internal.litellm.patch import unpatch + + __all__ = ["patch", "unpatch", "get_version"] \ No newline at end of file diff --git a/ddtrace/llmobs/_integrations/__init__.py b/ddtrace/llmobs/_integrations/__init__.py index 71cae092197..d7c5bdab0da 100644 --- a/ddtrace/llmobs/_integrations/__init__.py +++ b/ddtrace/llmobs/_integrations/__init__.py @@ -3,6 +3,7 @@ from .bedrock import BedrockIntegration from .gemini import GeminiIntegration from .langchain import LangChainIntegration +from .litellm import LiteLLMIntegration from .openai import OpenAIIntegration from .vertexai import VertexAIIntegration @@ -13,6 +14,7 @@ "BedrockIntegration", "GeminiIntegration", "LangChainIntegration", + "LiteLLMIntegration", "OpenAIIntegration", "VertexAIIntegration", ] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py new file mode 100644 index 00000000000..afe7a373f06 --- /dev/null +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -0,0 +1,18 @@ +from typing import Any +from typing import Dict +from typing import Optional + +from ddtrace.trace import Span +from ddtrace.llmobs._integrations.base import BaseLLMIntegration + + +class LiteLLMIntegration(BaseLLMIntegration): + _integration_name = "litellm" + + def _set_base_span_tags( + self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] + ) -> None: + if provider is not None: + span.set_tag_str("litellm.request.provider", provider) + if model is not None: + span.set_tag_str("litellm.request.model", model) \ No newline at end of file diff --git a/ddtrace/settings/_config.py b/ddtrace/settings/_config.py index f91845fe4b9..7921de0c288 100644 --- a/ddtrace/settings/_config.py +++ b/ddtrace/settings/_config.py @@ -111,6 +111,7 @@ "unittest", "falcon", "langgraph", + "litellm", "aioredis", "test_visibility", "redis", From f6c9ead0b7b91764696a18acf16c2f884b556d26 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 19 Mar 2025 17:04:23 -0400 Subject: [PATCH 02/61] add tests for litellm --- .riot/requirements/17b7978.txt | 64 +++++ .riot/requirements/1db92d0.txt | 
67 +++++ .riot/requirements/1f657b3.txt | 64 +++++ .riot/requirements/8c9f21c.txt | 67 +++++ ddtrace/contrib/internal/litellm/patch.py | 14 +- riotfile.py | 10 + tests/contrib/litellm/__init__.py | 0 .../litellm/cassettes/acompletion.yaml | 106 +++++++ .../acompletion.yaml_multiple_choices | 107 ++++++++ .../litellm/cassettes/acompletion_stream.yaml | 145 ++++++++++ .../acompletion_stream.yaml_multiple_choices | 258 ++++++++++++++++++ .../litellm/cassettes/atext_completion.yaml | 106 +++++++ .../atext_completion.yaml_multiple_choices | 106 +++++++ .../cassettes/atext_completion_stream.yaml | 141 ++++++++++ ...xt_completion_stream.yaml_multiple_choices | 180 ++++++++++++ .../contrib/litellm/cassettes/completion.yaml | 107 ++++++++ .../completion.yaml_multiple_choices | 107 ++++++++ .../litellm/cassettes/completion_stream.yaml | 181 ++++++++++++ .../completion_stream.yaml_multiple_choices | 234 ++++++++++++++++ .../litellm/cassettes/text_completion.yaml | 106 +++++++ .../text_completion.yaml_multiple_choices | 106 +++++++ .../cassettes/text_completion_stream.yaml | 133 +++++++++ ...xt_completion_stream.yaml_multiple_choices | 177 ++++++++++++ tests/contrib/litellm/conftest.py | 57 ++++ tests/contrib/litellm/test_litellm.py | 55 ++++ tests/contrib/litellm/test_litellm_patch.py | 30 ++ tests/contrib/litellm/utils.py | 15 + ....test_litellm.test_litellm_completion.json | 28 ++ 28 files changed, 2766 insertions(+), 5 deletions(-) create mode 100644 .riot/requirements/17b7978.txt create mode 100644 .riot/requirements/1db92d0.txt create mode 100644 .riot/requirements/1f657b3.txt create mode 100644 .riot/requirements/8c9f21c.txt create mode 100644 tests/contrib/litellm/__init__.py create mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml create mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml create mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/completion.yaml create mode 100644 tests/contrib/litellm/cassettes/completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml create mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices create mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml create mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices create mode 100644 tests/contrib/litellm/conftest.py create mode 100644 tests/contrib/litellm/test_litellm.py create mode 100644 tests/contrib/litellm/test_litellm_patch.py create mode 100644 tests/contrib/litellm/utils.py create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json diff --git a/.riot/requirements/17b7978.txt b/.riot/requirements/17b7978.txt new file mode 100644 index 00000000000..798b258db60 --- /dev/null +++ b/.riot/requirements/17b7978.txt @@ -0,0 +1,64 @@ +# +# This file is 
autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/17b7978.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/1db92d0.txt b/.riot/requirements/1db92d0.txt new file mode 100644 index 00000000000..e86bb4cb0aa --- /dev/null +++ b/.riot/requirements/1db92d0.txt @@ -0,0 +1,67 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1db92d0.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +async-timeout==5.0.1 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +exceptiongroup==1.2.2 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tomli==2.2.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/1f657b3.txt b/.riot/requirements/1f657b3.txt new file mode 100644 index 00000000000..d1a93e65777 --- /dev/null +++ b/.riot/requirements/1f657b3.txt @@ -0,0 +1,64 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1f657b3.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 
+jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==2.3.0 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/.riot/requirements/8c9f21c.txt b/.riot/requirements/8c9f21c.txt new file mode 100644 index 00000000000..4c7ee2bb6e4 --- /dev/null +++ b/.riot/requirements/8c9f21c.txt @@ -0,0 +1,67 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/8c9f21c.in +# +aiohappyeyeballs==2.6.1 +aiohttp==3.11.14 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +async-timeout==5.0.1 +attrs==25.3.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +coverage[toml]==7.7.0 +distro==1.9.0 +exceptiongroup==1.2.2 +filelock==3.18.0 +frozenlist==1.5.0 +fsspec==2025.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +hypothesis==6.45.0 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.1.0 +jinja2==3.1.6 +jiter==0.9.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +litellm==1.63.12 +markupsafe==3.0.2 +mock==5.2.0 +multidict==6.2.0 +openai==1.66.5 +opentracing==2.4.0 +packaging==24.2 +pluggy==1.5.0 +propcache==0.3.0 +pydantic==2.10.6 +pydantic-core==2.27.2 +pytest==8.3.5 +pytest-asyncio==0.25.3 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +python-dotenv==1.0.1 +pyyaml==6.0.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +rpds-py==0.23.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tiktoken==0.9.0 +tokenizers==0.21.1 +tomli==2.2.1 +tqdm==4.67.1 +typing-extensions==4.12.2 +urllib3==1.26.20 +vcrpy==7.0.0 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 7ffa141551d..1ce2e073c73 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -1,5 +1,6 @@ import os import sys +from importlib.metadata import version import litellm @@ -24,7 +25,10 @@ def get_version(): # type: () -> str - return getattr(litellm, "__version__", "") + try: + return version("litellm") + except Exception: + return "" def _create_span(litellm, pin, func, instance, args, kwargs): @@ -87,9 +91,9 @@ def unpatch(): litellm._datadog_patch = False - unwrap("litellm", "completion", traced_completion(litellm)) - unwrap("litellm", "acompletion", traced_acompletion(litellm)) - unwrap("litellm", "text_completion", traced_completion(litellm)) - unwrap("litellm", "atext_completion", traced_acompletion(litellm)) + unwrap(litellm, "completion") + unwrap(litellm, "acompletion") + unwrap(litellm, "text_completion") + unwrap(litellm, "atext_completion") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/riotfile.py b/riotfile.py index de16e76ed3e..5fcf5d52097 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2576,6 +2576,16 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT "langgraph": "~=0.2.60", }, ), + Venv( + name="litellm", + command="pytest {cmdargs} 
tests/contrib/litellm", + pys=select_pys(min_version="3.9", max_version="3.12"), + pkgs={ + "litellm": latest, + "vcrpy": latest, + "pytest-asyncio": latest, + }, + ), Venv( name="anthropic", command="pytest {cmdargs} tests/contrib/anthropic", diff --git a/tests/contrib/litellm/__init__.py b/tests/contrib/litellm/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml b/tests/contrib/litellm/cassettes/acompletion.yaml new file mode 100644 index 00000000000..1ef458cb6cf --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '83' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLBahsxEL3vV0x16cU2cew4jS+B5NBATaEk0EIJiyzN7irVaoQ0cmuC + /71o7XjXbQK56DBv3tN7M/NcAAijxRKEaiSr1tvxzW2636x89bCd39+mL9/iQ9h8Xq8WP1Z3qhKj + zKD1Eyp+YU0Utd4iG3J7WAWUjFl1ejk/n08Xi09XHdCSRptptefxbHIx5hTWND6bnl8cmA0ZhVEs + 4WcBAPDcvdmj0/hHLOFs9FJpMUZZo1gemwBEIJsrQsZoIkvHYtSDihyj62zf4Ra4wYAf4CsxtEk1 + I3hKkaGRrjauBko8ge+N5I8RkoffhhvYUroeCgasUpQ5kEvWDgDpHLHMA+miPB6Q3dG8pdoHWsd/ + qKIyzsSmDCgjuWw0MnnRobsC4LEbUjrJLXyg1nPJ9Au776azvZzo1zIALw8gE0vb12eHwZ6qlRpZ + GhsHQxZKqgZ1z+w3IpM2NACKQeb/zbymvc9tXP0e+R5QCj2jLn1AbdRp4L4tYD7at9qOM+4Mi4hh + YxSWbDDkPWisZLL7cxJxGxnbsjKuxuCD6W4q77HYFX8BAAD//wMAtRqFIFIDAAA= + headers: + CF-RAY: + - 922fc592ca8ee61b-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:38:09 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Og1MhUPOSLUYFX6sLOZjXUt6_Ii7DeHec6bu0xEwveU-1742416689-1.0.1.1-24FPrr2zTb6eb.iYCyCZ5tyNDAaMwFjbJQ6MuU6ZZfarYDu945JdhKsS.h0Vc5bvUu4cEVkN072A15WuJ.KstgoBD.hgm.Owir7t6Mfrs4A; + path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=igKx8CPA0FCh5KeOwdnLy585rLuR2kKt7gGASW5nWSA-1742416689508-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '312' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c35f7392746227673bdca3d80b74722e + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices 
b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices new file mode 100644 index 00000000000..69fe82a0cc4 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA8RTwW7bMAy9+ysInZMgTho0y2VYOwwdMHSnYYehMBSJsdXKoipRa7Oi/z7ITmNn + 3YBdhl184ON7fnykngoAYbTYgFCNZNV6O724THc/vnxaf31/9eH68l7rz/PHhb+450TvSjHJDNre + ouIX1kxR6y2yIdfDKqBkzKrl+dnirDxfz990QEsababVnqfL2WrKKWxpOi8XqwOzIaMwig18KwAA + nrpv9ug0PooNzCcvlRZjlDWKzbEJQASyuSJkjCaydCwmA6jIMbrO9jUxtEk1E7hNkaHBgMAEPQ32 + lGZwRQ+gpIOP0KD1uQZMWu7fjiUD7lKUeSSXrB0B0jlimSPphrk5IM9H+5ZqH2gbf6GKnXEmNlVA + Gcllq5HJi2JEfpVJ+U8zOU7/YLgB6fbcGFd3FYeox0EN8f33qAqAm+6i0kkgwgdqPVdMd9j9rlz2 + cmK44QFcrg8gE0s71FeHxE/VKo0sjY2j9IWSqkE9MIfzlUkbGgHjDb828zvtfm7j6r+RHwCl0DPq + ygfURp0OPLQFzC/8T23HjDvDImL4bhRWbDDkPWjcyWT7OxNxHxnbamdcjcEH0x1b3mPxXPwEAAD/ + /wMAj6bv838EAAA= + headers: + CF-RAY: + - 922fe0ef9f095818-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:50 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=aPjfuM9i_nmKcJm5fCd1envIdCwOQVscUCICrBTkJYU-1742417810-1.0.1.1-ADp9ZNwLfBcVhfTOqZ9bjV6taFMKE6YhPfsBJvKDzRAsgzcYNT1xHhcwjuBPt.NhHPZYGalO3QieMp1UU1gMwYJMN1JlwKpDJS5CoYVFdps; + path=/; expires=Wed, 19-Mar-25 21:26:50 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vDmdz8TD9L7irSgxD2Y3qy54hE9HNQW7QTl3s0jxFGk-1742417810365-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '404' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999962' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_a2320c1466e6e6e7e4ca227666ddef05 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml b/tests/contrib/litellm/cassettes/acompletion_stream.yaml new file mode 100644 index 00000000000..eaa66f573f1 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion_stream.yaml @@ -0,0 +1,145 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + 
headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '137' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + how"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fc595fb4f9c5e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:38:09 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=qJIrfX0cSgQGfLPmiIO9pJtmBk4d0TDxYojb3KwrOAQ-1742416689-1.0.1.1-H5.hTje2ckYwcrDNDdl7MVOpEbekK8vcdWlkX69z8CQtZFgoHg3xBQ2p0ijtOfOZoSsO.dkOlaQsLHQLTsnsPz5Ku2XJpBJx48ai9xyu_b4; + path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=L6iVhGFB5poEf4mgyHgsiQer_LAFpmPwx7BsZksUMxA-1742416689952-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '174' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7d0818d359483bfb4ae5dd041857b220 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..b3f1528ee49 --- /dev/null +++ b/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices @@ -0,0 +1,258 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + 
x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + ready"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":40,"total_tokens":53,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0e1892df27e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:49 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=_sDHtJttvjcmNqUWyQLC0HY_6ceDdhabvgxP_mSWetQ-1742417809-1.0.1.1-CqlTg5EIrNFOnvIRAcesGqLwPwg3FZ18khnoA0HR26ZkfsHWDW2u.nJYbbUMztUsr2FmgqcE_dOzuuEF.u5QN04xVbjgSkJ9zBXVj1Y5Ei0; + path=/; expires=Wed, 19-Mar-25 21:26:49 
GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=3J8OnKlA8uybv8hIzboB47mhL0FVEQahZhquEcDPxcM-1742417809367-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '329' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_95fb491365244b0b18b9daf1bc26cf5f + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml b/tests/contrib/litellm/cassettes/atext_completion.yaml new file mode 100644 index 00000000000..dd13fc8e98e --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '78' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLBTuMwEL3nKwafKWq6LYheVoJL97x7Qyhy7Uni4ngse7K0Qv13ZKc0 + 6QLSXnyYN+/5vZl5KwCE0WINQrWSVeft7OGx//26n2+Wrlwc9n/cQ1PXZFd3tGp2G3GdGLTdoeIP + 1o2izltkQ26AVUDJmFTLu+ViWd7e3s8z0JFGm2iN59mPm9WM+7Cl2bxcrE7MlozCKNbwVAAAvOU3 + eXQa92INWSdXOoxRNijW5yYAEcimipAxmsjSsbgeQUWO0WXbG7SWgFsMeAUbegUlHfyCgQYH6oFJ + y8PPKT1g3UeZ7Lve2gkgnSOWKX42/nxCjmerlhofaBv/oYraOBPbKqCM5JKtyORFRo8FwHMeSX+R + UvhAneeK6QXzd/eDmhh3MGJleQKZWNqxvjhN8VKs0sjS2DiZqFBStahH5jh+2WtDE6CYRP5s5ivt + IbZxzf/Ij4BS6Bl15QNqoy4Dj20B04V+13YecTYsIoa/RmHFBkNag8Za9na4HREPkbGrauMaDD6Y + fEBpjcWxeAcAAP//AwBZaYouPwMAAA== + headers: + CF-RAY: + - 922fc59a1ac781f9-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:38:10 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Bz9ucbRhHir1yYpu5unXOWIq57sqhNmDYXwU8KYxhHk-1742416690-1.0.1.1-SaoVbDJEo.BTnLOYloqvADWkOFfKvIPkYdRataztswTUOadi9nBdpnxOrxqBTCDld5JP_w__0pINDjdJi4sSxdlMlK3f0SG9r54Vxu19sPQ; + path=/; expires=Wed, 19-Mar-25 21:08:10 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=CocuhkG0h7KH_xyamzsz3.bJ.3F_Hbx3vpGagUoqjNk-1742416690671-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '292' + 
openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_a2a6419c8a6accc5e78eef471a5b26f5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices new file mode 100644 index 00000000000..7c33ea81265 --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '84' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9xTsW7bMBDd9RXXm+0gUmIk8RK0XVIU6dAOHYpAoMmzzJTiMeSpqRHk3wtKtiUn + LdA5C4d7947v8R6fCgC0BpeAeqNEt8HNP3zsXPX965VpzuRRPXxZPNxe3r6vWrr/LN9wlhm8uict + e9aJ5jY4Est+gHUkJZSnlhfn1Xl5cVlWPdCyIZdpTZD52cliLl1c8fy0rBY75oatpoRL+FEAADz1 + Z9boDf3GJZzO9pWWUlIN4fLQBICRXa6gSskmUV5wNoKavZDvZd+Qc/wObvgRtPLwCQYCbLkDYaO2 + 11NipHWXVBbuO+cmgPKeRWXjveS7HfJ8EOm4CZFX6QUV19bbtKkjqcQ+C0rCAYsJ+ZXz8u07LwDu + +hh0R/4wRG6D1MI/qb/uapiGY+5GrNolBIVFuUl9TzoaVhsSZV2avCVqpTdkRuYYOdUZyxNguq/X + Yv42e7BtffM/40dAawpCpg6RjNXHhse2SPlX/qvt8MS9YEwUf1lNtViKeQ2G1qpzQ2owbZNQW6+t + byiGaPvo5DUWz8UfAAAA//8DALgzECozBAAA + headers: + CF-RAY: + - 922fe0fe5c567048-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:52 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=gCkAh45BSGsfWua_FvRAhLkamJr4XgRd0lcnofceYBY-1742417812-1.0.1.1-_Ift61wmuekINzK5SeNl.ZXlL8hJS1voTYf6n8_6aUhDSUOfy.a2z5vgSwlHYq9IvWGj3LuIw32DO0HcDW_yICnwNQSWKsmr9e1.SAjWwMY; + path=/; expires=Wed, 19-Mar-25 21:26:52 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=QA9rcm0xWUBveoAGWFSSbnWbR50iE8_T0TS2HZr70GQ-1742417812613-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '284' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999964' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_4f430f54186e98254720edf4049f781c + 
status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml new file mode 100644 index 00000000000..bfb6b17bb6d --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml @@ -0,0 +1,141 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '132' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + 
you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":10,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fd11f3abc6fd4-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:46:02 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=V_JLXb_xlWAgshE0jP2yNY0KBReG3a8K9k.1.dtFMCg-1742417162-1.0.1.1-FZF9YKksh6mPiowW_hJcxYsQgyc8V.sjUl892Qq3mA7LD_3uGUbH7U.DjmnY8HxjecXeWIVp3wTlLleq10jNmS8WvmrJg76.LaSNBV6tQ4U; + path=/; expires=Wed, 19-Mar-25 21:16:02 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WqtIXB3UQHUJ15_13eNn1X9VBnzvnBe8zPBAhTHs9K0-1742417162393-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '163' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999979' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e2fdb82e16db2663c51b4ae5540e74e5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..7980c0ce553 --- /dev/null +++ b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices @@ -0,0 +1,180 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '138' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 
1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0faab2cc9bb-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:51 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=T4a2h0oJlCPxhohnl5Y8ce47sMTKxTSiMx4YnMHdCeE-1742417811-1.0.1.1-7h_MyNOSh.23MJSDS07CHWQSCd54y1UXY03vB9MV_upmjlqus6.JWajf9T9VvkjWxvSy_46nZsEU.neeA_2Ok7EBScbstRENB_le3WcBoCs; + path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=s1wC2SePnrWoNLlq9w_9HSceo63N6ZY6bl87NsDSz7U-1742417811921-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '183' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999964' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_947acd4ad0207061c681399bb70031ac + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml b/tests/contrib/litellm/cassettes/completion.yaml new file mode 100644 index 00000000000..8b1c5e95ffb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '83' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzknQfGFbLgPSAUUvvXTDDkNhKDJjq5FFRaTaBUX+ + +yAnjd21A3bRgQ9fii/JlwJA2UqtQJlGi2mDG6+v080u/Vi399c3e6zx2634u/XefN88870aZQVt + HtHIq2piqA0OxZI/YRNRC+aq00+L2WK6/DJfdKClCl2W1UHG88lyLCluaHw1nS3PyoasQVYr+FUA + ALx0b+7RV/hbreBq9BppkVnXqFaXJAAVyeWI0syWRXtRox4a8oK+a/uOBNpkmhE8JhZoMCIIwUkG + B0rwbKUB7Q+wT8jZGgNFEM077nirD9DoJ5zAz0YLGO3hFhp04aL+Ovw74jaxzt59cm4AtPckuvsg + u344k+PFp6M6RNrwX1K1td5yU0bUTD57YqGgOnosAB66eaY3I1IhUhukFNph9910fiqn+g32cLY8 + QyHRro/PP48+qFZWKNo6HuxDGW0arHplvzydKksDUAw8v2/mo9on39bX/1O+B8ZgEKzKELGy5q3h + 
Pi1ivu9/pV1m3DWsGOOTNViKxZj3UOFWJ3e6PMUHFmzLrfU1xhBtd355j8Wx+AMAAP//AwCIlZxL + fQMAAA== + headers: + CF-RAY: + - 922fb3249bf7e5c3-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:25:34 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; + path=/; expires=Wed, 19-Mar-25 20:55:34 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '536' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_831c50d74f79d3c6f55b46c6165ad726 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices new file mode 100644 index 00000000000..3284d55e07f --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + cookie: + - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA8RTTWsbMRC9768YdLaDHTs49aU0oV+UFkpvKWGRpfGuYq1GSKMkJuS/F2kd76ZJ + oZfSyx7mzZt9783ooQIQRos1CNVKVp2304vLtLs/t1fvLr/srn7suu9f31y8d/Zjsh/wVkwygzY3 + qPiJdaKo8xbZkOthFVAy5qnz1fJ0OV+dz1YF6EijzbTG83RxcjblFDY0nc1Pzw7MlozCKNbwswIA + eCjfrNFpvBdrmE2eKh3GKBsU62MTgAhkc0XIGE1k6VhMBlCRY3RF9jdi6JJqJ3CTIkOLAYEJihv4 + RHcgA8Ke0tsxP+A2RZn1u2TtCJDOEcvsvyi/PiCPR62WGh9oE3+jiq1xJrZ1QBnJZV2RyYtqRH4R + wPyfBtDTsnO4M9yCdHtujWtKxSHqPh0lHXyGFq0vAJOW+/8YVQVwXc4nPQtE+ECd55pph+V380U/ + TgwHO4CL5QFkYmmH+nI1eWVarZGlsXGUvlBStagH5nCrMmlDI2C84ZdiXpvd+zau+ZvxA6AUekZd + +4DaqOeGh7aA+Tn/qe2YcREsIoZbo7BmgyHvQeNWJtvfmYj7yNjVW+MaDD6Ycmx5j9Vj9QsAAP// + AwBjQR3ubAQAAA== + headers: + CF-RAY: + - 922fe0dd3980081e-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:47 GMT + Server: + - 
cloudflare + Set-Cookie: + - __cf_bm=4IvKUKDYbguDfkct3LJSPjqtZQXESBNXlj0FYX2EhDw-1742417807-1.0.1.1-NheqJGPsBtnZt86lvpEQ399jpX9C0.Meer7zqTrBFvtM1nDS.F2nb3Am2CumeUA9gl3hKjHDRDn2VRJSIEJL1F4Ki3Doz2f86LPah_teN_M; + path=/; expires=Wed, 19-Mar-25 21:26:47 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '429' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_039283032c4707703c924619286ae6b1 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml b/tests/contrib/litellm/cassettes/completion_stream.yaml new file mode 100644 index 00000000000..8b9ee2258a0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml @@ -0,0 +1,181 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '137' + content-type: + - application/json + cookie: + - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; + _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + 
+ + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":21,"total_tokens":34,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fbf33c84f05d4-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:33:49 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '484' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999978' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ed79ffc55abcad13d82ad45a85b50cef + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..71352afe403 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices @@ -0,0 +1,234 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + 
data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + might"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0d84bc32081-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:46 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=bUH6Gw.1xWGTdrtJJp3Kd1QwQu.citY3dSw84SuP9a8-1742417806-1.0.1.1-nd3_tPgN5caA927YXL7MDwbkwDcsY2.cOUvLdkUaaYxi7UqUPwCwwGgjDsSpkg1AHFx7aR.wS8GKU2eBr2aujsFMmkWLmL_ohd4qBtE6K84; + path=/; expires=Wed, 19-Mar-25 21:26:46 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=spcOdmzqi_GTX7NW.qMkPKuD0G7qOz1ab6PGnOf_m_s-1742417806409-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '172' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999962' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_68b16a7f5aec94206951718ec91e4166 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml b/tests/contrib/litellm/cassettes/text_completion.yaml new file mode 100644 index 
00000000000..80fd89bcb39 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion.yaml @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '78' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Z7tgM7jtvGlwINiqSPQ9Fji0CgyZXEhOIS5KqtEfjf + C0q2JbcNkIsOOzujmeE+FQBoDW4AdaNEt8HN3990tw/fFu6Tflvx6sutbR4/7D5///q6dTcOZ5nB + 2wfScmRdaG6DI7HsB1hHUkJZdfnm6vJqub5erXqgZUMu0+og89XFei5d3PJ8sbxcH5gNW00JN/Cj + AAB46r/Zozf0GzewmB0nLaWkasLNaQkAI7s8QZWSTaK84GwENXsh39u+I+f4FdzxL9DKw0cYCLDj + DoSN2r2bEiNVXVLZuO+cmwDKexaVg/eW7w/I/mTScR0ib9NfVKyst6kpI6nEPhtKwgF7dF8A3Pdl + dGf5MERug5TCj9T/7npQw7H9EVseekJhUW4yP5LOxEpDoqxLky5RK92QGZlj8aozlidAMYn8r5n/ + aQ+xra9fIj8CWlMQMmWIZKw+DzyuRcq3+dzaqeLeMCaKP62mUizF/AyGKtW54Wow7ZJQW1bW1xRD + tP3p5Gcs9sUfAAAA//8DAEy0bTM5AwAA + headers: + CF-RAY: + - 922fb31c6b4a3b86-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:25:33 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=OVR0c7pmjNlvkgITOy_5zxiGhdaeoh2rYi0sMIwKUGw-1742415933-1.0.1.1-9ldeCN1Z0Gzz63GmLhFMPkykl_aiDMZHh9jdn_aB8Mwaq8j8c3UX0EJL_RDMTdRgRFAjD7RDiSRhuM45kkZ8yvViyheANvDky_wdNXC7pmg; + path=/; expires=Wed, 19-Mar-25 20:55:33 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '310' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999980' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f68897fc201c7e0cbeac660a95b4e136 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices new file mode 100644 index 00000000000..743d1a085e2 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices @@ -0,0 +1,106 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '84' + content-type: + - application/json + cookie: + - 
_cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9xTwW4TMRC971cMc06qbGjUNhdEikSBWw9cULVy7Mmuweux7FlKqPrvyLtJdkNB + 4szFh3nzxu95np8KALQG14C6UaLb4Oab286VPzcf79/fNtvN4nH5rnt7f/1Z6NMqRJxlBm+/kpYj + 60JzGxyJZT/AOpISylPLq8vlZXl1XZY90LIhl2l1kPnri9Vcurjl+aJcrg7Mhq2mhGv4UgAAPPVn + 1ugN/cA1LGbHSkspqZpwfWoCwMguV1ClZJMoLzgbQc1eyPey78g5fgV3/AhaefgAAwH23IGwUfs3 + U2KkXZdUFu475yaA8p5FZeO95IcD8nwS6bgOkbfpNyrurLepqSKpxD4LSsIBiwn5hfPy/3deADz0 + MejO/GGI3AaphL9Rf93NMA3H3I3Y8pAQFBblJvUj6WxYZUiUdWnylqiVbsiMzDFyqjOWJ8B0Xy/F + /Gn2YNv6+l/Gj4DWFIRMFSIZq88Nj22R8q/8W9vpiXvBmCh+t5oqsRTzGgztVOeG1GDaJ6G22llf + UwzR9tHJayyei18AAAD//wMABa0m/DMEAAA= + headers: + CF-RAY: + - 922fe0f67e45081e-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 19 Mar 2025 20:56:51 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=FlHOj93lCOi0G8vOplzZPeLSW3Vdv0Hb2HoLy7MM6Us-1742417811-1.0.1.1-kahp3aLVlFkNG2jXCdMC.nUEAogyYTUDKmeLcLdMShR8EnclldclSdDIIWq6EgH9RGnwBGy5.NfkQ9REHjpAGzXjaNfX3IBb1LAF6cQGWOE; + path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '347' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_0a1d4fa700dec9fa2f39b6a53af4d9c2 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml b/tests/contrib/litellm/cassettes/text_completion_stream.yaml new file mode 100644 index 00000000000..bc9e6f2efc4 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion_stream.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '132' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: 
https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":8,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fd11b9ae3c971-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:46:01 GMT + Server: + - cloudflare + Set-Cookie: + - 
__cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs; + path=/; expires=Wed, 19-Mar-25 21:16:01 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '226' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999980' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_6c0123908864d876c53702ce7507e69e + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices new file mode 100644 index 00000000000..62b0b934b66 --- /dev/null +++ b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices @@ -0,0 +1,177 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '138' + content-type: + - application/json + cookie: + - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; + __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.66.5 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.5 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + 
assist"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 922fe0f349d2081e-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 19 Mar 2025 20:56:50 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '172' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + 
x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999963' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_775dd461f5f264e40aeb0c5ab23fe071 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py new file mode 100644 index 00000000000..590b1986652 --- /dev/null +++ b/tests/contrib/litellm/conftest.py @@ -0,0 +1,57 @@ +import os +from typing import Generator + +import pytest +from ddtrace.contrib.internal.litellm.patch import patch +from ddtrace.trace import Pin +from ddtrace.contrib.internal.litellm.patch import unpatch +from tests.utils import DummyTracer +from tests.utils import DummyWriter +from tests.utils import override_config +from tests.utils import override_env +from tests.utils import override_global_config +from tests.contrib.litellm.utils import get_request_vcr +

def default_global_config(): + return {} + + +@pytest.fixture +def ddtrace_global_config(): + return {} +

@pytest.fixture +def ddtrace_config_litellm(): + return {} + + +@pytest.fixture +def litellm(ddtrace_global_config, ddtrace_config_litellm): + global_config = default_global_config() + global_config.update(ddtrace_global_config) + with override_global_config(global_config): + with override_config("litellm", ddtrace_config_litellm): + with override_env( + dict( + OPENAI_API_KEY=os.getenv("OPENAI_API_KEY", ""), + ) + ): + patch() + import litellm + + yield litellm + unpatch() + + +@pytest.fixture +def mock_tracer(litellm): + pin = Pin.get_from(litellm) + mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) + pin._override(litellm, tracer=mock_tracer) + pin.tracer._configure() + yield mock_tracer + + +@pytest.fixture +def request_vcr(): + return get_request_vcr() diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py new file mode 100644 index 00000000000..f3ce38c872f --- /dev/null +++ b/tests/contrib/litellm/test_litellm.py @@ -0,0 +1,55 @@ +import pytest +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +def test_litellm_completion(litellm, request_vcr, stream, n): + cassette = "completion.yaml" if not stream else "completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + messages = [{"content": "Hey, what is up?", "role": "user"}] + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +async def test_litellm_acompletion(litellm, request_vcr, stream, n): + cassette = "acompletion.yaml" if not stream else "acompletion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + messages = [{"content": "Hey, what is up?", "role": "user"}] + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +def test_litellm_text_completion(litellm, 
request_vcr, stream, n): + cassette = "text_completion.yaml" if not stream else "text_completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + litellm.text_completion( + model="gpt-3.5-turbo", + prompt="Hello world", + stream=stream, + n=n, + ) +

@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) +async def test_litellm_atext_completion(litellm, request_vcr, stream, n): + cassette = "atext_completion.yaml" if not stream else "atext_completion_stream.yaml" + choice_suffix = "_multiple_choices" if n > 1 else "" + with request_vcr.use_cassette(cassette + choice_suffix): + await litellm.atext_completion( + model="gpt-3.5-turbo", + prompt="Hello world", + stream=stream, + n=n, + ) diff --git a/tests/contrib/litellm/test_litellm_patch.py b/tests/contrib/litellm/test_litellm_patch.py new file mode 100644 index 00000000000..4946aeda49d --- /dev/null +++ b/tests/contrib/litellm/test_litellm_patch.py @@ -0,0 +1,30 @@ +from ddtrace.contrib.internal.litellm.patch import get_version +from ddtrace.contrib.internal.litellm.patch import patch +from ddtrace.contrib.internal.litellm.patch import unpatch +from tests.contrib.patch import PatchTestCase + + +class TestLitellmPatch(PatchTestCase.Base): + __integration_name__ = "litellm" + __module_name__ = "litellm" + __patch_func__ = patch + __unpatch_func__ = unpatch + __get_version__ = get_version + + def assert_module_patched(self, litellm): + self.assert_wrapped(litellm.completion) + self.assert_wrapped(litellm.acompletion) + self.assert_wrapped(litellm.text_completion) + self.assert_wrapped(litellm.atext_completion) + + def assert_not_module_patched(self, litellm): + self.assert_not_wrapped(litellm.completion) + self.assert_not_wrapped(litellm.acompletion) + self.assert_not_wrapped(litellm.text_completion) + self.assert_not_wrapped(litellm.atext_completion) + + def assert_not_module_double_patched(self, litellm): + self.assert_not_double_wrapped(litellm.completion) + self.assert_not_double_wrapped(litellm.acompletion) + self.assert_not_double_wrapped(litellm.text_completion) + self.assert_not_double_wrapped(litellm.atext_completion) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py new file mode 100644 index 00000000000..8b140d6cd0c --- /dev/null +++ b/tests/contrib/litellm/utils.py @@ -0,0 +1,15 @@ +import vcr +import os + +# VCR is used to capture and store network requests made to OpenAI. +# This is done to avoid making real calls to the API which could introduce +# flakiness and cost. 
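+#
+# For illustration, a test replays a recorded interaction roughly like this
+# (sketch only; "completion.yaml" is one of the cassettes checked in under
+# tests/contrib/litellm/cassettes, and get_request_vcr is defined below):
+#
+#     with get_request_vcr().use_cassette("completion.yaml"):
+#         litellm.completion(
+#             model="gpt-3.5-turbo",
+#             messages=[{"content": "Hey, what is up?", "role": "user"}],
+#         )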
+def get_request_vcr(): + return vcr.VCR( + cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"), + record_mode="once", + match_on=["path"], + filter_headers=["authorization", "x-api-key", "api-key"], + # Ignore requests to the agent + ignore_localhost=True, + ) \ No newline at end of file diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json new file mode 100644 index 00000000000..fc0b82fadf8 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67db283e00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "f4fa019846d24cc9a50c88d550a339dd" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 18466 + }, + "duration": 737978000, + "start": 1742415934103250000 + }]] From 7e43133d642cd2ace84da7037b7a185db0d7c4df Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 20 Mar 2025 09:26:30 -0400 Subject: [PATCH 03/61] reuse cassettes that are similar across different litellm requests --- .../litellm/cassettes/acompletion.yaml | 106 -------- .../acompletion.yaml_multiple_choices | 107 -------- .../litellm/cassettes/acompletion_stream.yaml | 145 ----------- .../litellm/cassettes/atext_completion.yaml | 106 -------- .../atext_completion.yaml_multiple_choices | 106 -------- .../cassettes/atext_completion_stream.yaml | 141 ----------- ...xt_completion_stream.yaml_multiple_choices | 180 -------------- .../contrib/litellm/cassettes/completion.yaml | 41 ++- ...oices => completion_multiple_choices.yaml} | 37 ++- .../litellm/cassettes/completion_stream.yaml | 85 ++++--- .../completion_stream.yaml_multiple_choices | 234 ------------------ ...> completion_stream_multiple_choices.yaml} | 149 +++++------ .../litellm/cassettes/text_completion.yaml | 106 -------- .../text_completion.yaml_multiple_choices | 106 -------- .../cassettes/text_completion_stream.yaml | 133 ---------- ...xt_completion_stream.yaml_multiple_choices | 177 ------------- tests/contrib/litellm/test_litellm.py | 18 +- tests/contrib/litellm/utils.py | 12 +- 18 files changed, 175 insertions(+), 1814 deletions(-) delete mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml delete mode 100644 tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/acompletion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices rename tests/contrib/litellm/cassettes/{completion.yaml_multiple_choices => completion_multiple_choices.yaml} (60%) delete mode 100644 tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices rename tests/contrib/litellm/cassettes/{acompletion_stream.yaml_multiple_choices => completion_stream_multiple_choices.yaml} (55%) delete mode 100644 
tests/contrib/litellm/cassettes/text_completion.yaml delete mode 100644 tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices delete mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml delete mode 100644 tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml b/tests/contrib/litellm/cassettes/acompletion.yaml deleted file mode 100644 index 1ef458cb6cf..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '83' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLBahsxEL3vV0x16cU2cew4jS+B5NBATaEk0EIJiyzN7irVaoQ0cmuC - /71o7XjXbQK56DBv3tN7M/NcAAijxRKEaiSr1tvxzW2636x89bCd39+mL9/iQ9h8Xq8WP1Z3qhKj - zKD1Eyp+YU0Utd4iG3J7WAWUjFl1ejk/n08Xi09XHdCSRptptefxbHIx5hTWND6bnl8cmA0ZhVEs - 4WcBAPDcvdmj0/hHLOFs9FJpMUZZo1gemwBEIJsrQsZoIkvHYtSDihyj62zf4Ra4wYAf4CsxtEk1 - I3hKkaGRrjauBko8ge+N5I8RkoffhhvYUroeCgasUpQ5kEvWDgDpHLHMA+miPB6Q3dG8pdoHWsd/ - qKIyzsSmDCgjuWw0MnnRobsC4LEbUjrJLXyg1nPJ9Au776azvZzo1zIALw8gE0vb12eHwZ6qlRpZ - GhsHQxZKqgZ1z+w3IpM2NACKQeb/zbymvc9tXP0e+R5QCj2jLn1AbdRp4L4tYD7at9qOM+4Mi4hh - YxSWbDDkPWisZLL7cxJxGxnbsjKuxuCD6W4q77HYFX8BAAD//wMAtRqFIFIDAAA= - headers: - CF-RAY: - - 922fc592ca8ee61b-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:38:09 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=Og1MhUPOSLUYFX6sLOZjXUt6_Ii7DeHec6bu0xEwveU-1742416689-1.0.1.1-24FPrr2zTb6eb.iYCyCZ5tyNDAaMwFjbJQ6MuU6ZZfarYDu945JdhKsS.h0Vc5bvUu4cEVkN072A15WuJ.KstgoBD.hgm.Owir7t6Mfrs4A; - path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=igKx8CPA0FCh5KeOwdnLy585rLuR2kKt7gGASW5nWSA-1742416689508-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '312' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999978' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_c35f7392746227673bdca3d80b74722e - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices 
b/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices deleted file mode 100644 index 69fe82a0cc4..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion.yaml_multiple_choices +++ /dev/null @@ -1,107 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '89' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA8RTwW7bMAy9+ysInZMgTho0y2VYOwwdMHSnYYehMBSJsdXKoipRa7Oi/z7ITmNn - 3YBdhl184ON7fnykngoAYbTYgFCNZNV6O724THc/vnxaf31/9eH68l7rz/PHhb+450TvSjHJDNre - ouIX1kxR6y2yIdfDKqBkzKrl+dnirDxfz990QEsababVnqfL2WrKKWxpOi8XqwOzIaMwig18KwAA - nrpv9ug0PooNzCcvlRZjlDWKzbEJQASyuSJkjCaydCwmA6jIMbrO9jUxtEk1E7hNkaHBgMAEPQ32 - lGZwRQ+gpIOP0KD1uQZMWu7fjiUD7lKUeSSXrB0B0jlimSPphrk5IM9H+5ZqH2gbf6GKnXEmNlVA - Gcllq5HJi2JEfpVJ+U8zOU7/YLgB6fbcGFd3FYeox0EN8f33qAqAm+6i0kkgwgdqPVdMd9j9rlz2 - cmK44QFcrg8gE0s71FeHxE/VKo0sjY2j9IWSqkE9MIfzlUkbGgHjDb828zvtfm7j6r+RHwCl0DPq - ygfURp0OPLQFzC/8T23HjDvDImL4bhRWbDDkPWjcyWT7OxNxHxnbamdcjcEH0x1b3mPxXPwEAAD/ - /wMAj6bv838EAAA= - headers: - CF-RAY: - - 922fe0ef9f095818-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:50 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=aPjfuM9i_nmKcJm5fCd1envIdCwOQVscUCICrBTkJYU-1742417810-1.0.1.1-ADp9ZNwLfBcVhfTOqZ9bjV6taFMKE6YhPfsBJvKDzRAsgzcYNT1xHhcwjuBPt.NhHPZYGalO3QieMp1UU1gMwYJMN1JlwKpDJS5CoYVFdps; - path=/; expires=Wed, 19-Mar-25 21:26:50 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=vDmdz8TD9L7irSgxD2Y3qy54hE9HNQW7QTl3s0jxFGk-1742417810365-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '404' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999962' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_a2320c1466e6e6e7e4ca227666ddef05 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml b/tests/contrib/litellm/cassettes/acompletion_stream.yaml deleted file mode 100644 index eaa66f573f1..00000000000 --- a/tests/contrib/litellm/cassettes/acompletion_stream.yaml +++ /dev/null @@ -1,145 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is 
up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '137' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - how"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuSvumkyUQZ8ufmBwnM4dGyAFHdw","object":"chat.completion.chunk","created":1742416689,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":11,"total_tokens":24,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fc595fb4f9c5e-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:38:09 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=qJIrfX0cSgQGfLPmiIO9pJtmBk4d0TDxYojb3KwrOAQ-1742416689-1.0.1.1-H5.hTje2ckYwcrDNDdl7MVOpEbekK8vcdWlkX69z8CQtZFgoHg3xBQ2p0ijtOfOZoSsO.dkOlaQsLHQLTsnsPz5Ku2XJpBJx48ai9xyu_b4; - path=/; expires=Wed, 19-Mar-25 21:08:09 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=L6iVhGFB5poEf4mgyHgsiQer_LAFpmPwx7BsZksUMxA-1742416689952-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '174' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_7d0818d359483bfb4ae5dd041857b220 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml b/tests/contrib/litellm/cassettes/atext_completion.yaml deleted file mode 100644 index dd13fc8e98e..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '78' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - 
x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLBTuMwEL3nKwafKWq6LYheVoJL97x7Qyhy7Uni4ngse7K0Qv13ZKc0 - 6QLSXnyYN+/5vZl5KwCE0WINQrWSVeft7OGx//26n2+Wrlwc9n/cQ1PXZFd3tGp2G3GdGLTdoeIP - 1o2izltkQ26AVUDJmFTLu+ViWd7e3s8z0JFGm2iN59mPm9WM+7Cl2bxcrE7MlozCKNbwVAAAvOU3 - eXQa92INWSdXOoxRNijW5yYAEcimipAxmsjSsbgeQUWO0WXbG7SWgFsMeAUbegUlHfyCgQYH6oFJ - y8PPKT1g3UeZ7Lve2gkgnSOWKX42/nxCjmerlhofaBv/oYraOBPbKqCM5JKtyORFRo8FwHMeSX+R - UvhAneeK6QXzd/eDmhh3MGJleQKZWNqxvjhN8VKs0sjS2DiZqFBStahH5jh+2WtDE6CYRP5s5ivt - IbZxzf/Ij4BS6Bl15QNqoy4Dj20B04V+13YecTYsIoa/RmHFBkNag8Za9na4HREPkbGrauMaDD6Y - fEBpjcWxeAcAAP//AwBZaYouPwMAAA== - headers: - CF-RAY: - - 922fc59a1ac781f9-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:38:10 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=Bz9ucbRhHir1yYpu5unXOWIq57sqhNmDYXwU8KYxhHk-1742416690-1.0.1.1-SaoVbDJEo.BTnLOYloqvADWkOFfKvIPkYdRataztswTUOadi9nBdpnxOrxqBTCDld5JP_w__0pINDjdJi4sSxdlMlK3f0SG9r54Vxu19sPQ; - path=/; expires=Wed, 19-Mar-25 21:08:10 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=CocuhkG0h7KH_xyamzsz3.bJ.3F_Hbx3vpGagUoqjNk-1742416690671-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '292' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_a2a6419c8a6accc5e78eef471a5b26f5 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices deleted file mode 100644 index 7c33ea81265..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion.yaml_multiple_choices +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '84' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9xTsW7bMBDd9RXXm+0gUmIk8RK0XVIU6dAOHYpAoMmzzJTiMeSpqRHk3wtKtiUn - 
LdA5C4d7947v8R6fCgC0BpeAeqNEt8HNP3zsXPX965VpzuRRPXxZPNxe3r6vWrr/LN9wlhm8uict - e9aJ5jY4Est+gHUkJZSnlhfn1Xl5cVlWPdCyIZdpTZD52cliLl1c8fy0rBY75oatpoRL+FEAADz1 - Z9boDf3GJZzO9pWWUlIN4fLQBICRXa6gSskmUV5wNoKavZDvZd+Qc/wObvgRtPLwCQYCbLkDYaO2 - 11NipHWXVBbuO+cmgPKeRWXjveS7HfJ8EOm4CZFX6QUV19bbtKkjqcQ+C0rCAYsJ+ZXz8u07LwDu - +hh0R/4wRG6D1MI/qb/uapiGY+5GrNolBIVFuUl9TzoaVhsSZV2avCVqpTdkRuYYOdUZyxNguq/X - Yv42e7BtffM/40dAawpCpg6RjNXHhse2SPlX/qvt8MS9YEwUf1lNtViKeQ2G1qpzQ2owbZNQW6+t - byiGaPvo5DUWz8UfAAAA//8DALgzECozBAAA - headers: - CF-RAY: - - 922fe0fe5c567048-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:52 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=gCkAh45BSGsfWua_FvRAhLkamJr4XgRd0lcnofceYBY-1742417812-1.0.1.1-_Ift61wmuekINzK5SeNl.ZXlL8hJS1voTYf6n8_6aUhDSUOfy.a2z5vgSwlHYq9IvWGj3LuIw32DO0HcDW_yICnwNQSWKsmr9e1.SAjWwMY; - path=/; expires=Wed, 19-Mar-25 21:26:52 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=QA9rcm0xWUBveoAGWFSSbnWbR50iE8_T0TS2HZr70GQ-1742417812613-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '284' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999964' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_4f430f54186e98254720edf4049f781c - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml deleted file mode 100644 index bfb6b17bb6d..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml +++ /dev/null @@ -1,141 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '132' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuaYIxuUbpqze85e3y4KW5N6pZGY","object":"chat.completion.chunk","created":1742417162,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":10,"total_tokens":19,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fd11f3abc6fd4-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; 
charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:46:02 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=V_JLXb_xlWAgshE0jP2yNY0KBReG3a8K9k.1.dtFMCg-1742417162-1.0.1.1-FZF9YKksh6mPiowW_hJcxYsQgyc8V.sjUl892Qq3mA7LD_3uGUbH7U.DjmnY8HxjecXeWIVp3wTlLleq10jNmS8WvmrJg76.LaSNBV6tQ4U; - path=/; expires=Wed, 19-Mar-25 21:16:02 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=WqtIXB3UQHUJ15_13eNn1X9VBnzvnBe8zPBAhTHs9K0-1742417162393-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '163' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999979' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_e2fdb82e16db2663c51b4ae5540e74e5 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices deleted file mode 100644 index 7980c0ce553..00000000000 --- a/tests/contrib/litellm/cassettes/atext_completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,180 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '138' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul1vCIc82DFmjdrhsOnIzdARu1H","object":"chat.completion.chunk","created":1742417811,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0faab2cc9bb-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:51 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=T4a2h0oJlCPxhohnl5Y8ce47sMTKxTSiMx4YnMHdCeE-1742417811-1.0.1.1-7h_MyNOSh.23MJSDS07CHWQSCd54y1UXY03vB9MV_upmjlqus6.JWajf9T9VvkjWxvSy_46nZsEU.neeA_2Ok7EBScbstRENB_le3WcBoCs; - path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=s1wC2SePnrWoNLlq9w_9HSceo63N6ZY6bl87NsDSz7U-1742417811921-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '183' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - 
x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999964' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_947acd4ad0207061c681399bb70031ac - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion.yaml b/tests/contrib/litellm/cassettes/completion.yaml index 8b1c5e95ffb..8a48a419be1 100644 --- a/tests/contrib/litellm/cassettes/completion.yaml +++ b/tests/contrib/litellm/cassettes/completion.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo"}' + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1}' headers: accept: - application/json @@ -9,9 +9,12 @@ interactions: connection: - keep-alive content-length: - - '83' + - '89' content-type: - application/json + cookie: + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -41,18 +44,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//jJJNb9swDIbv/hWEzknQfGFbLgPSAUUvvXTDDkNhKDJjq5FFRaTaBUX+ - +yAnjd21A3bRgQ9fii/JlwJA2UqtQJlGi2mDG6+v080u/Vi399c3e6zx2634u/XefN88870aZQVt - HtHIq2piqA0OxZI/YRNRC+aq00+L2WK6/DJfdKClCl2W1UHG88lyLCluaHw1nS3PyoasQVYr+FUA - ALx0b+7RV/hbreBq9BppkVnXqFaXJAAVyeWI0syWRXtRox4a8oK+a/uOBNpkmhE8JhZoMCIIwUkG - B0rwbKUB7Q+wT8jZGgNFEM077nirD9DoJ5zAz0YLGO3hFhp04aL+Ovw74jaxzt59cm4AtPckuvsg - u344k+PFp6M6RNrwX1K1td5yU0bUTD57YqGgOnosAB66eaY3I1IhUhukFNph9910fiqn+g32cLY8 - QyHRro/PP48+qFZWKNo6HuxDGW0arHplvzydKksDUAw8v2/mo9on39bX/1O+B8ZgEKzKELGy5q3h - Pi1ivu9/pV1m3DWsGOOTNViKxZj3UOFWJ3e6PMUHFmzLrfU1xhBtd355j8Wx+AMAAP//AwCIlZxL - fQMAAA== + H4sIAAAAAAAAAwAAAP//jFJNaxsxEL3vrxh0tk38VQdfSk0PpZCGXkqhhEWWZleytRpVmq3rBP/3 + orXj3aQp9KLDvHlP897MUwEgrBZrEMpIVk1w483HD6v774+z3eawr+p7vfm6f7z7bO7w24J+ilFm + 0HaHip9ZE0VNcMiW/BlWESVjVp2uFrPF6t3t8rYDGtLoMq0OPJ5PlmNu45bGN9PZ8sI0ZBUmsYYf + BQDAU/fmGb3G32INN6PnSoMpyRrF+toEICK5XBEyJZtYehajHlTkGX039hdiaFplRrBrE4PBiMAE + 2Q1Ir8GgC3CwbED6IxvrazhSC42tDYNH1BP4RAeQEbu6Juvr98O/IlZtktmrb50bANJ7Ypmz6lw+ + XJDT1ZejOkTapldUUVlvkykjykQ+e0hMQXToqQB46PJrX0QiQqQmcMm0x+676fwsJ/qN9eBsegGZ + WLq+Pl+M3lArNbK0Lg3yF0oqg7pn9suSrbY0AIqB57+HeUv77Nv6+n/ke0ApDIy6DBG1VS8N920R + 8z3/q+2acTewSBh/WYUlW4x5Dxor2brzpYl0TIxNWVlfYwzRdueW91icij8AAAD//wMACIVjLG0D + AAA= headers: CF-RAY: - - 922fb3249bf7e5c3-IAD + - 9235828e7e930798-IAD Connection: - keep-alive Content-Encoding: @@ -60,15 +63,9 @@ interactions: Content-Type: - application/json Date: - - Wed, 19 Mar 2025 20:25:34 GMT + - Thu, 20 Mar 2025 13:20:59 GMT Server: - cloudflare - Set-Cookie: - - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; - path=/; expires=Wed, 19-Mar-25 20:55:34 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -82,7 +79,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '536' + - 
'433' openai-version: - '2020-10-01' strict-transport-security: @@ -94,13 +91,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999978' + - '1999994' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_831c50d74f79d3c6f55b46c6165ad726 + - req_d81e25e4ffb6cba1e92b5907b689fefb status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_multiple_choices.yaml similarity index 60% rename from tests/contrib/litellm/cassettes/completion.yaml_multiple_choices rename to tests/contrib/litellm/cassettes/completion_multiple_choices.yaml index 3284d55e07f..c39b5e948d3 100644 --- a/tests/contrib/litellm/cassettes/completion.yaml_multiple_choices +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices.yaml @@ -13,7 +13,8 @@ interactions: content-type: - application/json cookie: - - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -43,18 +44,18 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAA8RTTWsbMRC9768YdLaDHTs49aU0oV+UFkpvKWGRpfGuYq1GSKMkJuS/F2kd76ZJ - oZfSyx7mzZt9783ooQIQRos1CNVKVp2304vLtLs/t1fvLr/srn7suu9f31y8d/Zjsh/wVkwygzY3 - qPiJdaKo8xbZkOthFVAy5qnz1fJ0OV+dz1YF6EijzbTG83RxcjblFDY0nc1Pzw7MlozCKNbwswIA - eCjfrNFpvBdrmE2eKh3GKBsU62MTgAhkc0XIGE1k6VhMBlCRY3RF9jdi6JJqJ3CTIkOLAYEJihv4 - RHcgA8Ke0tsxP+A2RZn1u2TtCJDOEcvsvyi/PiCPR62WGh9oE3+jiq1xJrZ1QBnJZV2RyYtqRH4R - wPyfBtDTsnO4M9yCdHtujWtKxSHqPh0lHXyGFq0vAJOW+/8YVQVwXc4nPQtE+ECd55pph+V380U/ - TgwHO4CL5QFkYmmH+nI1eWVarZGlsXGUvlBStagH5nCrMmlDI2C84ZdiXpvd+zau+ZvxA6AUekZd - +4DaqOeGh7aA+Tn/qe2YcREsIoZbo7BmgyHvQeNWJtvfmYj7yNjVW+MaDD6Ycmx5j9Vj9QsAAP// - AwBjQR3ubAQAAA== + H4sIAAAAAAAAAwAAAP//xFRNb9swDL37VxA6J0HdJM2Wy9BiGPYN7DwUhiIxtlJZFCQ6nVH0vw9S + sthdOqCXYhcf+Pie+B4JPxQAwmixBqEayar1dnrz/nr1Y96trr/EXfvt7W5/t/9QfnVb8jebz2KS + GLTZoeI/rJmi1ltkQ+4Aq4CSMamWq8XlYnX15uoiAy1ptIlWe57OZ8spd2FD04vycnlkNmQURrGG + nwUAwEP+phmdxl9iDVknV1qMUdYo1qcmABHIpoqQMZrI0rGYDKAix+jy2N+Joe1UM4FdFxkaDAjS + aQgodQ9McBCAnjq4N9yAdD03xtW54hD1DD7SPSjp4BM0aH0GmLTs343fDLjtokyeXWftCJDOEcuU + WXZ7e0QeT/4s1T7QJv5FFVvjTGyqgDKSS14ikxfFiHwWWvl6oTFBOoEcXo7hPK1W9qPEZMBc1WRc + /R+jKgBu88l1TwIRPlDruWK6w/xcOT/IieHIB3CxOIJMLO1QX64mz6hVGlkaG0fpCyVVg3pgDvct + O21oBIw3fD7Mc9oH38bVL5EfAKXQM+rKB9RGPTU8tAVMv4B/tZ0yzgOLiGFvFFZsMKQ9aNzKzh7u + TMQ+MrbV1rgagw8mH1vaY/FY/AYAAP//AwBdjdH5oAQAAA== headers: CF-RAY: - - 922fe0dd3980081e-IAD + - 923582928a520798-IAD Connection: - keep-alive Content-Encoding: @@ -62,13 +63,9 @@ interactions: Content-Type: - application/json Date: - - Wed, 19 Mar 2025 20:56:47 GMT + - Thu, 20 Mar 2025 13:21:01 GMT Server: - cloudflare - Set-Cookie: - - __cf_bm=4IvKUKDYbguDfkct3LJSPjqtZQXESBNXlj0FYX2EhDw-1742417807-1.0.1.1-NheqJGPsBtnZt86lvpEQ399jpX9C0.Meer7zqTrBFvtM1nDS.F2nb3Am2CumeUA9gl3hKjHDRDn2VRJSIEJL1F4Ki3Doz2f86LPah_teN_M; - path=/; expires=Wed, 19-Mar-25 21:26:47 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -82,7 +79,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '429' + - '693' openai-version: - '2020-10-01' strict-transport-security: @@ 
-94,13 +91,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999963' + - '1999993' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - - 1ms + - 0s x-request-id: - - req_039283032c4707703c924619286ae6b1 + - req_a9fed9cf625f2496c21b9413f11fa6da status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml b/tests/contrib/litellm/cassettes/completion_stream.yaml index 8b9ee2258a0..7a7febb6e30 100644 --- a/tests/contrib/litellm/cassettes/completion_stream.yaml +++ b/tests/contrib/litellm/cassettes/completion_stream.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true}}' headers: accept: - application/json @@ -9,12 +9,9 @@ interactions: connection: - keep-alive content-length: - - '137' + - '143' content-type: - application/json - cookie: - - __cf_bm=j1UnG6U2N_m2w7_ZZs_IcvkLv3zZHG2XAZJvgnwKIX4-1742415934-1.0.1.1-3q9GhK0Y_WF039ZDEh.fvsi1rJCJJCq6hn15xodxGjAHi3Ard0mxXU2Ae709hsZPlid0d8teeWknL0oS5TuPPsBimWG1qT6LRUFmo1J3pTU; - _cfuvid=YonKtY9iy9G7TG4c1XY31ZGLtQkwk9WEsAUQm55fiHs-1742415934844-0.0.1.1-604800000 host: - api.openai.com user-agent: @@ -43,89 +40,85 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + string: 'data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: 
{"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: 
{"id":"chatcmpl-BCuOjHffUHnlUbKnSQdOO21Tk0SzR","object":"chat.completion.chunk","created":1742416429,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":21,"total_tokens":34,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + data: {"id":"chatcmpl-BDA7NsqzvZlObxjeBtas2VM8zS9L7","object":"chat.completion.chunk","created":1742476857,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":20,"total_tokens":33,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} data: [DONE] @@ -134,15 +127,21 @@ interactions: ' headers: CF-RAY: - - 922fbf33c84f05d4-IAD + - 92358286cc050820-IAD Connection: - keep-alive Content-Type: - text/event-stream; charset=utf-8 Date: - - Wed, 19 Mar 2025 20:33:49 GMT + - Thu, 20 Mar 2025 13:20:57 GMT Server: - cloudflare + Set-Cookie: + - __cf_bm=JpzSaUROcp7sHhVWx1Wg27uDVgIq4vtDDwL7tZMUMG0-1742476857-1.0.1.1-eRa7HeyAvoVA0z.zlCfx_r9.xUo7yEVtZ1ptiY1CSeeePSEkliaQZrsM4AVOsv7GH9Ftos4TLzwxoFAc8sVc7GGKJb.QKZXcTpVEemhzheI; + path=/; expires=Thu, 20-Mar-25 13:50:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=vOlVxqZHnk9s1Ko78yMITZ4lWCYFB_cPjAEkvBcMff0-1742476857669-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked X-Content-Type-Options: @@ -156,7 +155,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '484' + - '150' openai-version: - '2020-10-01' strict-transport-security: @@ -168,13 +167,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999978' + - '1999994' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_ed79ffc55abcad13d82ad45a85b50cef + - req_c6b88c23a429b4c97b52dbf20f562742 status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices deleted file mode 100644 index 71352afe403..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,234 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '143' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - here"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - 
here"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - chat"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - with"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - anything"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - about"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - might"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - need"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: 
{"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCukw0o6NFfM054jHWRPGqnLoXYll","object":"chat.completion.chunk","created":1742417806,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":34,"total_tokens":47,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0d84bc32081-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:46 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=bUH6Gw.1xWGTdrtJJp3Kd1QwQu.citY3dSw84SuP9a8-1742417806-1.0.1.1-nd3_tPgN5caA927YXL7MDwbkwDcsY2.cOUvLdkUaaYxi7UqUPwCwwGgjDsSpkg1AHFx7aR.wS8GKU2eBr2aujsFMmkWLmL_ohd4qBtE6K84; - path=/; expires=Wed, 19-Mar-25 21:26:46 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=spcOdmzqi_GTX7NW.qMkPKuD0G7qOz1ab6PGnOf_m_s-1742417806409-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '172' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999962' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_68b16a7f5aec94206951718ec91e4166 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml similarity index 55% rename from tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices rename to tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml index b3f1528ee49..9fb9a1bfbc3 100644 --- a/tests/contrib/litellm/cassettes/acompletion_stream.yaml_multiple_choices +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices.yaml @@ -15,11 +15,11 @@ interactions: host: - api.openai.com user-agent: - - AsyncOpenAI/Python 1.66.5 + - OpenAI/Python 1.66.5 x-stainless-arch: - arm64 x-stainless-async: - - async:asyncio + - 'false' x-stainless-lang: - python x-stainless-os: @@ -40,163 +40,174 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + string: 'data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hey"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - much"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - just"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - here"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" ready"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - to"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" to"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - help"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: 
{"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - and"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - chat"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" I"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: 
{"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + 
data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - data: {"id":"chatcmpl-BCukzxv320YfuCs2wHpPz1HsrVPJn","object":"chat.completion.chunk","created":1742417809,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":40,"total_tokens":53,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + data: {"id":"chatcmpl-BDA7OEUOiPJr6S3fbWdwAE3GbR2n5","object":"chat.completion.chunk","created":1742476858,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":13,"completion_tokens":43,"total_tokens":56,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} data: [DONE] @@ -205,20 +216,20 @@ interactions: ' headers: CF-RAY: - - 922fe0e1892df27e-IAD + - 9235828a5ad20798-IAD Connection: - keep-alive Content-Type: - text/event-stream; charset=utf-8 Date: - - Wed, 19 Mar 2025 20:56:49 GMT + - Thu, 20 Mar 2025 13:20:58 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=_sDHtJttvjcmNqUWyQLC0HY_6ceDdhabvgxP_mSWetQ-1742417809-1.0.1.1-CqlTg5EIrNFOnvIRAcesGqLwPwg3FZ18khnoA0HR26ZkfsHWDW2u.nJYbbUMztUsr2FmgqcE_dOzuuEF.u5QN04xVbjgSkJ9zBXVj1Y5Ei0; - path=/; expires=Wed, 19-Mar-25 21:26:49 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=8uxWeZujXJxknKPHUVTv93owTsLP6A_7S9rmPUqYFqM-1742476858-1.0.1.1-Z1890C3uorOQnRF57DQFSmxnNXHE0TaP_oXxoolxhkaNxA2pyytE3307uN6CaZ3u9yu9ztH.3.HRNfSprzJA4o2PVittzgtZ6Bf16p_4omM; + path=/; expires=Thu, 20-Mar-25 13:50:58 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=3J8OnKlA8uybv8hIzboB47mhL0FVEQahZhquEcDPxcM-1742417809367-0.0.1.1-604800000; + - _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked @@ -233,7 +244,7 @@ interactions: openai-organization: - datadog-4 openai-processing-ms: - - '329' + - '195' openai-version: - '2020-10-01' strict-transport-security: @@ -245,13 +256,13 @@ interactions: x-ratelimit-remaining-requests: - '14999' x-ratelimit-remaining-tokens: - - '1999963' + - '1999993' x-ratelimit-reset-requests: - 4ms x-ratelimit-reset-tokens: - - 1ms + - 0s x-request-id: - - req_95fb491365244b0b18b9daf1bc26cf5f + - req_802b8ab4acaedc8189bfb4216f72135f status: code: 200 message: OK diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml b/tests/contrib/litellm/cassettes/text_completion.yaml deleted file mode 100644 index 80fd89bcb39..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion.yaml +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '78' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - 
x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Z7tgM7jtvGlwINiqSPQ9Fji0CgyZXEhOIS5KqtEfjf - C0q2JbcNkIsOOzujmeE+FQBoDW4AdaNEt8HN3990tw/fFu6Tflvx6sutbR4/7D5///q6dTcOZ5nB - 2wfScmRdaG6DI7HsB1hHUkJZdfnm6vJqub5erXqgZUMu0+og89XFei5d3PJ8sbxcH5gNW00JN/Cj - AAB46r/Zozf0GzewmB0nLaWkasLNaQkAI7s8QZWSTaK84GwENXsh39u+I+f4FdzxL9DKw0cYCLDj - DoSN2r2bEiNVXVLZuO+cmwDKexaVg/eW7w/I/mTScR0ib9NfVKyst6kpI6nEPhtKwgF7dF8A3Pdl - dGf5MERug5TCj9T/7npQw7H9EVseekJhUW4yP5LOxEpDoqxLky5RK92QGZlj8aozlidAMYn8r5n/ - aQ+xra9fIj8CWlMQMmWIZKw+DzyuRcq3+dzaqeLeMCaKP62mUizF/AyGKtW54Wow7ZJQW1bW1xRD - tP3p5Gcs9sUfAAAA//8DAEy0bTM5AwAA - headers: - CF-RAY: - - 922fb31c6b4a3b86-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:25:33 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=OVR0c7pmjNlvkgITOy_5zxiGhdaeoh2rYi0sMIwKUGw-1742415933-1.0.1.1-9ldeCN1Z0Gzz63GmLhFMPkykl_aiDMZHh9jdn_aB8Mwaq8j8c3UX0EJL_RDMTdRgRFAjD7RDiSRhuM45kkZ8yvViyheANvDky_wdNXC7pmg; - path=/; expires=Wed, 19-Mar-25 20:55:33 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '310' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999980' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_f68897fc201c7e0cbeac660a95b4e136 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices deleted file mode 100644 index 743d1a085e2..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion.yaml_multiple_choices +++ /dev/null @@ -1,106 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '84' - content-type: - - application/json - cookie: - - _cfuvid=pYi67r3mdZAGs2TdjYlNetH8he_L8lXNKD2EhBsZw4s-1742415933301-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: 
- body: - string: !!binary | - H4sIAAAAAAAAA9xTwW4TMRC971cMc06qbGjUNhdEikSBWw9cULVy7Mmuweux7FlKqPrvyLtJdkNB - 4szFh3nzxu95np8KALQG14C6UaLb4Oab286VPzcf79/fNtvN4nH5rnt7f/1Z6NMqRJxlBm+/kpYj - 60JzGxyJZT/AOpISylPLq8vlZXl1XZY90LIhl2l1kPnri9Vcurjl+aJcrg7Mhq2mhGv4UgAAPPVn - 1ugN/cA1LGbHSkspqZpwfWoCwMguV1ClZJMoLzgbQc1eyPey78g5fgV3/AhaefgAAwH23IGwUfs3 - U2KkXZdUFu475yaA8p5FZeO95IcD8nwS6bgOkbfpNyrurLepqSKpxD4LSsIBiwn5hfPy/3deADz0 - MejO/GGI3AaphL9Rf93NMA3H3I3Y8pAQFBblJvUj6WxYZUiUdWnylqiVbsiMzDFyqjOWJ8B0Xy/F - /Gn2YNv6+l/Gj4DWFIRMFSIZq88Nj22R8q/8W9vpiXvBmCh+t5oqsRTzGgztVOeG1GDaJ6G22llf - UwzR9tHJayyei18AAAD//wMABa0m/DMEAAA= - headers: - CF-RAY: - - 922fe0f67e45081e-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 19 Mar 2025 20:56:51 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=FlHOj93lCOi0G8vOplzZPeLSW3Vdv0Hb2HoLy7MM6Us-1742417811-1.0.1.1-kahp3aLVlFkNG2jXCdMC.nUEAogyYTUDKmeLcLdMShR8EnclldclSdDIIWq6EgH9RGnwBGy5.NfkQ9REHjpAGzXjaNfX3IBb1LAF6cQGWOE; - path=/; expires=Wed, 19-Mar-25 21:26:51 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '347' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999963' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_0a1d4fa700dec9fa2f39b6a53af4d9c2 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml b/tests/contrib/litellm/cassettes/text_completion_stream.yaml deleted file mode 100644 index bc9e6f2efc4..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion_stream.yaml +++ /dev/null @@ -1,133 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '132' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - are"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCuaXbsSaimJMsqp1ieCm1mP2RdLh","object":"chat.completion.chunk","created":1742417161,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":8,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fd11b9ae3c971-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:46:01 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs; - path=/; expires=Wed, 19-Mar-25 21:16:01 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - 
nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '226' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999980' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_6c0123908864d876c53702ce7507e69e - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices b/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices deleted file mode 100644 index 62b0b934b66..00000000000 --- a/tests/contrib/litellm/cassettes/text_completion_stream.yaml_multiple_choices +++ /dev/null @@ -1,177 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"user","content":"Hello world"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '138' - content-type: - - application/json - cookie: - - _cfuvid=ReP56wFcIwz4J_wQ9KjzLau5vHIddwYVpMetFMYh57w-1742417161882-0.0.1.1-604800000; - __cf_bm=burGHM6VfMUi7tY4F9HPOZGLPGxfqoQv0QDudHYBzAI-1742417161-1.0.1.1-MC78dEOJbnnU73JGAYQb93S4ex3NPLLM70pH_.FPPXa1FeTqG9rlyPAZyx58mF.Z5aEhuPWTN45xS8eCJsKzuP20WOjvGlJAgW166y3SdTs - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.66.5 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.66.5 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: 
{"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - How"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - can"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - I"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - assist"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - you"},"logprobs":null,"finish_reason":null}],"usage":null} - - - 
data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" - today"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} - - - data: {"id":"chatcmpl-BCul0l3ftamTBg72ho8ijaeQqDUpr","object":"chat.completion.chunk","created":1742417810,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":9,"completion_tokens":20,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 922fe0f349d2081e-IAD - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Wed, 19 Mar 2025 20:56:50 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '172' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999963' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_775dd461f5f264e40aeb0c5ab23fe071 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index f3ce38c872f..f1bd6c1f455 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,11 +1,11 @@ import pytest +from tests.contrib.litellm.utils import get_cassette_name + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) 
@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): - cassette = "completion.yaml" if not stream else "completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{ "content": "Hey, what is up?","role": "user"}] litellm.completion( model="gpt-3.5-turbo", @@ -17,9 +17,7 @@ def test_litellm_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_acompletion(litellm, request_vcr, stream, n): - cassette = "acompletion.yaml" if not stream else "acompletion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{ "content": "Hey, what is up?","role": "user"}] await litellm.acompletion( model="gpt-3.5-turbo", @@ -31,9 +29,7 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): - cassette = "text_completion.yaml" if not stream else "text_completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", @@ -44,9 +40,7 @@ def test_litellm_text_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): - cassette = "atext_completion.yaml" if not stream else "atext_completion_stream.yaml" - choice_suffix = "_multiple_choices" if n > 1 else "" - with request_vcr.use_cassette(cassette + choice_suffix): + with request_vcr.use_cassette(get_cassette_name(stream, n)): await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 8b140d6cd0c..f95503dce6c 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -1,6 +1,8 @@ import vcr import os +CASSETTE_EXTENSION = ".yaml" + # VCR is used to capture and store network requests made to Anthropic. # This is done to avoid making real calls to the API which could introduce # flakiness and cost.
@@ -12,4 +14,12 @@ def get_request_vcr(): filter_headers=["authorization", "x-api-key", "api-key"], # Ignore requests to the agent ignore_localhost=True, - ) \ No newline at end of file + ) + +# Get the name of the cassette to use for a given test. +# All LiteLLM requests that use OpenAI get routed to the chat completions endpoint, +# so we can reuse the same cassette for each combination of stream and n. +def get_cassette_name(stream, n): + stream_suffix = "_stream" if stream else "" + choice_suffix = "_multiple_choices" if n > 1 else "" + return "completion" + stream_suffix + choice_suffix + CASSETTE_EXTENSION From 65714e5d9905c4366ee4af9a339becde8c1f2efd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 20 Mar 2025 14:29:17 -0400 Subject: [PATCH 04/61] add test snapshots --- ddtrace/contrib/internal/litellm/patch.py | 3 ++ ddtrace/contrib/internal/litellm/utils.py | 6 ++++ ...llm.test_litellm_acompletion[False-1].json | 28 +++++++++++++++++++ ...llm.test_litellm_acompletion[False-2].json | 28 +++++++++++++++++++ ...ellm.test_litellm_acompletion[True-2].json | 28 +++++++++++++++++++ ...est_litellm_atext_completion[False-1].json | 28 +++++++++++++++++++ ...est_litellm_atext_completion[False-2].json | 28 +++++++++++++++++++ ...test_litellm_atext_completion[True-2].json | 28 +++++++++++++++++++ ....test_litellm.test_litellm_completion.json | 10 +++---- ...ellm.test_litellm_completion[False-1].json | 28 +++++++++++++++++++ ...ellm.test_litellm_completion[False-2].json | 28 +++++++++++++++++++ ...tellm.test_litellm_completion[True-2].json | 28 +++++++++++++++++++ ...test_litellm_text_completion[False-1].json | 28 +++++++++++++++++++ ...test_litellm_text_completion[False-2].json | 28 +++++++++++++++++++ ....test_litellm_text_completion[True-2].json | 28 +++++++++++++++++++ 15 files changed, 350 insertions(+), 5 deletions(-) create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 1ce2e073c73..911fc2a7147 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -9,6 +9,7 @@ from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap from
ddtrace.contrib.internal.litellm.utils import get_provider +from ddtrace.contrib.internal.litellm.utils import tag_request from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -48,6 +49,7 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): span = _create_span(litellm, pin, func, instance, args, kwargs) + tag_request(span, kwargs) try: return func(*args, **kwargs) except Exception: @@ -60,6 +62,7 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span = _create_span(litellm, pin, func, instance, args, kwargs) + tag_request(span, kwargs) try: return await func(*args, **kwargs) except Exception: diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index dd6d766ad2b..f3e7721ee7f 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -5,3 +5,9 @@ def get_provider(model): return parsed_model[0] else: return "" + +def tag_request(span, kwargs): + if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: + span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) + + diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json new file mode 100644 index 00000000000..900c1ac4412 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11157000, + "start": 1742495036719661000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json new file mode 100644 index 00000000000..8183d85e848 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11435000, + "start": 1742495036748464000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json 
b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json new file mode 100644 index 00000000000..a6bc65181c9 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.acompletion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11029000, + "start": 1742495036690303000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json new file mode 100644 index 00000000000..a05dd9ffeda --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11393000, + "start": 1742495036919703000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json new file mode 100644 index 00000000000..1bb4b5798f8 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 11460000, + "start": 1742495036946048000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json new file mode 100644 index 00000000000..cae9d84f2b9 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.atext_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", 
+ "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 10946000, + "start": 1742495036893874000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json index fc0b82fadf8..60e83997323 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -10,19 +10,19 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67db283e00000000", + "_dd.p.tid": "67dc5d3c00000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", "litellm.request.provider": "", - "runtime-id": "f4fa019846d24cc9a50c88d550a339dd" + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 18466 + "process_id": 34947 }, - "duration": 737978000, - "start": 1742415934103250000 + "duration": 42807000, + "start": 1742495036462994000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json new file mode 100644 index 00000000000..697da49dd16 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 57377000, + "start": 1742495036556493000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json new file mode 100644 index 00000000000..6d1d2839507 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6095000, + "start": 1742495036633397000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json new file mode 100644 index 00000000000..f563f841768 
--- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6195000, + "start": 1742495036533565000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json new file mode 100644 index 00000000000..28bf0d39941 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 7254000, + "start": 1742495036824538000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json new file mode 100644 index 00000000000..f40a76bcaa5 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 6404000, + "start": 1742495036845984000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json new file mode 100644 index 00000000000..7d1105f25ef --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json @@ -0,0 +1,28 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.text_completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67dc5d3c00000000", + "language": "python", + "litellm.request.model": "gpt-3.5-turbo", + "litellm.request.provider": "", + "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + }, + 
"metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 34947 + }, + "duration": 5760000, + "start": 1742495036803937000 + }]] From 462b576f30499749c64600f6f6b3a0d190b74f6c Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 14:01:30 -0400 Subject: [PATCH 05/61] trace get_llm_provider --- ddtrace/contrib/internal/litellm/patch.py | 28 ++++++++++++++++------- ddtrace/contrib/internal/litellm/utils.py | 11 ++++++++- ddtrace/llmobs/_integrations/litellm.py | 1 + 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 911fc2a7147..e162218ee13 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -8,8 +8,8 @@ from ddtrace.contrib.trace_utils import unwrap from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap -from ddtrace.contrib.internal.litellm.utils import get_provider from ddtrace.contrib.internal.litellm.utils import tag_request +from ddtrace.contrib.internal.litellm.utils import tag_model_and_provider from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -17,10 +17,7 @@ config._add( "litellm", - { - "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), - "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), - }, + {}, ) @@ -35,12 +32,9 @@ def get_version(): def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" integration = litellm._datadog_integration - model = get_argument_value(args, kwargs, 0, "model", None) span = integration.trace( pin, "litellm.%s" % func.__name__, - model=model, - provider=get_provider(model), submit_to_llmobs=False, ) return span @@ -48,6 +42,7 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -56,11 +51,14 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: + # tag model and provider + tag_model_and_provider(litellm, span, requested_model) span.finish() @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -69,9 +67,21 @@ async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: + # tag model and provider + tag_model_and_provider(litellm, span, requested_model) span.finish() +@with_traced_module +def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) + integration = litellm._datadog_integration + model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) + # Store the provider information in the integration + integration._provider_map[requested_model] = custom_llm_provider + return model, custom_llm_provider, dynamic_api_key, api_base + + def 
patch(): if getattr(litellm, "_datadog_patch", False): return @@ -86,6 +96,7 @@ def patch(): wrap("litellm", "acompletion", traced_acompletion(litellm)) wrap("litellm", "text_completion", traced_completion(litellm)) wrap("litellm", "atext_completion", traced_acompletion(litellm)) + wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -98,5 +109,6 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") + unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f3e7721ee7f..ad4970da79b 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,10 +1,19 @@ # TODO: temporary since we may want to intercept get_llm_provider response -def get_provider(model): +def get_provider(model, kwargs): + if "custom_llm_provider" in kwargs: + return kwargs["custom_llm_provider"] parsed_model = model.split("/") if len(parsed_model) == 2: return parsed_model[0] else: return "" + +def tag_model_and_provider(litellm, span, requested_model): + span.set_tag_str("litellm.request.model", requested_model) + integration = litellm._datadog_integration + provider = integration._provider_map.get(requested_model, None) + if provider: + span.set_tag_str("litellm.request.provider", provider) def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index afe7a373f06..9602cf7336c 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -8,6 +8,7 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" + _provider_map = {} def _set_base_span_tags( self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] From a01b4159ba56096861495c5a1bd47f1e433cb8e6 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 14:06:40 -0400 Subject: [PATCH 06/61] remove provider tagging for now --- ddtrace/contrib/internal/litellm/patch.py | 21 ++----------------- ddtrace/contrib/internal/litellm/utils.py | 17 --------------- ...llm.test_litellm_acompletion[False-1].json | 11 +++++----- ...llm.test_litellm_acompletion[False-2].json | 11 +++++----- ...ellm.test_litellm_acompletion[True-2].json | 11 +++++----- ...est_litellm_atext_completion[False-1].json | 11 +++++----- ...est_litellm_atext_completion[False-2].json | 11 +++++----- ...test_litellm_atext_completion[True-2].json | 11 +++++----- ....test_litellm.test_litellm_completion.json | 11 +++++----- ...ellm.test_litellm_completion[False-1].json | 11 +++++----- ...ellm.test_litellm_completion[False-2].json | 11 +++++----- ...tellm.test_litellm_completion[True-2].json | 11 +++++----- ...test_litellm_text_completion[False-1].json | 11 +++++----- ...test_litellm_text_completion[False-2].json | 11 +++++----- ....test_litellm_text_completion[True-2].json | 11 +++++----- 15 files changed, 67 insertions(+), 114 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index e162218ee13..4cee84768b9 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -9,7 +9,6 @@ from ddtrace.contrib.trace_utils import 
with_traced_module from ddtrace.contrib.trace_utils import wrap from ddtrace.contrib.internal.litellm.utils import tag_request -from ddtrace.contrib.internal.litellm.utils import tag_model_and_provider from ddtrace.llmobs._integrations import LiteLLMIntegration from ddtrace.trace import Pin from ddtrace.internal.utils import get_argument_value @@ -32,9 +31,11 @@ def get_version(): def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" integration = litellm._datadog_integration + model = get_argument_value(args, kwargs, 0, "model", None) span = integration.trace( pin, "litellm.%s" % func.__name__, + model=model, submit_to_llmobs=False, ) return span @@ -42,7 +43,6 @@ def _create_span(litellm, pin, func, instance, args, kwargs): @with_traced_module def traced_completion(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -51,14 +51,11 @@ def traced_completion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: - # tag model and provider - tag_model_and_provider(litellm, span, requested_model) span.finish() @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) span = _create_span(litellm, pin, func, instance, args, kwargs) tag_request(span, kwargs) try: @@ -67,21 +64,9 @@ async def traced_acompletion(litellm, pin, func, instance, args, kwargs): span.set_exc_info(*sys.exc_info()) raise finally: - # tag model and provider - tag_model_and_provider(litellm, span, requested_model) span.finish() -@with_traced_module -def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): - requested_model = get_argument_value(args, kwargs, 0, "model", None) - integration = litellm._datadog_integration - model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) - # Store the provider information in the integration - integration._provider_map[requested_model] = custom_llm_provider - return model, custom_llm_provider, dynamic_api_key, api_base - - def patch(): if getattr(litellm, "_datadog_patch", False): return @@ -96,7 +81,6 @@ def patch(): wrap("litellm", "acompletion", traced_acompletion(litellm)) wrap("litellm", "text_completion", traced_completion(litellm)) wrap("litellm", "atext_completion", traced_acompletion(litellm)) - wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -109,6 +93,5 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") - unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index ad4970da79b..f9fc64dfeee 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,20 +1,3 @@ -# TODO: temporary since we may want to intercept get_llm_provider response -def get_provider(model, kwargs): - if "custom_llm_provider" in kwargs: - return kwargs["custom_llm_provider"] - parsed_model = model.split("/") - if len(parsed_model) == 2: - return parsed_model[0] - else: - return "" - -def tag_model_and_provider(litellm, span, requested_model): - span.set_tag_str("litellm.request.model", 
requested_model) - integration = litellm._datadog_integration - provider = integration._provider_map.get(requested_model, None) - if provider: - span.set_tag_str("litellm.request.provider", provider) - def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json index 900c1ac4412..214afe91718 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11157000, - "start": 1742495036719661000 + "duration": 11473000, + "start": 1742580260689536000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json index 8183d85e848..c0e47e63442 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11435000, - "start": 1742495036748464000 + "duration": 11410000, + "start": 1742580260717377000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json index a6bc65181c9..2edd0a58339 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11029000, - "start": 1742495036690303000 + "duration": 10910000, + "start": 1742580260660336000 }]] diff --git 
a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json index a05dd9ffeda..4823265c91e 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11393000, - "start": 1742495036919703000 + "duration": 11645000, + "start": 1742580260892670000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json index 1bb4b5798f8..82aa9e0797b 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 11460000, - "start": 1742495036946048000 + "duration": 11638000, + "start": 1742580260921802000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json index cae9d84f2b9..019e6861dd0 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 10946000, - "start": 1742495036893874000 + "duration": 10966000, + "start": 1742580260866220000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json index 60e83997323..5d496bb6478 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion.json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - 
"_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 42807000, - "start": 1742495036462994000 + "duration": 43782000, + "start": 1742580260427196000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json index 697da49dd16..70b4b725406 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 57377000, - "start": 1742495036556493000 + "duration": 64700000, + "start": 1742580260523466000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json index 6d1d2839507..70100a1b6fe 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6095000, - "start": 1742495036633397000 + "duration": 6278000, + "start": 1742580260607014000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json index f563f841768..9f99831ec22 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6195000, - "start": 1742495036533565000 + "duration": 6885000, + "start": 
1742580260495830000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json index 28bf0d39941..3639ac22839 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 7254000, - "start": 1742495036824538000 + "duration": 7357000, + "start": 1742580260791261000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json index f40a76bcaa5..cfa5ca52417 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 6404000, - "start": 1742495036845984000 + "duration": 6527000, + "start": 1742580260815275000 }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json index 7d1105f25ef..e68a5d04c10 100644 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json @@ -10,19 +10,18 @@ "error": 0, "meta": { "_dd.p.dm": "-0", - "_dd.p.tid": "67dc5d3c00000000", + "_dd.p.tid": "67ddaa2400000000", "language": "python", "litellm.request.model": "gpt-3.5-turbo", - "litellm.request.provider": "", - "runtime-id": "5886e4cf96ff4386abd757da414c8d9b" + "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" }, "metrics": { "_dd.measured": 1, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, "_sampling_priority_v1": 1, - "process_id": 34947 + "process_id": 44834 }, - "duration": 5760000, - "start": 1742495036803937000 + "duration": 5879000, + "start": 1742580260771604000 }]] From 486979464ac9cc21819ed99efc999a41eff869bd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:37:15 -0400 Subject: [PATCH 07/61] test out other models --- .../requirements/{17b7978.txt => 45c12de.txt} | 17 ++- .../requirements/{1db92d0.txt => e05a904.txt} | 17 ++- .../requirements/{1f657b3.txt => e8c8851.txt} | 17 ++- .../requirements/{8c9f21c.txt => f30dfc2.txt} | 17 ++- riotfile.py | 3 + 
.../claude-3-5-sonnet-20240620.yaml | 86 ++++++++++++++ .../completion_vertex_ai/gemini-pro.yaml | 110 ++++++++++++++++++ tests/contrib/litellm/test_litellm.py | 23 ++++ ...t_litellm_completion_different_models.json | 27 +++++ ...anthropic_claude-3-5-sonnet-20240620].json | 27 +++++ 10 files changed, 332 insertions(+), 12 deletions(-) rename .riot/requirements/{17b7978.txt => 45c12de.txt} (82%) rename .riot/requirements/{1db92d0.txt => e05a904.txt} (83%) rename .riot/requirements/{1f657b3.txt => e8c8851.txt} (82%) rename .riot/requirements/{8c9f21c.txt => f30dfc2.txt} (83%) create mode 100644 tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json create mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json diff --git a/.riot/requirements/17b7978.txt b/.riot/requirements/45c12de.txt similarity index 82% rename from .riot/requirements/17b7978.txt rename to .riot/requirements/45c12de.txt index 798b258db60..c2da32fca1e 100644 --- a/.riot/requirements/17b7978.txt +++ b/.riot/requirements/45c12de.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/17b7978.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/45c12de.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -10,14 +10,18 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -28,29 +32,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/1db92d0.txt b/.riot/requirements/e05a904.txt similarity index 83% rename from .riot/requirements/1db92d0.txt rename to .riot/requirements/e05a904.txt index e86bb4cb0aa..48afc8fdf4a 100644 --- a/.riot/requirements/1db92d0.txt +++ b/.riot/requirements/e05a904.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/1db92d0.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/e05a904.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -11,15 +11,19 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 
+coverage[toml]==7.7.1 distro==1.9.0 exceptiongroup==1.2.2 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -30,29 +34,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/1f657b3.txt b/.riot/requirements/e8c8851.txt similarity index 82% rename from .riot/requirements/1f657b3.txt rename to .riot/requirements/e8c8851.txt index d1a93e65777..a209020993c 100644 --- a/.riot/requirements/1f657b3.txt +++ b/.riot/requirements/e8c8851.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/1f657b3.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/e8c8851.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -10,14 +10,18 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ -28,29 +32,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/.riot/requirements/8c9f21c.txt b/.riot/requirements/f30dfc2.txt similarity index 83% rename from .riot/requirements/8c9f21c.txt rename to .riot/requirements/f30dfc2.txt index 4c7ee2bb6e4..c9092e0225c 100644 --- a/.riot/requirements/8c9f21c.txt +++ b/.riot/requirements/f30dfc2.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.9 # by the following command: # -# pip-compile --allow-unsafe --no-annotate .riot/requirements/8c9f21c.in +# pip-compile --allow-unsafe --no-annotate .riot/requirements/f30dfc2.in # aiohappyeyeballs==2.6.1 aiohttp==3.11.14 @@ -11,15 +11,19 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 +boto3==1.37.17 +botocore==1.37.17 +cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage[toml]==7.7.0 +coverage[toml]==7.7.1 distro==1.9.0 exceptiongroup==1.2.2 filelock==3.18.0 frozenlist==1.5.0 fsspec==2025.3.0 +google-auth==2.38.0 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 @@ 
-30,29 +34,36 @@ importlib-metadata==8.6.1 iniconfig==2.1.0 jinja2==3.1.6 jiter==0.9.0 +jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 litellm==1.63.12 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.66.5 +openai==1.68.2 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 propcache==0.3.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 pydantic==2.10.6 pydantic-core==2.27.2 pytest==8.3.5 pytest-asyncio==0.25.3 pytest-cov==6.0.0 pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 rpds-py==0.23.1 +rsa==4.9 +s3transfer==0.11.4 +six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 tiktoken==0.9.0 diff --git a/riotfile.py b/riotfile.py index 5fcf5d52097..373d75c4d02 100644 --- a/riotfile.py +++ b/riotfile.py @@ -2584,6 +2584,9 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT "litellm": latest, "vcrpy": latest, "pytest-asyncio": latest, + "botocore": latest, + "boto3": latest, + "google-auth": latest, }, ), Venv( diff --git a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml new file mode 100644 index 00000000000..a719ac0af83 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml @@ -0,0 +1,86 @@ +interactions: +- request: + body: '{"model": "claude-3-5-sonnet-20240620", "messages": [{"role": "user", "content": + [{"type": "text", "text": "Hey, what is up?"}]}], "max_tokens": 4096}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '150' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.63.12 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA2RQTW/UQAz9K8aXvcyibNoCygWVU5dyQagnhCJ3xiSjztpp7Gm7Wu1/RwlUAnGy + 9L70nk+YE3Z4sKFvdrd37Xhf33+gT5+/yXT7ZUdPX9s7DOjHiRcVm9HAGHDWsgBkls1JHAMeNHHB + DmOhmnh7sb3amoqwb9umvWzetQ0GjCrO4th9P72GOr8s9vV0eMOl6Bu4NiCB6z0UkqHSwLDGB9hD + Utk4jPTEMPFsKlSAXyaeM0tkA53hJ3PJMliA++qw3xxg5JnBFUYuExy1wnP2EUiO8FjZPKusRtcp + R1sEmwQlP6yelC1Ws7dwo88QSWAPv3evQa6Jjh/x/COguU79zGQq2CFL6r3Ogn8I48e6NMROaikB + 6/rK7oRZpuq96wOLYbe7CBgpjtzHmWlp1v8raF75mSn9z2n1v5HLq/P5FwAAAP//AwAsXlFX5AEA + AA== + headers: + CF-RAY: + - 923fd79c4a4a7cfc-EWR + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 21 Mar 2025 19:26:40 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 0280e0cf-573a-4392-b276-1b73319958fb + anthropic-ratelimit-input-tokens-limit: + - '20000' + anthropic-ratelimit-input-tokens-remaining: + - '20000' + anthropic-ratelimit-input-tokens-reset: + - '2025-03-21T19:26:40Z' + anthropic-ratelimit-output-tokens-limit: + - '4000' + anthropic-ratelimit-output-tokens-remaining: + - '4000' + anthropic-ratelimit-output-tokens-reset: + - '2025-03-21T19:26:40Z' + anthropic-ratelimit-requests-limit: + - '5' + anthropic-ratelimit-requests-remaining: + - '4' + anthropic-ratelimit-requests-reset: + - '2025-03-21T19:26:52Z' + anthropic-ratelimit-tokens-limit: + - '24000' + anthropic-ratelimit-tokens-remaining: + - '24000' + anthropic-ratelimit-tokens-reset: + - '2025-03-21T19:26:40Z' + cf-cache-status: + - DYNAMIC + request-id: + - 
req_01RRDNDcX3wjQFEMkLiTep47 + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml new file mode 100644 index 00000000000..5e712cce0bb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: assertion=eyJ0eXAiOiAiSldUIiwgImFsZyI6ICJSUzI1NiIsICJraWQiOiAiZjc0ODI1ZGMzZDE4ZWU2YTY5Y2I2YTE0NmQ5OGUxNTg4YTM5YWU3YyJ9.eyJpYXQiOiAxNzQyNTg1NTY0LCAiZXhwIjogMTc0MjU4OTE2NCwgImlzcyI6ICJsbG1vYnMtdGVzdEBkYXRhZG9nLXNhbmRib3guaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLCAiYXVkIjogImh0dHBzOi8vb2F1dGgyLmdvb2dsZWFwaXMuY29tL3Rva2VuIiwgInNjb3BlIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL2F1dGgvY2xvdWQtcGxhdGZvcm0ifQ.ZivXu-4DxUTH_3JzIngRTswHdKmxiPR_yFl7T7o7C7FSrp4zf0cHa-fid8jMBiwzOz0ooBny11AezGE0w5b15NvhbrQq3HDYdoHXGooo9yBOnhez7v5EaP8iMfpkcp0EW8DUdSUrs2-y9rYT67rA6KxxWcdQLPFyk15ka-FC3f1BsdF_c0CdoPfKEG0mpBj5OHvmwjE3L5GP-2OLgx75B9loCFs3npkEa74YfCJ5OZXHUAPgONXC9VxiXf7__Secb-sDqZLKnGi2HSwaTZJ7TWkLyVufp71IMWpYaExI9Qw2IPPok3h-tCRJjljjJ1kfFy4N0AZCv1STT3p7w8jxyA&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '801' + Content-Type: + - application/x-www-form-urlencoded + User-Agent: + - python-requests/2.32.3 + x-goog-api-client: + - gl-python/3.11.10 auth/2.38.0 auth-request-type/at cred-type/sa + method: POST + uri: https://oauth2.googleapis.com/token + response: + body: + string: '{"access_token":"ya29.c.c0ASRK0GZU0bfQUP0GYf4fuNe4BhHzumcFny6u68QFuxIsG64_oIXfd5scBCRKnTn4AavchTw0iXipb8jUGWZnRC-3IuQPmizs1oQAAWKL90jbwCqpwefQVEMkoPi6Sp1qs0RJjm3gjX_KVPPZlleAEHtWA-lxhbPkP56KRmOHaPWIC6z019UAO4wAuylihJSAq0QheNEW42e2E9NA6MAaCfMgARAvvrNhoaJ2NvvFPYTc_B4Ii8J-fdweojRHAn115d6k4LV0hDqdMeuuDmycrZPjXb6_DTRCYbXrbEjuSlHAAYLFqXaq1q1Uv_rzTt5yaGN8fQig3SZ0b6kQ959wq_6MWRrIlip3UnO3kAjMl7HKp8cZUS2l7sLAN385CjUXmxoq36By_bvYQRmavYvqlIxei6d7mym5I6Fnon1xb69jl2c4Ykatuey3yF2I195zhf1Q_nStxax5ikZoa8gztfzUQO8uz7wXt5zawkgYvvg_y4fZsU4J2gjb44encv0oaFlvw-uXS4WI-_meXXVp-vgexUvZaS6yXO31lhyiVe60BzFsanMMQu4UyJycm2bo4pfdOe6fg3uXZS2fvv8-551p9iWcixYswr-o6h_0FrucJQ0yp-wQ791mYaMx7q7a90iSaJ1s4OqeB_Mw4kl5nscbI-xjb1YVMMYd07MfMwS5fg-0Y-7lv2tYqJbk31hq_npBnvgX-96iupo6y2ZiXebMgIgBRgZxoqvQtsgBM4Jn2Wntjebw7Sy5UIqZOaleqz1nZjeXX7bw2VQkqrgFaXF-RdaBoxJF46Us0hhkRSdIkQMMQmbOf692lo6Sjwvc2ZkziieWp1-am1uaezpamYnckZy0g8eQoZY_S6dgMpMtxcS9Jinf5c2qmMxvIwQdzx3hjt3lsedhre3mhfQvf7ypS-j5g1JVxhyxRaw01jF7tycWp4ecbuVccwwWati9M-qpQ254Mtx89v1Rt85Xh7U66lu12cJc0oktJ7nX766QlZSsx-qf1ri","expires_in":3599,"token_type":"Bearer"}' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Fri, 21 Mar 2025 19:32:45 GMT + Server: + - scaffolding on HTTPServer2 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents":[{"role":"user","parts":[{"text":"Hey, what is up?"}]}],"generationConfig":{"candidate_count":1}}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '109' + content-type: + - application/json + host: + - us-central1-aiplatform.googleapis.com + user-agent: + - 
litellm/1.63.12 + method: POST + uri: https://us-central1-aiplatform.googleapis.com/v1/projects/datadog-sandbox/locations/us-central1/publishers/google/models/gemini-pro:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/7VVXW/aMBR951d4edkLIMf5IOGlYtSCTBQQSbvSqUKGuMFbiFHstEUV/312AjRM + 6sNUlofo5n4c33MdH781ADBWJItZTCQVRhf8VB4A3sq3jvFM0kyqwNGlnDlPqfIYGx7T1Gi+B7Yk + l+8g1fNWs1WKpK8azRiyL2DMJdgUq3UT/CqEBGuSJSxLAC8kYBmQawpiljBJUvDC8zRughfCpM6Q + HCwpWNN0+1SkbTDkL4AsddmOF1fgx5rIrwIkvATLNJby5xWIqo3J7sqodbU/2Y8Ha3/kZDyxjIn1 + jBLBM912GE2mJ8aGIE9U7mZEN3XOu85aDVjShOe7kndvdrPo9yI8mMzmi6EyFuEU4/6wNkc9yZwv + yZKlTJZVYzwYBYPg2wh/mBaueK53BbahBVHHsi3vLFXQZ5of4MomQnyHZ0E0X3wEfqyoISPo+y70 + 7VPavvmvlK974wGeTW7DRX8yjvA4uiBx3zNt27Euz9uGyLMQtD7BW331wvDmwoShpdv6Dxtt+h3k + Ou4nCIf4/rY3Gs0X+H46CvrBBXkj3/aR6V3+BzehCTs2Qu+0D9bj6cyT52TEE92WPvEtVeO5jqlO + hoNcz3N8r5qZLiyLjEKQhN5QSZTEkpOQamKbrYz4b5r1eVFKrFstUhPks7CFDnHJlSaehw6jqKOK + a7UmS+u6VNNwpd3kOPsI39c3R+HXmzpqYqM2h79bvNBaFjpfrHH476qb5o7mglUynNCNEuaW2YYt + xbgFISoxjVVOVU8R25T3E4LIaUGrhczI9LsW6tpO24KO7ZsPVXpOxZZnggaxTreWnfjBn4ZYuP2N + H0xzu/Pd6wVGY9/4A9Z/lKYoBwAA + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Fri, 21 Mar 2025 19:32:46 GMT + Server: + - scaffolding on HTTPServer2 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index f1bd6c1f455..b8b113227b0 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,4 +1,5 @@ import pytest +import os from tests.contrib.litellm.utils import get_cassette_name @@ -47,3 +48,25 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): stream=stream, n=n, ) + +@pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) +@pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") +def test_litellm_completion_different_models(litellm, request_vcr, model): + aws_access_key_id = "" + aws_secret_access_key = "" + aws_region_name = "" + if model == "bedrock/amazon.titan-text-lite-v1": + aws_access_key_id = "ASIAWYLNJGWWOJPUYN45" + aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" + aws_region_name = "us-east-1" + with request_vcr.use_cassette(f"completion_{model}.yaml"): + messages = [{ "content": "Hey, what is up?","role": "user"}] + litellm.completion( + model=model, + messages=messages, + stream=False, + n=1, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_region_name=aws_region_name, + ) \ No newline at end of file diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json new file mode 100644 index 00000000000..3f4326ea8f5 --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json @@ -0,0 +1,27 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": 
"-0", + "_dd.p.tid": "67ddbedc00000000", + "language": "python", + "litellm.request.model": "vertex_ai/gemini-pro", + "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 4385 + }, + "duration": 1211993000, + "start": 1742585564858520000 + }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json new file mode 100644 index 00000000000..852a1cdfd4a --- /dev/null +++ b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json @@ -0,0 +1,27 @@ +[[ + { + "name": "litellm.request", + "service": "tests.contrib.litellm", + "resource": "litellm.completion", + "trace_id": 0, + "span_id": 1, + "parent_id": 0, + "type": "", + "error": 0, + "meta": { + "_dd.p.dm": "-0", + "_dd.p.tid": "67ddbede00000000", + "language": "python", + "litellm.request.model": "anthropic/claude-3-5-sonnet-20240620", + "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" + }, + "metrics": { + "_dd.measured": 1, + "_dd.top_level": 1, + "_dd.tracer_kr": 1.0, + "_sampling_priority_v1": 1, + "process_id": 4385 + }, + "duration": 11249000, + "start": 1742585566097702000 + }]] From 0adc8e4e26bb9de8e0863246c6ad5c1bfb49e85b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:42:19 -0400 Subject: [PATCH 08/61] add global tags test --- tests/contrib/litellm/test_litellm.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index b8b113227b0..05cedf8307e 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -1,8 +1,31 @@ import pytest -import os +from tests.utils import override_global_config from tests.contrib.litellm.utils import get_cassette_name +def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): + """ + When the global config UST tags are set + The service name should be used for all data + The env should be used for all data + The version should be used for all data + """ + with override_global_config(dict(service="test-svc", env="staging", version="1234")): + cassette_name = "completion.yaml" + with request_vcr.use_cassette(cassette_name): + messages = [{ "content": "Hey, what is up?","role": "user"}] + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + ) + + span = mock_tracer.pop_traces()[0][0] + assert span.resource == "litellm.completion" + assert span.service == "test-svc" + assert span.get_tag("env") == "staging" + assert span.get_tag("version") == "1234" + assert span.get_tag("litellm.request.model") == "gpt-3.5-turbo" + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): From 800fcf4300236ce05baa46417ad97f1da42ae047 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:48:10 -0400 Subject: [PATCH 09/61] add release note --- releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 
releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml diff --git a/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml b/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml new file mode 100644 index 00000000000..704d57d1e20 --- /dev/null +++ b/releasenotes/notes/litellm-tracing-f62a4467d9b50120.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + litellm: Introduces tracing support for the LiteLLM Python SDK's sync and async ``completion`` and ``text_completion`` methods. + See `the docs ` + for more information. \ No newline at end of file From 1cc21e10e9dc2df7a05fe7c7505d36a913e7b147 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:53:37 -0400 Subject: [PATCH 10/61] run black --- ddtrace/contrib/internal/litellm/patch.py | 2 +- ddtrace/contrib/internal/litellm/utils.py | 2 -- ddtrace/contrib/litellm/__init__.py | 2 +- ddtrace/llmobs/_integrations/litellm.py | 2 +- tests/contrib/litellm/conftest.py | 2 ++ tests/contrib/litellm/test_litellm.py | 20 +++++++++++++------- tests/contrib/litellm/utils.py | 4 +++- 7 files changed, 21 insertions(+), 13 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 4cee84768b9..de258c3080a 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -94,4 +94,4 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") - delattr(litellm, "_datadog_integration") \ No newline at end of file + delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f9fc64dfeee..f4290ae06a2 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,5 +1,3 @@ def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) - - diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py index af284d63775..0ca7e276331 100644 --- a/ddtrace/contrib/litellm/__init__.py +++ b/ddtrace/contrib/litellm/__init__.py @@ -11,4 +11,4 @@ from ddtrace.contrib.internal.litellm.patch import patch from ddtrace.contrib.internal.litellm.patch import unpatch - __all__ = ["patch", "unpatch", "get_version"] \ No newline at end of file + __all__ = ["patch", "unpatch", "get_version"] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 9602cf7336c..fd448ca9491 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -16,4 +16,4 @@ def _set_base_span_tags( if provider is not None: span.set_tag_str("litellm.request.provider", provider) if model is not None: - span.set_tag_str("litellm.request.model", model) \ No newline at end of file + span.set_tag_str("litellm.request.model", model) diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 590b1986652..9f88e2f5921 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -12,6 +12,7 @@ from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr + def default_global_config(): return {} @@ -20,6 +21,7 @@ def default_global_config(): def ddtrace_global_config(): return {} + @pytest.fixture def ddtrace_config_litellm(): return {} diff --git a/tests/contrib/litellm/test_litellm.py 
b/tests/contrib/litellm/test_litellm.py index 05cedf8307e..4d83d1c2209 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -3,6 +3,7 @@ from tests.utils import override_global_config from tests.contrib.litellm.utils import get_cassette_name + def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): """ When the global config UST tags are set @@ -13,7 +14,7 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): with override_global_config(dict(service="test-svc", env="staging", version="1234")): cassette_name = "completion.yaml" with request_vcr.use_cassette(cassette_name): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -26,11 +27,12 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): assert span.get_tag("version") == "1234" assert span.get_tag("litellm.request.model") == "gpt-3.5-turbo" + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model="gpt-3.5-turbo", messages=messages, @@ -38,11 +40,12 @@ def test_litellm_completion(litellm, request_vcr, stream, n): n=n, ) + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_acompletion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, @@ -50,7 +53,8 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): n=n, ) -@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) + +@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): @@ -61,7 +65,8 @@ def test_litellm_text_completion(litellm, request_vcr, stream, n): n=n, ) -@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) + +@pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): @@ -72,6 +77,7 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): n=n, ) + @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") def 
test_litellm_completion_different_models(litellm, request_vcr, model): @@ -83,7 +89,7 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" aws_region_name = "us-east-1" with request_vcr.use_cassette(f"completion_{model}.yaml"): - messages = [{ "content": "Hey, what is up?","role": "user"}] + messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( model=model, messages=messages, @@ -92,4 +98,4 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, aws_region_name=aws_region_name, - ) \ No newline at end of file + ) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index f95503dce6c..62eb8fe6334 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -3,6 +3,7 @@ CASETTE_EXTENSION = ".yaml" + # VCR is used to capture and store network requests made to Anthropic. # This is done to avoid making real calls to the API which could introduce # flakiness and cost. @@ -16,8 +17,9 @@ def get_request_vcr(): ignore_localhost=True, ) + # Get the name of the cassette to use for a given test -# All LiteLLM requests that use Open AI get routed to the chat completions endpoint, +# All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n def get_cassette_name(stream, n): stream_suffix = "_stream" if stream else "" From b95d96f82dcc7ed421993c22399748cd55b6ff6e Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 15:57:39 -0400 Subject: [PATCH 11/61] fix requirements lock file --- lib-injection/sources/min_compatible_versions.csv | 2 ++ min_compatible_versions.csv | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib-injection/sources/min_compatible_versions.csv b/lib-injection/sources/min_compatible_versions.csv index ee7990e276a..24a7adb2052 100644 --- a/lib-injection/sources/min_compatible_versions.csv +++ b/lib-injection/sources/min_compatible_versions.csv @@ -67,6 +67,7 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 +google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 @@ -94,6 +95,7 @@ langchain-core,==0.1.52 langchain-openai,==0.1.6 langchain-pinecone,==0.1.0 langgraph,~=0.2.60 +litellm,0 logbook,~=1.0.0 loguru,~=0.4.0 lxml,0 diff --git a/min_compatible_versions.csv b/min_compatible_versions.csv index ee7990e276a..24a7adb2052 100644 --- a/min_compatible_versions.csv +++ b/min_compatible_versions.csv @@ -67,6 +67,7 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 +google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 @@ -94,6 +95,7 @@ langchain-core,==0.1.52 langchain-openai,==0.1.6 langchain-pinecone,==0.1.0 langgraph,~=0.2.60 +litellm,0 logbook,~=1.0.0 loguru,~=0.4.0 lxml,0 From 9f63ceeb0318257a3f13fa590562f95656e3a56b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 21 Mar 2025 16:02:14 -0400 Subject: [PATCH 12/61] remove unnecessary bedrock credentials in tests --- tests/contrib/litellm/test_litellm.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 4d83d1c2209..6cb65fd69f6 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -81,13 
+81,6 @@ async def test_litellm_atext_completion(litellm, request_vcr, stream, n): @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion_different_models") def test_litellm_completion_different_models(litellm, request_vcr, model): - aws_access_key_id = "" - aws_secret_access_key = "" - aws_region_name = "" - if model == "bedrock/amazon.titan-text-lite-v1": - aws_access_key_id = "ASIAWYLNJGWWOJPUYN45" - aws_secret_access_key = "z99X8m2gwAHrXGH8Owd0gzHq5ndHahRW0nEU8xbu" - aws_region_name = "us-east-1" with request_vcr.use_cassette(f"completion_{model}.yaml"): messages = [{"content": "Hey, what is up?", "role": "user"}] litellm.completion( @@ -95,7 +88,4 @@ def test_litellm_completion_different_models(litellm, request_vcr, model): messages=messages, stream=False, n=1, - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_region_name=aws_region_name, ) From a066c99f2d0bfb98c6ccb1cf6d22d2af5c89cc44 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Sun, 23 Mar 2025 14:25:53 -0400 Subject: [PATCH 13/61] add documentation --- ddtrace/contrib/_litellm.py | 47 +++++++++++++++++++++++ ddtrace/contrib/internal/litellm/utils.py | 2 +- ddtrace/contrib/litellm/__init__.py | 14 ------- ddtrace/llmobs/_integrations/litellm.py | 5 +-- 4 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 ddtrace/contrib/_litellm.py delete mode 100644 ddtrace/contrib/litellm/__init__.py diff --git a/ddtrace/contrib/_litellm.py b/ddtrace/contrib/_litellm.py new file mode 100644 index 00000000000..026d11250ca --- /dev/null +++ b/ddtrace/contrib/_litellm.py @@ -0,0 +1,47 @@ +""" +The LiteLLM integration instruments the LiteLLM Python SDK's sync and async ``completion`` and ``text_completion`` methods. + +All traces submitted from the LiteLLM integration are tagged by: + +- ``service``, ``env``, ``version``: see the `Unified Service Tagging docs `_. +- ``litellm.request.model``: Model used in the request. This may be just the model name (e.g. ``gpt-3.5-turbo``) or the model name with the route defined (e.g. ``openai/gpt-3.5-turbo``). +- ``litellm.request.host``: Host where the request is sent (if specified). + + +Enabling +~~~~~~~~ + +The LiteLLM integration is enabled automatically when you use +:ref:`ddtrace-run` or :ref:`import ddtrace.auto`. + +Alternatively, use :func:`patch() ` to manually enable the LiteLLM integration:: + + from ddtrace import config, patch + + patch(litellm=True) + + +Global Configuration +~~~~~~~~~~~~~~~~~~~~ + +.. py:data:: ddtrace.config.litellm["service"] + + The service name reported by default for LiteLLM requests. + + Alternatively, you can set this option with the ``DD_SERVICE`` or ``DD_LITELLM_SERVICE`` environment + variables. 
+ + Default: ``DD_SERVICE`` + + +Instance Configuration +~~~~~~~~~~~~~~~~~~~~~~ + +To configure the LiteLLM integration on a per-instance basis use the +``Pin`` API:: + + import litellm + from ddtrace import Pin, config + + Pin.override(litellm, service="my-litellm-service") +""" # noqa: E501 \ No newline at end of file diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f4290ae06a2..bd6ec1aa289 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,3 +1,3 @@ def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: - span.set_tag_str("litellm.host", kwargs["metadata"]["headers"]["host"]) + span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) diff --git a/ddtrace/contrib/litellm/__init__.py b/ddtrace/contrib/litellm/__init__.py deleted file mode 100644 index 0ca7e276331..00000000000 --- a/ddtrace/contrib/litellm/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# TODO: documentation - -from ddtrace.internal.utils.importlib import require_modules - - -required_modules = ["litellm"] - -with require_modules(required_modules) as missing_modules: - if not missing_modules: - from ddtrace.contrib.internal.litellm.patch import get_version - from ddtrace.contrib.internal.litellm.patch import patch - from ddtrace.contrib.internal.litellm.patch import unpatch - - __all__ = ["patch", "unpatch", "get_version"] diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index fd448ca9491..4dba18be518 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -8,12 +8,9 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" - _provider_map = {} def _set_base_span_tags( - self, span: Span, provider: Optional[str] = None, model: Optional[str] = None, **kwargs: Dict[str, Any] + self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] ) -> None: - if provider is not None: - span.set_tag_str("litellm.request.provider", provider) if model is not None: span.set_tag_str("litellm.request.model", model) From 010f85e527dcac9b5f91dafbe66e63998b266632 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 12:46:23 -0400 Subject: [PATCH 14/61] add llmobs base span tag method for litellm --- ddtrace/contrib/internal/litellm/patch.py | 3 +-- ddtrace/llmobs/_integrations/litellm.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index de258c3080a..e35f48f0d4e 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -20,8 +20,7 @@ ) -def get_version(): - # type: () -> str +def get_version() -> str: try: return version("litellm") except Exception: diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 4dba18be518..7f823dea327 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -1,16 +1,39 @@ from typing import Any from typing import Dict +from typing import List from typing import Optional +from ddtrace.llmobs._constants import METRICS +from ddtrace.llmobs._constants import MODEL_NAME +from ddtrace.llmobs._constants import MODEL_PROVIDER +from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.trace import Span from 
ddtrace.llmobs._integrations.base import BaseLLMIntegration


 class LiteLLMIntegration(BaseLLMIntegration):
     _integration_name = "litellm"
+    _provider_map = {}

     def _set_base_span_tags(
         self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any]
     ) -> None:
         if model is not None:
             span.set_tag_str("litellm.request.model", model)
+
+    def _llmobs_set_tags(
+        self,
+        span: Span,
+        args: List[Any],
+        kwargs: Dict[str, Any],
+        response: Optional[Any] = None,
+        operation: str = "",
+    ) -> None:
+        model_name = span.get_tag("litellm.request.model")
+
+        # TODO: populate the provider map
+        model_provider = self._provider_map.get(model_name, "")
+
+        span._set_ctx_items(
+            {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider}
+        )
\ No newline at end of file

From 528a0ec0ae0be9691f17ee3b165d51590ca152ff Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 25 Mar 2025 16:34:40 -0400
Subject: [PATCH 15/61] add provider tagging and stream capturing support

---
 ddtrace/contrib/internal/litellm/patch.py |  74 ++++++++--
 ddtrace/contrib/internal/litellm/utils.py | 170 ++++++++++++++++++++++
 ddtrace/llmobs/_integrations/litellm.py   |   7 +-
 3 files changed, 239 insertions(+), 12 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index e35f48f0d4e..67e2af78897 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -9,6 +9,8 @@ from ddtrace import config
 from ddtrace.contrib.trace_utils import unwrap
 from ddtrace.contrib.trace_utils import with_traced_module
 from ddtrace.contrib.trace_utils import wrap
 from ddtrace.contrib.internal.litellm.utils import tag_request
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
 from ddtrace.llmobs._integrations import LiteLLMIntegration
 from ddtrace.trace import Pin
 from ddtrace.internal.utils import get_argument_value
@@ -16,7 +18,10 @@
 config._add(
     "litellm",
-    {},
+    {
+        "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)),
+        "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)),
+    },
 )
@@ -29,43 +34,92 @@ def get_version() -> str:
 def _create_span(litellm, pin, func, instance, args, kwargs):
     """Helper function to create and configure a traced span."""
-    integration = litellm._datadog_integration
     model = get_argument_value(args, kwargs, 0, "model", None)
+    integration = litellm._datadog_integration
+    base_url = kwargs.get("api_base", None)
     span = integration.trace(
         pin,
         "litellm.%s" % func.__name__,
         model=model,
-        submit_to_llmobs=False,
+        submit_to_llmobs=integration.should_submit_to_llmobs(base_url),
     )
     return span


 @with_traced_module
 def traced_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, False)
+
+@with_traced_module
+async def traced_acompletion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, False)
+
+@with_traced_module
+def traced_text_completion(litellm, pin, func, instance, args, kwargs):
+    return _traced_completion(litellm, pin, func, instance, args, kwargs, True)
+
+@with_traced_module
+async def traced_atext_completion(litellm, pin, func, instance, args, kwargs):
+    return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True)
+
+
+def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion):
+    integration = litellm._datadog_integration
span = _create_span(litellm, pin, func, instance, args, kwargs) + stream = kwargs.get("stream", False) tag_request(span, kwargs) + resp = None try: - return func(*args, **kwargs) + resp = func(*args, **kwargs) + if stream: + return TracedLiteLLMStream( + resp, integration, span, args, kwargs, is_completion + ) + return resp except Exception: span.set_exc_info(*sys.exc_info()) raise finally: + # streamed spans will be finished separately once the stream generator is exhausted + if span.error or not stream: + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() -@with_traced_module -async def traced_acompletion(litellm, pin, func, instance, args, kwargs): +async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): + integration = litellm._datadog_integration span = _create_span(litellm, pin, func, instance, args, kwargs) + stream = kwargs.get("stream", False) tag_request(span, kwargs) + resp = None try: - return await func(*args, **kwargs) + resp = await func(*args, **kwargs) + if stream: + return TracedLiteLLMAsyncStream( + resp, integration, span, args, kwargs, is_completion + ) + return resp except Exception: span.set_exc_info(*sys.exc_info()) raise finally: + # streamed spans will be finished separately once the stream generator is exhausted + if span.error or not stream: + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() +@with_traced_module +def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): + requested_model = get_argument_value(args, kwargs, 0, "model", None) + integration = litellm._datadog_integration + model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) + # Store the provider information in the integration + integration._provider_map[requested_model] = custom_llm_provider + integration._provider_map[model] = custom_llm_provider + return model, custom_llm_provider, dynamic_api_key, api_base + + def patch(): if getattr(litellm, "_datadog_patch", False): return @@ -78,8 +132,9 @@ def patch(): wrap("litellm", "completion", traced_completion(litellm)) wrap("litellm", "acompletion", traced_acompletion(litellm)) - wrap("litellm", "text_completion", traced_completion(litellm)) - wrap("litellm", "atext_completion", traced_acompletion(litellm)) + wrap("litellm", "text_completion", traced_text_completion(litellm)) + wrap("litellm", "atext_completion", traced_atext_completion(litellm)) + wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -92,5 +147,6 @@ def unpatch(): unwrap(litellm, "acompletion") unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") + unwrap(litellm, "get_llm_provider") delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index bd6ec1aa289..d16d33f5ddd 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -1,3 +1,173 @@ +import sys +from typing import Any +from typing import Dict +from typing import List + +from ddtrace.internal.logger import get_logger + +log = get_logger(__name__) + def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) + +class BaseTracedLiteLLMStream: + def __init__(self, generator, integration, span, args, kwargs, is_completion=False): + n = 
kwargs.get("n", 1) or 1 + self._generator = generator + self._dd_integration = integration + self._dd_span = span + self._args = args + self._kwargs = kwargs + self._streamed_chunks = [[] for _ in range(n)] + self._is_completion = is_completion + + +class TracedLiteLLMStream(BaseTracedLiteLLMStream): + def __enter__(self): + self._generator.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._generator.__exit__(exc_type, exc_val, exc_tb) + + def __iter__(self): + exception_raised = False + try: + for chunk in self._generator: + self._extract_token_chunk(chunk) + yield chunk + _loop_handler(chunk, self._streamed_chunks) + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + exception_raised = True + raise + finally: + if not exception_raised: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + +class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): + async def __aenter__(self): + await self._generator.__aenter__() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self._generator.__aexit__(exc_type, exc_val, exc_tb) + + async def __aiter__(self): + exception_raised = False + try: + async for chunk in self._generator: + yield chunk + _loop_handler(chunk, self._streamed_chunks) + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + exception_raised = True + raise + finally: + if not exception_raised: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + +def _loop_handler(chunk, streamed_chunks): + """Appends the chunk to the correct index in the streamed_chunks list. + + When handling a streamed chat/completion response, this function is called for each chunk in the streamed response. 
+ """ + for choice in chunk.choices: + streamed_chunks[choice.index].append(choice) + if getattr(chunk, "usage", None): + streamed_chunks[0].insert(0, chunk) + + +def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): + try: + if is_completion: + formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + else: + formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + operation = "completion" if is_completion else "chat" + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + except Exception: + log.warning("Error processing streamed completion/chat response.", exc_info=True) + + +def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" + if not streamed_chunks: + return {"text": ""} + completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} + if streamed_chunks[-1].finish_reason is not None: + completion["finish_reason"] = streamed_chunks[-1].finish_reason + if hasattr(streamed_chunks[0], "usage"): + completion["usage"] = streamed_chunks[0].usage + return completion + + +def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): + """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" + if function_call_chunk: + if not stored_tool_calls: + stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) + stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") + return + if not tool_call_chunk: + return + tool_call_idx = getattr(tool_call_chunk, "index", None) + tool_id = getattr(tool_call_chunk, "id", None) + tool_type = getattr(tool_call_chunk, "type", None) + function_call = getattr(tool_call_chunk, "function", None) + function_name = getattr(function_call, "name", "") + # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) + list_idx = next( + (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), + None, + ) + if list_idx is None: + stored_tool_calls.append( + {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} + ) + list_idx = -1 + stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") + + +def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a chat completion message dictionary from streamed chunks. 
+ The resulting message dictionary is of form: + {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} + """ + message = {"content": "", "tool_calls": []} + for chunk in streamed_chunks: + if getattr(chunk, "usage", None): + message["usage"] = chunk.usage + if not hasattr(chunk, "delta"): + continue + if getattr(chunk, "index", None) and not message.get("index"): + message["index"] = chunk.index + if getattr(chunk.delta, "role") and not message.get("role"): + message["role"] = chunk.delta.role + if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): + message["finish_reason"] = chunk.finish_reason + chunk_content = getattr(chunk.delta, "content", "") + if chunk_content: + message["content"] += chunk_content + continue + function_call = getattr(chunk.delta, "function_call", None) + if function_call: + _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) + tool_calls = getattr(chunk.delta, "tool_calls", None) + if not tool_calls: + continue + for tool_call in tool_calls: + _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) + if message["tool_calls"]: + message["tool_calls"].sort(key=lambda x: x.get("index", 0)) + else: + message.pop("tool_calls", None) + message["content"] = message["content"].strip() + return message diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 7f823dea327..c2f0db2c890 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -30,10 +30,11 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - - # TODO: populate the provider map model_provider = self._provider_map.get(model_name, "") span._set_ctx_items( {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider} - ) \ No newline at end of file + ) + + def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: + return base_url is None From 4b16a4c72b41d4b3f0b64c153d051bf7b4614121 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 17:38:54 -0400 Subject: [PATCH 16/61] attach usage metrics to llmobs spans --- ddtrace/contrib/internal/litellm/patch.py | 2 - ddtrace/contrib/internal/litellm/utils.py | 31 +++++++++ ddtrace/llmobs/_integrations/litellm.py | 28 +++++++- ddtrace/llmobs/_integrations/openai.py | 6 +- ddtrace/llmobs/_integrations/utils.py | 83 ++++++++++++++++++++++- 5 files changed, 141 insertions(+), 9 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 67e2af78897..fbd96f4a156 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,7 +83,6 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) - span.finish() async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -106,7 +105,6 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) - span.finish() @with_traced_module diff --git 
a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py
index d16d33f5ddd..51b36888dca 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -48,6 +48,23 @@ def __iter__(self):
                     self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
                 )
             self._dd_span.finish()
+
+    def _extract_token_chunk(self, chunk):
+        """Attempt to extract the token chunk (last chunk in the stream) from the streamed response."""
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            return
+        choice = choices[0]
+        if not getattr(choice, "finish_reason", None):
+            # Only the second-last chunk in the stream with token usage enabled will have finish_reason set
+            return
+        try:
+            # User isn't expecting last token chunk to be present since it's not part of the default streamed response,
+            # so we consume it and extract the token usage metadata before it reaches the user.
+            usage_chunk = self._generator.__next__()
+            self._streamed_chunks[0].insert(0, usage_chunk)
+        except (StopIteration, GeneratorExit):
+            return


 class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream):
     async def __aenter__(self):
@@ -61,6 +78,7 @@ async def __aiter__(self):
         exception_raised = False
         try:
             async for chunk in self._generator:
+                await self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -73,6 +91,19 @@ async def __aiter__(self):
                     self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
                 )
             self._dd_span.finish()
+
+    async def _extract_token_chunk(self, chunk):
+        choices = getattr(chunk, "choices", None)
+        if not choices:
+            return
+        choice = choices[0]
+        if not getattr(choice, "finish_reason", None):
+            return
+        try:
+            usage_chunk = await self._generator.__anext__()
+            self._streamed_chunks[0].insert(0, usage_chunk)
+        except (StopAsyncIteration, GeneratorExit):
+            return


 def _loop_handler(chunk, streamed_chunks):
     """Appends the chunk to the correct index in the streamed_chunks list.
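The two `_extract_token_chunk` helpers added above rely on a convention shared by OpenAI-style streaming APIs: when usage reporting is enabled on a stream, one trailing chunk carrying only token counts (with an empty `choices` list) arrives right after the chunk whose `finish_reason` is set. The wrappers consume that trailing chunk themselves, so callers see exactly the stream they asked for while the span still gets token metrics. A minimal, self-contained sketch of this pattern follows; the `Chunk`/`Choice` classes here are illustrative stand-ins, not litellm's real response types:

    from dataclasses import dataclass, field
    from typing import List, Optional


    @dataclass
    class Choice:
        text: str = ""
        finish_reason: Optional[str] = None


    @dataclass
    class Chunk:
        choices: List[Choice] = field(default_factory=list)
        usage: Optional[dict] = None


    def fake_stream():
        yield Chunk(choices=[Choice(text="Hello")])
        yield Chunk(choices=[Choice(text=" world", finish_reason="stop")])
        # Hidden final chunk: no choices, only token usage.
        yield Chunk(usage={"prompt_tokens": 5, "completion_tokens": 2})


    gen = fake_stream()
    seen_by_caller = []
    collected = []  # plays the role of streamed_chunks[0]
    for chunk in gen:
        # Mirror of _extract_token_chunk: once finish_reason appears, eagerly
        # pull the trailing usage chunk off the generator before the caller can.
        if chunk.choices and chunk.choices[0].finish_reason:
            usage_chunk = next(gen, None)
            if usage_chunk is not None:
                collected.insert(0, usage_chunk)
        seen_by_caller.append(chunk)
        collected.append(chunk)

    assert all(c.usage is None for c in seen_by_caller)
    assert collected[0].usage == {"prompt_tokens": 5, "completion_tokens": 2}

Inserting the usage chunk at index 0 of the first choice's chunk list is what lets the `_construct_completion_from_streamed_chunks` and `_construct_message_from_streamed_chunks` helpers above find `usage` on the first element once the stream is exhausted.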
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c2f0db2c890..f3ba73763cf 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,10 +3,12 @@ from typing import List from typing import Optional -from ddtrace.llmobs._constants import METRICS +from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY, METRICS, OUTPUT_TOKENS_METRIC_KEY, TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -32,9 +34,31 @@ def _llmobs_set_tags( model_name = span.get_tag("litellm.request.model") model_provider = self._provider_map.get(model_name, "") + # response format will match Open AI + if operation == "completion": + openai_set_meta_tags_from_completion(span, kwargs, response) + else: + openai_set_meta_tags_from_chat(span, kwargs, response) + + metrics = self._extract_llmobs_metrics(response) span._set_ctx_items( - {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider} + {SPAN_KIND: "llm", MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics} ) + @staticmethod + def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: + if isinstance(resp, list): + token_usage = _get_attr(resp[0], "usage", None) + else: + token_usage = _get_attr(resp, "usage", None) + if token_usage is not None: + prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) + completion_tokens = _get_attr(token_usage, "completion_tokens", 0) + return { + INPUT_TOKENS_METRIC_KEY: prompt_tokens, + OUTPUT_TOKENS_METRIC_KEY: completion_tokens, + TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, + } + def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: return base_url is None diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index 336cea8fde3..b0ae3fc8e17 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -20,7 +20,7 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document @@ -124,9 +124,9 @@ def _llmobs_set_tags( model_provider = "deepseek" if operation == "completion": - self._llmobs_set_meta_tags_from_completion(span, kwargs, response) + openai_set_meta_tags_from_completion(span, kwargs, response) elif operation == "chat": - self._llmobs_set_meta_tags_from_chat(span, kwargs, response) + openai_set_meta_tags_from_chat(span, kwargs, response) elif operation == "embedding": self._llmobs_set_meta_tags_from_embedding(span, kwargs, response) metrics = self._extract_llmobs_metrics_tags(span, response) diff --git a/ddtrace/llmobs/_integrations/utils.py 
b/ddtrace/llmobs/_integrations/utils.py index c5531deb2ad..a0e41082400 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -1,10 +1,12 @@ +import json import re -from typing import Optional +from typing import Any, Dict, Optional from typing import Tuple from typing import Union from urllib.parse import urlparse -from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY +from ddtrace._trace.span import Span +from ddtrace.llmobs._constants import INPUT_MESSAGES, INPUT_TOKENS_METRIC_KEY, METADATA, OUTPUT_MESSAGES from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._utils import _get_attr @@ -270,3 +272,80 @@ def get_messages_from_converse_content(role: str, content: list): if message: messages.append(message) return messages + + +def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None: + """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags.""" + prompt = kwargs.get("prompt", "") + if isinstance(prompt, str): + prompt = [prompt] + parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash")} + output_messages = [{"content": ""}] + if not span.error and completions: + choices = getattr(completions, "choices", completions) + output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] + span._set_ctx_items( + { + INPUT_MESSAGES: [{"content": str(p)} for p in prompt], + METADATA: parameters, + OUTPUT_MESSAGES: output_messages, + } + ) + +def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: + """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" + input_messages = [] + for m in kwargs.get("messages", []): + input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) + parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash")} + span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters}) + + if span.error or not messages: + span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) + return + if isinstance(messages, list): # streamed response + output_messages = [] + for streamed_message in messages: + message = {"content": streamed_message["content"], "role": streamed_message["role"]} + tool_calls = streamed_message.get("tool_calls", []) + if tool_calls: + message["tool_calls"] = [ + { + "name": tool_call.get("name", ""), + "arguments": json.loads(tool_call.get("arguments", "")), + "tool_id": tool_call.get("tool_id", ""), + "type": tool_call.get("type", ""), + } + for tool_call in tool_calls + ] + output_messages.append(message) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + return + choices = _get_attr(messages, "choices", []) + output_messages = [] + for idx, choice in enumerate(choices): + tool_calls_info = [] + choice_message = _get_attr(choice, "message", {}) + role = _get_attr(choice_message, "role", "") + content = _get_attr(choice_message, "content", "") or "" + function_call = _get_attr(choice_message, "function_call", None) + if function_call: + function_name = _get_attr(function_call, "name", "") + arguments = json.loads(_get_attr(function_call, "arguments", "")) + function_call_info = {"name": 
function_name, "arguments": arguments} + output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]}) + continue + tool_calls = _get_attr(choice_message, "tool_calls", []) or [] + for tool_call in tool_calls: + tool_call_info = { + "name": getattr(tool_call.function, "name", ""), + "arguments": json.loads(getattr(tool_call.function, "arguments", "")), + "tool_id": getattr(tool_call, "id", ""), + "type": getattr(tool_call, "type", ""), + } + tool_calls_info.append(tool_call_info) + if tool_calls_info: + output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) + continue + output_messages.append({"content": content, "role": role}) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) From 0a6b744d29d9d15652d68a788e303657a9ec0164 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 25 Mar 2025 17:47:59 -0400 Subject: [PATCH 17/61] finish non streamed spans --- ddtrace/contrib/internal/litellm/patch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index fbd96f4a156..19cb3da51af 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,6 +83,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + span.finish() async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -105,6 +106,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + span.finish() @with_traced_module From 33aae75d2ce0ee1829b052f4059524bed8903250 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 09:51:57 -0400 Subject: [PATCH 18/61] use sample pc config --- ddtrace/contrib/internal/litellm/patch.py | 6 ++++-- ddtrace/contrib/internal/litellm/utils.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 19cb3da51af..798040020ec 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -82,7 +82,8 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() @@ -105,7 +106,8 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com finally: # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) span.finish() diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 51b36888dca..1f326105ffc 100644 --- 
a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -123,7 +123,8 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp else: formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] operation = "completion" if is_completion else "chat" - integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + if integration.is_pc_sampled_llmobs(span): + integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) From 184d728f8247d031bfe1b4046b712ef8c9cf3d9b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 10:04:02 -0400 Subject: [PATCH 19/61] move openai message parsing utils to shared utils file --- ddtrace/contrib/internal/litellm/utils.py | 81 +--------------------- ddtrace/contrib/internal/openai/utils.py | 82 +---------------------- ddtrace/llmobs/_integrations/utils.py | 77 ++++++++++++++++++++- 3 files changed, 82 insertions(+), 158 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 1f326105ffc..26803f1f5ab 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -4,6 +4,7 @@ from typing import List from ddtrace.internal.logger import get_logger +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks log = get_logger(__name__) @@ -119,87 +120,11 @@ def _loop_handler(chunk, streamed_chunks): def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): try: if is_completion: - formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] else: - formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] operation = "completion" if is_completion else "chat" if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) - - -def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" - if not streamed_chunks: - return {"text": ""} - completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} - if streamed_chunks[-1].finish_reason is not None: - completion["finish_reason"] = streamed_chunks[-1].finish_reason - if hasattr(streamed_chunks[0], "usage"): - completion["usage"] = streamed_chunks[0].usage - return completion - - -def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): - """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" - if function_call_chunk: - if not stored_tool_calls: - stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), 
"arguments": ""}) - stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") - return - if not tool_call_chunk: - return - tool_call_idx = getattr(tool_call_chunk, "index", None) - tool_id = getattr(tool_call_chunk, "id", None) - tool_type = getattr(tool_call_chunk, "type", None) - function_call = getattr(tool_call_chunk, "function", None) - function_name = getattr(function_call, "name", "") - # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) - list_idx = next( - (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), - None, - ) - if list_idx is None: - stored_tool_calls.append( - {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} - ) - list_idx = -1 - stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") - - -def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a chat completion message dictionary from streamed chunks. - The resulting message dictionary is of form: - {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} - """ - message = {"content": "", "tool_calls": []} - for chunk in streamed_chunks: - if getattr(chunk, "usage", None): - message["usage"] = chunk.usage - if not hasattr(chunk, "delta"): - continue - if getattr(chunk, "index", None) and not message.get("index"): - message["index"] = chunk.index - if getattr(chunk.delta, "role") and not message.get("role"): - message["role"] = chunk.delta.role - if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): - message["finish_reason"] = chunk.finish_reason - chunk_content = getattr(chunk.delta, "content", "") - if chunk_content: - message["content"] += chunk_content - continue - function_call = getattr(chunk.delta, "function_call", None) - if function_call: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) - tool_calls = getattr(chunk.delta, "tool_calls", None) - if not tool_calls: - continue - for tool_call in tool_calls: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) - if message["tool_calls"]: - message["tool_calls"].sort(key=lambda x: x.get("index", 0)) - else: - message.pop("tool_calls", None) - message["content"] = message["content"].strip() - return message diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index c421d57c74c..5f4f227054e 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,6 +6,7 @@ from typing import Generator from typing import List +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks import wrapt from ddtrace.internal.logger import get_logger @@ -265,9 +266,9 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp request_messages = kwargs.get("messages", None) try: if is_completion: - formatted_completions = [_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] else: - formatted_completions = [_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [openai_construct_message_from_streamed_chunks(choice) 
for choice in streamed_chunks] if integration.is_pc_sampled_span(span): _tag_streamed_response(integration, span, formatted_completions) _set_token_metrics(span, formatted_completions, prompts, request_messages, kwargs) @@ -276,83 +277,6 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) - -def _construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" - if not streamed_chunks: - return {"text": ""} - completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} - if streamed_chunks[-1].finish_reason is not None: - completion["finish_reason"] = streamed_chunks[-1].finish_reason - if hasattr(streamed_chunks[0], "usage"): - completion["usage"] = streamed_chunks[0].usage - return completion - - -def _construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): - """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" - if function_call_chunk: - if not stored_tool_calls: - stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) - stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") - return - if not tool_call_chunk: - return - tool_call_idx = getattr(tool_call_chunk, "index", None) - tool_id = getattr(tool_call_chunk, "id", None) - tool_type = getattr(tool_call_chunk, "type", None) - function_call = getattr(tool_call_chunk, "function", None) - function_name = getattr(function_call, "name", "") - # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) - list_idx = next( - (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), - None, - ) - if list_idx is None: - stored_tool_calls.append( - {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} - ) - list_idx = -1 - stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") - - -def _construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: - """Constructs a chat completion message dictionary from streamed chunks. 
- The resulting message dictionary is of form: - {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} - """ - message = {"content": "", "tool_calls": []} - for chunk in streamed_chunks: - if getattr(chunk, "usage", None): - message["usage"] = chunk.usage - if not hasattr(chunk, "delta"): - continue - if getattr(chunk, "index", None) and not message.get("index"): - message["index"] = chunk.index - if getattr(chunk.delta, "role") and not message.get("role"): - message["role"] = chunk.delta.role - if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): - message["finish_reason"] = chunk.finish_reason - chunk_content = getattr(chunk.delta, "content", "") - if chunk_content: - message["content"] += chunk_content - continue - function_call = getattr(chunk.delta, "function_call", None) - if function_call: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) - tool_calls = getattr(chunk.delta, "tool_calls", None) - if not tool_calls: - continue - for tool_call in tool_calls: - _construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) - if message["tool_calls"]: - message["tool_calls"].sort(key=lambda x: x.get("index", 0)) - else: - message.pop("tool_calls", None) - message["content"] = message["content"].strip() - return message - - def _tag_streamed_response(integration, span, completions_or_messages=None): """Tagging logic for streamed completions and chat completions.""" for idx, choice in enumerate(completions_or_messages): diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index a0e41082400..477c15bfdee 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -1,6 +1,6 @@ import json import re -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from typing import Tuple from typing import Union from urllib.parse import urlparse @@ -349,3 +349,78 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: continue output_messages.append({"content": content, "role": role}) span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + +def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" + if not streamed_chunks: + return {"text": ""} + completion = {"text": "".join(c.text for c in streamed_chunks if getattr(c, "text", None))} + if streamed_chunks[-1].finish_reason is not None: + completion["finish_reason"] = streamed_chunks[-1].finish_reason + if hasattr(streamed_chunks[0], "usage"): + completion["usage"] = streamed_chunks[0].usage + return completion + + +def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_chunk=None, function_call_chunk=None): + """Builds a tool_call dictionary from streamed function_call/tool_call chunks.""" + if function_call_chunk: + if not stored_tool_calls: + stored_tool_calls.append({"name": getattr(function_call_chunk, "name", ""), "arguments": ""}) + stored_tool_calls[0]["arguments"] += getattr(function_call_chunk, "arguments", "") + return + if not tool_call_chunk: + return + tool_call_idx = getattr(tool_call_chunk, "index", None) + tool_id = getattr(tool_call_chunk, "id", None) + tool_type = getattr(tool_call_chunk, "type", None) + function_call = getattr(tool_call_chunk, "function", None) + function_name = 
getattr(function_call, "name", "") + # Find tool call index in tool_calls list, as it may potentially arrive unordered (i.e. index 2 before 0) + list_idx = next( + (idx for idx, tool_call in enumerate(stored_tool_calls) if tool_call["index"] == tool_call_idx), + None, + ) + if list_idx is None: + stored_tool_calls.append( + {"name": function_name, "arguments": "", "index": tool_call_idx, "tool_id": tool_id, "type": tool_type} + ) + list_idx = -1 + stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "") + + +def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: + """Constructs a chat completion message dictionary from streamed chunks. + The resulting message dictionary is of form: + {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."} + """ + message = {"content": "", "tool_calls": []} + for chunk in streamed_chunks: + if getattr(chunk, "usage", None): + message["usage"] = chunk.usage + if not hasattr(chunk, "delta"): + continue + if getattr(chunk, "index", None) and not message.get("index"): + message["index"] = chunk.index + if getattr(chunk.delta, "role") and not message.get("role"): + message["role"] = chunk.delta.role + if getattr(chunk, "finish_reason", None) and not message.get("finish_reason"): + message["finish_reason"] = chunk.finish_reason + chunk_content = getattr(chunk.delta, "content", "") + if chunk_content: + message["content"] += chunk_content + continue + function_call = getattr(chunk.delta, "function_call", None) + if function_call: + openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], function_call_chunk=function_call) + tool_calls = getattr(chunk.delta, "tool_calls", None) + if not tool_calls: + continue + for tool_call in tool_calls: + openai_construct_tool_call_from_streamed_chunk(message["tool_calls"], tool_call_chunk=tool_call) + if message["tool_calls"]: + message["tool_calls"].sort(key=lambda x: x.get("index", 0)) + else: + message.pop("tool_calls", None) + message["content"] = message["content"].strip() + return message From a241160c5cd71279d5b5dac60a0b02d10dd6eccc Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 10:47:08 -0400 Subject: [PATCH 20/61] reuse role for litellm streamed multi choice responses --- ddtrace/llmobs/_integrations/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 477c15bfdee..855b0a829aa 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -304,9 +304,12 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) return if isinstance(messages, list): # streamed response + role = "" output_messages = [] for streamed_message in messages: - message = {"content": streamed_message["content"], "role": streamed_message["role"]} + # litellm roles appear only on the first choice, so store it to be used for all choices + role = streamed_message.get("role", "") or role + message = {"content": streamed_message.get("content", ""), "role": role} tool_calls = streamed_message.get("tool_calls", []) if tool_calls: message["tool_calls"] = [ From 2d2337a9f392dae9bf38bf5c5ef5b74e47c613e9 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 11:21:37 -0400 Subject: [PATCH 21/61] pass operation to litellm llmobs set tags --- ddtrace/contrib/internal/litellm/patch.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 798040020ec..1894e25d185 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -83,7 +83,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") span.finish() @@ -107,7 +107,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp) + integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") span.finish() From 3891da90225d10797aead5a83cf6fd5cd8df5a22 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Wed, 26 Mar 2025 13:27:37 -0400 Subject: [PATCH 22/61] wrap get_llm_provider in litellm sdk --- ddtrace/contrib/internal/litellm/patch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 1894e25d185..2ad1b2fdf10 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -137,6 +137,7 @@ def patch(): wrap("litellm", "text_completion", traced_text_completion(litellm)) wrap("litellm", "atext_completion", traced_atext_completion(litellm)) wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) + wrap("litellm", "litellm.main.get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -150,5 +151,6 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") - + unwrap(litellm.litellm.main, "get_llm_provider") + delattr(litellm, "_datadog_integration") From df8449ea05d03081f3de8bef6ebdd027c2dfd42f Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 08:34:36 -0400 Subject: [PATCH 23/61] update provider map to store parsed model name and provider --- ddtrace/contrib/internal/litellm/patch.py | 7 +++---- ddtrace/llmobs/_integrations/litellm.py | 5 +++-- ddtrace/llmobs/_llmobs.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 2ad1b2fdf10..c17bbcdd447 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -60,7 +60,7 @@ def traced_text_completion(litellm, pin, func, instance, args, kwargs): @with_traced_module async def traced_atext_completion(litellm, pin, func, instance, args, kwargs): - return await _traced_acompletion(litellm, pin, func, instance, args, kwargs) + return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True) def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion): @@ -117,8 +117,7 @@ def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): integration = litellm._datadog_integration model, 
custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) # Store the provider information in the integration - integration._provider_map[requested_model] = custom_llm_provider - integration._provider_map[model] = custom_llm_provider + integration._model_map[requested_model] = (model, custom_llm_provider) return model, custom_llm_provider, dynamic_api_key, api_base @@ -152,5 +151,5 @@ def unpatch(): unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") unwrap(litellm.litellm.main, "get_llm_provider") - + delattr(litellm, "_datadog_integration") diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index f3ba73763cf..94eb1f2a579 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -15,7 +15,8 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" - _provider_map = {} + # maps requested model name to parsed model name and provider + _model_map = {} def _set_base_span_tags( self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] @@ -32,7 +33,7 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - model_provider = self._provider_map.get(model_name, "") + _, model_provider = self._model_map.get(model_name, (model_name, "")) # response format will match Open AI if operation == "completion": diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 61ead184924..c3f278cf6d1 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -85,6 +85,7 @@ "google_generativeai": "google_generativeai", "vertexai": "vertexai", "langgraph": "langgraph", + "litellm": "litellm", } From 122cf458f8f7a00fd24d6878703a2046297c1255 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 08:48:36 -0400 Subject: [PATCH 24/61] update model name based on model map --- ddtrace/llmobs/_integrations/litellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 94eb1f2a579..024720313f2 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -33,7 +33,7 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - _, model_provider = self._model_map.get(model_name, (model_name, "")) + model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # response format will match Open AI if operation == "completion": From 92670c5c15925df84786fd3ef69e3f2d6675e895 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 27 Mar 2025 13:46:27 -0400 Subject: [PATCH 25/61] add llmobs test file and remove usage extraction logic --- ddtrace/contrib/internal/litellm/utils.py | 32 --- .../completion_stream_exclude_usage.yaml | 190 ++++++++++++++ ...stream_multiple_choices_exclude_usage.yaml | 242 ++++++++++++++++++ tests/contrib/litellm/conftest.py | 25 +- tests/contrib/litellm/test_litellm.py | 23 +- tests/contrib/litellm/test_litellm_llmobs.py | 47 ++++ tests/contrib/litellm/utils.py | 30 ++- 7 files changed, 548 insertions(+), 41 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml create mode 100644 tests/contrib/litellm/test_litellm_llmobs.py diff --git a/ddtrace/contrib/internal/litellm/utils.py 
b/ddtrace/contrib/internal/litellm/utils.py
index 26803f1f5ab..e854f814aa9 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -36,7 +36,6 @@ def __iter__(self):
         exception_raised = False
         try:
             for chunk in self._generator:
-                self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -49,23 +48,6 @@ def __iter__(self):
                 self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
             )
             self._dd_span.finish()
-
-    def _extract_token_chunk(self, chunk):
-        """Attempt to extract the token chunk (the last chunk in the stream) from the streamed response."""
-        choices = getattr(chunk, "choices", None)
-        if not choices:
-            return
-        choice = choices[0]
-        if not getattr(choice, "finish_reason", None):
-            # Only the second-to-last chunk in the stream has finish_reason set when token usage is enabled
-            return
-        try:
-            # The user is not expecting the last token chunk, since it is not part of the default streamed
-            # response, so we consume it and extract the token usage metadata before it reaches the user.
-            usage_chunk = self._generator.__next__()
-            self._streamed_chunks[0].insert(0, usage_chunk)
-        except (StopIteration, GeneratorExit):
-            return
 
 
 class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream):
     async def __aenter__(self):
@@ -79,7 +61,6 @@ async def __aiter__(self):
         exception_raised = False
         try:
             async for chunk in self._generator:
-                await self._extract_token_chunk(chunk)
                 yield chunk
                 _loop_handler(chunk, self._streamed_chunks)
         except Exception:
@@ -92,19 +73,6 @@ async def __aiter__(self):
                 self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion
             )
             self._dd_span.finish()
-
-    async def _extract_token_chunk(self, chunk):
-        choices = getattr(chunk, "choices", None)
-        if not choices:
-            return
-        choice = choices[0]
-        if not getattr(choice, "finish_reason", None):
-            return
-        try:
-            usage_chunk = await self._generator.__anext__()
-            self._streamed_chunks[0].insert(0, usage_chunk)
-        except (StopAsyncIteration, GeneratorExit):
-            return
 
 
 def _loop_handler(chunk, streamed_chunks):
     """Appends the chunk to the correct index in the streamed_chunks list.
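The cassette files added below are recorded HTTP interactions that vcrpy replays during tests, which is how the suite exercises streamed completions without live OpenAI credentials. A sketch of how such a fixture is typically wired up; the directory, matcher, and filter settings here are plausible defaults rather than this repo's exact configuration:

    import vcr


    def get_request_vcr():
        # Replay-only: never hit the network in CI, and keep API keys out
        # of the recorded cassettes.
        return vcr.VCR(
            cassette_library_dir="tests/contrib/litellm/cassettes",
            record_mode="none",
            match_on=["path", "method"],
            filter_headers=["authorization"],
        )


    def test_completion_stream_without_usage(litellm):
        with get_request_vcr().use_cassette("completion_stream_exclude_usage.yaml"):
            resp = litellm.completion(
                model="gpt-3.5-turbo",
                messages=[{"content": "Hey, what is up?", "role": "user"}],
                stream=True,
                stream_options={"include_usage": False},
            )
            assert list(resp)  # chunks come from the cassette, not the API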
diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml new file mode 100644 index 00000000000..7f5f315dbd0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage.yaml @@ -0,0 +1,190 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + cookie: + - _cfuvid=XCe0nOmJMT2AZm.ZO6Nwin7Mu3h3tYIlybA60Pa1myk-1742476858288-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + 
here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + and"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + anything"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + need"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + doing"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMGV32R7vAAVNOxXzM6JtOSVzIy","object":"chat.completion.chunk","created":1743088976,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 926fe2d998864ce4-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 27 Mar 2025 15:22:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=CKfpgOZbNgeO_hZnZwDsP9MQL771OL.QGPQvL7sPRLM-1743088977-1.0.1.1-_AOAMiv0VN3eR0.0l1ZyAhvT8I.sKfG.FnBMJqIAMVU5fFpO4aETM8QMsSGgjjx2dyoOnQ9sOSa6vt2WO_I8dLE2qo4dNe7VwOTDw21Ujrw; + path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=D6vyj85I9udz_8Fd3dvOGdjJWNUTz5W_P_XpI71JrJw-1743088977188-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '178' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999994' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_601c6b7020f2cb6a2bb1fbd6d195dabc + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml new file mode 100644 index 00000000000..3b4b44429c0 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage.yaml @@ -0,0 +1,242 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + 
x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + Not"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + much"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + ready"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + just"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + here"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: {"id":"chatcmpl-BFjMHfAHRDNfKBnktssaHo3Ly6OU0","object":"chat.completion.chunk","created":1743088977,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 926fe2dd98378f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 27 Mar 2025 15:22:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=tMtTAMSYbzl6Mz.ZmEx.t97SoHOEXO_PAGvnES4TErc-1743088977-1.0.1.1-9HXDCBRrHw.0632QNaKGFswPnd4Q7Gcf7tPaifQTEHGv.NOLfayXgXIeHlotH7TAOqyxUdp.KNZ2w43w08vOKnwATLE4VdXkeKJ05zjIvV8; + path=/; expires=Thu, 27-Mar-25 15:52:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=fueXgERk0DK.0YI2CrP74Rvo77MpY9vRD4SXAqqK4S4-1743088977877-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '170' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999993' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_eb315d75e206dc7de5d075296b7b9b6c + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 9f88e2f5921..d12810564c4 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -1,5 +1,5 @@ import os -from typing import Generator +import mock import pytest from ddtrace.contrib.internal.litellm.patch import patch @@ -11,6 +11,7 @@ from tests.utils import override_env from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr +from ddtrace.llmobs import LLMObs def default_global_config(): @@ -27,6 +28,17 @@ def ddtrace_config_litellm(): return {} +@pytest.fixture() +def mock_llmobs_writer(): + patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") + try: + LLMObsSpanWriterMock = patcher.start() + m = mock.MagicMock() + LLMObsSpanWriterMock.return_value = m + yield m + finally: + patcher.stop() + @pytest.fixture def litellm(ddtrace_global_config, ddtrace_config_litellm): global_config = default_global_config() @@ -44,15 +56,22 @@ def litellm(ddtrace_global_config, 
ddtrace_config_litellm): yield litellm unpatch() - @pytest.fixture -def mock_tracer(litellm): +def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm, tracer=mock_tracer) pin.tracer._configure() + + if ddtrace_global_config.get("_llmobs_enabled", False): + # Have to disable and re-enable LLMObs to use to mock tracer. + LLMObs.disable() + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) + yield mock_tracer + LLMObs.disable() + @pytest.fixture def request_vcr(): diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 6cb65fd69f6..570c2d23251 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,12 +33,15 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - litellm.completion( + resp =litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @@ -46,36 +49,48 @@ def test_litellm_completion(litellm, request_vcr, stream, n): async def test_litellm_acompletion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - await litellm.acompletion( + resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + async for _ in resp: + pass + + + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - litellm.text_completion( + resp = litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) async def test_litellm_atext_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): - await litellm.atext_completion( + resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("model", ["vertex_ai/gemini-pro", "anthropic/claude-3-5-sonnet-20240620"]) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py new file mode 100644 index 00000000000..811b6343d5e --- /dev/null +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -0,0 +1,47 @@ +import pytest + +from tests.contrib.litellm.utils import get_cassette_name, consume_stream +from tests.llmobs._utils import _expected_llmobs_llm_span_event + + +@pytest.mark.parametrize( + "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] +) +class TestLLMObsLiteLLM: + # TODO: need to behind the scenes extract token usage from the stream so that it can still be reported 
even if not returned in the response + @pytest.mark.parametrize("stream,n,include_usage", [(True, 1, True), (True, 2, True), (False, 1, True), (False, 2, True), (True, 1, False), (True, 2, False), (False, 1, False), (False, 2, False)]) + def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages = [{"content": message.message.content, "role": message.message.role} for message in resp.choices] + token_metrics = { + "input_tokens": resp.usage.prompt_tokens, + "output_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 62eb8fe6334..55684b9f602 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -21,7 +21,33 @@ def get_request_vcr(): # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n): +def get_cassette_name(stream, n, include_usage=True): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" - return "completion" + stream_suffix + choice_suffix + CASETTE_EXTENSION + # include_usage only affects streamed responses + if stream and not include_usage: + usage_suffix = "_exclude_usage" + else: + usage_suffix = "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION + + +def consume_stream(resp, n): + output_messages = [{"content": "", "role": ""} for _ in range(n)] + token_metrics = {} + role = None + for chunk in resp: + for choice in chunk["choices"]: + content = choice["delta"]["content"] or "" + output_messages[choice.index]["content"] += content + if not output_messages[choice.index]["role"]: + role = choice["delta"]["role"] or role + output_messages[choice.index]["role"] = role + + if "usage" in chunk: + token_metrics = { + "input_tokens": chunk["usage"]["prompt_tokens"], + "output_tokens": chunk["usage"]["completion_tokens"], + "total_tokens": chunk["usage"]["total_tokens"], + } + return output_messages, token_metrics From 60830c4ed05427efd752a67fc97e19d8b75a89a6 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 28 Mar 2025 11:06:47 -0400 Subject: [PATCH 26/61] add more tests --- tests/contrib/litellm/test_litellm_llmobs.py | 154 +++++++++++++++++-- tests/contrib/litellm/utils.py | 55 +++++-- 2 files changed, 189 insertions(+), 20 deletions(-) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py 
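A note on the stream helpers this commit refactors: with n > 1, OpenAI-style streams interleave chunks across choices, so output has to be accumulated per choice.index (and usage captured from the final chunk) before it can be compared with the expected LLMObs span event. A minimal, self-contained sketch of that demultiplexing, assuming chunks shaped like OpenAI chat-completion deltas; the function name and dict shapes below are illustrative rather than part of the integration:

# Sketch: demultiplex an OpenAI-style chat stream with n choices.
# Assumes chunks like {"choices": [{"index": 0, "delta": {...}}], "usage": {...}}.
def aggregate_chunks(chunks, n):
    messages = [{"role": "", "content": ""} for _ in range(n)]
    usage = {}
    for chunk in chunks:
        for choice in chunk.get("choices", []):
            idx = choice.get("index", 0)
            delta = choice.get("delta") or {}
            # The role arrives only on the first delta of each choice.
            messages[idx]["role"] = messages[idx]["role"] or (delta.get("role") or "")
            messages[idx]["content"] += delta.get("content") or ""
        # With stream_options={"include_usage": True}, usage rides on the last chunk.
        if chunk.get("usage"):
            usage = {
                "input_tokens": chunk["usage"]["prompt_tokens"],
                "output_tokens": chunk["usage"]["completion_tokens"],
                "total_tokens": chunk["usage"]["total_tokens"],
            }
    return messages, usage

In the two-choice cassettes above, chunks alternate between index 0 and index 1, so each accumulator ends with its own complete copy of the reply.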
index 811b6343d5e..e64c5975c28 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -1,6 +1,6 @@ import pytest -from tests.contrib.litellm.utils import get_cassette_name, consume_stream +from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response from tests.llmobs._utils import _expected_llmobs_llm_span_event @@ -8,8 +8,19 @@ "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] ) class TestLLMObsLiteLLM: - # TODO: need to behind the scenes extract token usage from the stream so that it can still be reported even if not returned in the response - @pytest.mark.parametrize("stream,n,include_usage", [(True, 1, True), (True, 2, True), (False, 1, True), (False, 2, True), (True, 1, False), (True, 2, False), (False, 1, False), (False, 2, False)]) + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -23,12 +34,51 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, if stream: output_messages, token_metrics = consume_stream(resp, n) else: - output_messages = [{"content": message.message.content, "role": message.message.role} for message in resp.choices] - token_metrics = { - "input_tokens": resp.usage.prompt_tokens, - "output_tokens": resp.usage.completion_tokens, - "total_tokens": resp.usage.total_tokens, - } + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + span = mock_tracer.pop_traces()[0][0] assert mock_llmobs_writer.enqueue.call_count == 1 mock_llmobs_writer.enqueue.assert_called_with( @@ -44,4 +94,90 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, 
True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = litellm.text_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + @pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], + ) + async def test_atext_completion( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + prompt = "Hey, what is up?" + resp = await litellm.atext_completion( + model="gpt-3.5-turbo", + prompt=prompt, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + ) + if stream: + output_messages, token_metrics = await async_consume_stream(resp, n, is_completion=True) + else: + output_messages, token_metrics = parse_response(resp, is_completion=True) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 55684b9f602..508d6a1fd3e 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -32,22 +32,55 @@ def get_cassette_name(stream, n, include_usage=True): return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION -def consume_stream(resp, n): - output_messages = [{"content": "", "role": ""} for _ in range(n)] +def consume_stream(resp, n, is_completion=False): + output_messages = [{"content": ""} for _ in range(n)] token_metrics = {} role = None for chunk in resp: - for choice in chunk["choices"]: - content = choice["delta"]["content"] or "" - output_messages[choice.index]["content"] += content - if not output_messages[choice.index]["role"]: - role = choice["delta"]["role"] or role - output_messages[choice.index]["role"] = role - - if "usage" in chunk: - token_metrics = { + role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + return output_messages, token_metrics + + +async def 
async_consume_stream(resp, n, is_completion=False): + output_messages = [{"content": ""} for _ in range(n)] + token_metrics = {} + role = None + async for chunk in resp: + role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + return output_messages, token_metrics + + +def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion=False): + for choice in chunk["choices"]: + content = choice["text"] if is_completion else choice["delta"]["content"] + content = content or "" + output_messages[choice.index]["content"] += content + if "role" not in output_messages[choice.index] and (choice.get("delta", {}).get("role") or role): + role = choice.get("delta", {}).get("role") or role + output_messages[choice.index]["role"] = role + + if "usage" in chunk and chunk["usage"]: + token_metrics.update( + { "input_tokens": chunk["usage"]["prompt_tokens"], "output_tokens": chunk["usage"]["completion_tokens"], "total_tokens": chunk["usage"]["total_tokens"], } + ) + + return role + + +def parse_response(resp, is_completion=False): + output_messages = [] + for choice in resp.choices: + message = {"content": choice.text if is_completion else choice.message.content} + if choice.get("role", None) or choice.get("message", {}).get("role", None): + message["role"] = choice["role"] if is_completion else choice["message"]["role"] + output_messages.append(message) + token_metrics = { + "input_tokens": resp.usage.prompt_tokens, + "output_tokens": resp.usage.completion_tokens, + "total_tokens": resp.usage.total_tokens, + } return output_messages, token_metrics From f4d76d74e7f7a3f3a33799b86afb1a621b39a685 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Fri, 28 Mar 2025 11:20:40 -0400 Subject: [PATCH 27/61] run black --- ddtrace/contrib/_litellm.py | 2 +- ddtrace/contrib/internal/litellm/patch.py | 19 +++++++++++-------- ddtrace/contrib/internal/litellm/utils.py | 21 +++++++++++++++++---- ddtrace/contrib/internal/openai/utils.py | 14 +++++++++++--- ddtrace/llmobs/_integrations/litellm.py | 17 ++++++++++++----- ddtrace/llmobs/_integrations/openai.py | 6 +++++- ddtrace/llmobs/_integrations/utils.py | 16 ++++++++++++---- tests/contrib/litellm/conftest.py | 2 ++ tests/contrib/litellm/test_litellm.py | 5 +---- 9 files changed, 72 insertions(+), 30 deletions(-) diff --git a/ddtrace/contrib/_litellm.py b/ddtrace/contrib/_litellm.py index 026d11250ca..70d4038a173 100644 --- a/ddtrace/contrib/_litellm.py +++ b/ddtrace/contrib/_litellm.py @@ -44,4 +44,4 @@ from ddtrace import Pin, config Pin.override(litellm, service="my-litellm-service") -""" # noqa: E501 \ No newline at end of file +""" # noqa: E501 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index c17bbcdd447..74515b8e829 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -50,14 +50,17 @@ def _create_span(litellm, pin, func, instance, args, kwargs): def traced_completion(litellm, pin, func, instance, args, kwargs): return _traced_completion(litellm, pin, func, instance, args, kwargs, False) + @with_traced_module async def traced_acompletion(litellm, pin, func, instance, args, kwargs): return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, False) + @with_traced_module def traced_text_completion(litellm, pin, func, instance, args, kwargs): return _traced_completion(litellm, pin, func, instance, args, kwargs, True) + @with_traced_module async def traced_atext_completion(litellm, pin, 
func, instance, args, kwargs): return await _traced_acompletion(litellm, pin, func, instance, args, kwargs, True) @@ -72,9 +75,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion try: resp = func(*args, **kwargs) if stream: - return TracedLiteLLMStream( - resp, integration, span, args, kwargs, is_completion - ) + return TracedLiteLLMStream(resp, integration, span, args, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -83,7 +84,9 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") + integration.llmobs_set_tags( + span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" + ) span.finish() @@ -96,9 +99,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com try: resp = await func(*args, **kwargs) if stream: - return TracedLiteLLMAsyncStream( - resp, integration, span, args, kwargs, is_completion - ) + return TracedLiteLLMAsyncStream(resp, integration, span, args, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -107,7 +108,9 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com # streamed spans will be finished separately once the stream generator is exhausted if span.error or not stream: if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat") + integration.llmobs_set_tags( + span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" + ) span.finish() diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index e854f814aa9..37fdcf01b55 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -4,14 +4,19 @@ from typing import List from ddtrace.internal.logger import get_logger -from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import ( + openai_construct_completion_from_streamed_chunks, + openai_construct_message_from_streamed_chunks, +) log = get_logger(__name__) + def tag_request(span, kwargs): if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) + class BaseTracedLiteLLMStream: def __init__(self, generator, integration, span, args, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 @@ -49,6 +54,7 @@ def __iter__(self): ) self._dd_span.finish() + class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): await self._generator.__aenter__() @@ -74,6 +80,7 @@ async def __aiter__(self): ) self._dd_span.finish() + def _loop_handler(chunk, streamed_chunks): """Appends the chunk to the correct index in the streamed_chunks list. 
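As the "streamed spans will be finished separately" comments above indicate, the traced stream wrappers all follow one pattern: wrap the provider's generator, buffer chunks while the caller iterates, and defer span.finish() plus LLMObs tagging until the stream is exhausted. A minimal sketch of that deferred-finish pattern, with a duck-typed span and a hypothetical on_finished callback standing in for _process_finished_stream:

# Sketch: finish the span only once the wrapped generator is exhausted.
class TracedStream:
    def __init__(self, generator, span, on_finished):
        self._generator = generator
        self._span = span
        self._on_finished = on_finished  # called with the buffered chunks

    def __iter__(self):
        chunks = []
        try:
            for chunk in self._generator:
                chunks.append(chunk)
                yield chunk
        finally:
            # Runs on normal exhaustion, an early break, or a consumer error.
            self._on_finished(self._span, chunks)
            self._span.finish()

The real classes additionally split chunks into per-choice buckets (via _loop_handler) so multi-choice streams can be reassembled before tagging.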
@@ -88,11 +95,17 @@ def _loop_handler(chunk, streamed_chunks): def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_completion=False): try: if is_completion: - formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks + ] else: - formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks + ] operation = "completion" if is_completion else "chat" if integration.is_pc_sampled_llmobs(span): - integration.llmobs_set_tags(span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation) + integration.llmobs_set_tags( + span, args=[], kwargs=kwargs, response=formatted_completions, operation=operation + ) except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index 5f4f227054e..ddf0eb37b49 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,7 +6,10 @@ from typing import Generator from typing import List -from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks, openai_construct_message_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import ( + openai_construct_completion_from_streamed_chunks, + openai_construct_message_from_streamed_chunks, +) import wrapt from ddtrace.internal.logger import get_logger @@ -266,9 +269,13 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp request_messages = kwargs.get("messages", None) try: if is_completion: - formatted_completions = [openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_completion_from_streamed_chunks(choice) for choice in streamed_chunks + ] else: - formatted_completions = [openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks] + formatted_completions = [ + openai_construct_message_from_streamed_chunks(choice) for choice in streamed_chunks + ] if integration.is_pc_sampled_span(span): _tag_streamed_response(integration, span, formatted_completions) _set_token_metrics(span, formatted_completions, prompts, request_messages, kwargs) @@ -277,6 +284,7 @@ def _process_finished_stream(integration, span, kwargs, streamed_chunks, is_comp except Exception: log.warning("Error processing streamed completion/chat response.", exc_info=True) + def _tag_streamed_response(integration, span, completions_or_messages=None): """Tagging logic for streamed completions and chat completions.""" for idx, choice in enumerate(completions_or_messages): diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 024720313f2..c15c7836e4a 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,11 +3,20 @@ from typing import List from typing import Optional -from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY, METRICS, OUTPUT_TOKENS_METRIC_KEY, TOTAL_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import ( + INPUT_TOKENS_METRIC_KEY, + METRICS, + OUTPUT_TOKENS_METRIC_KEY, + TOTAL_TOKENS_METRIC_KEY, +) from 
ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._integrations.utils import ( + get_llmobs_metrics_tags, + openai_set_meta_tags_from_chat, + openai_set_meta_tags_from_completion, +) from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -18,9 +27,7 @@ class LiteLLMIntegration(BaseLLMIntegration): # maps requested model name to parsed model name and provider _model_map = {} - def _set_base_span_tags( - self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any] - ) -> None: + def _set_base_span_tags(self, span: Span, model: Optional[str] = None, **kwargs: Dict[str, Any]) -> None: if model is not None: span.set_tag_str("litellm.request.model", model) diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index b0ae3fc8e17..6c8464bf2a3 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -20,7 +20,11 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion +from ddtrace.llmobs._integrations.utils import ( + get_llmobs_metrics_tags, + openai_set_meta_tags_from_chat, + openai_set_meta_tags_from_completion, +) from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 855b0a829aa..cb7e022f802 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -279,7 +279,9 @@ def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], com prompt = kwargs.get("prompt", "") if isinstance(prompt, str): prompt = [prompt] - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash")} + parameters = { + k: v for k, v in kwargs.items() if k not in ("model", "prompt", "api_key", "user_api_key", "user_api_key_hash") + } output_messages = [{"content": ""}] if not span.error and completions: choices = getattr(completions, "choices", completions) @@ -292,23 +294,28 @@ def openai_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], com } ) + def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" input_messages = [] for m in kwargs.get("messages", []): input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash")} + parameters = { + k: v + for k, v in kwargs.items() + if k not in ("model", "messages", "tools", "functions", "api_key", "user_api_key", "user_api_key_hash") + } span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters}) 
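The streamed branch just below also has to handle tool calls, whose JSON arguments arrive as string fragments spread across many chunks (the _with_tools cassettes added later in this series stream fragments like '{"', 'location', '":"', 'San'). A standalone illustration of reassembling them, assuming OpenAI-style chunk dicts; rebuild_tool_calls is a hypothetical helper, not the integration's API:

import json

# Sketch: rebuild one choice's streamed tool calls from delta fragments.
# Arguments are valid JSON only after every fragment has been concatenated.
def rebuild_tool_calls(chunks, choice_index=0):
    calls = {}  # tool-call index -> {"name": ..., "arguments": ...}
    for chunk in chunks:
        for choice in chunk.get("choices", []):
            if choice.get("index", 0) != choice_index:
                continue
            for tc in (choice.get("delta") or {}).get("tool_calls") or []:
                entry = calls.setdefault(tc.get("index", 0), {"name": "", "arguments": ""})
                fn = tc.get("function") or {}
                entry["name"] = entry["name"] or (fn.get("name") or "")
                entry["arguments"] += fn.get("arguments") or ""
    return {i: {"name": c["name"], "arguments": json.loads(c["arguments"])} for i, c in calls.items()}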
if span.error or not messages: span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) return if isinstance(messages, list): # streamed response - role = "" + role = "" output_messages = [] for streamed_message in messages: # litellm roles appear only on the first choice, so store it to be used for all choices - role = streamed_message.get("role", "") or role + role = streamed_message.get("role", "") or role message = {"content": streamed_message.get("content", ""), "role": role} tool_calls = streamed_message.get("tool_calls", []) if tool_calls: @@ -353,6 +360,7 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: output_messages.append({"content": content, "role": role}) span._set_ctx_item(OUTPUT_MESSAGES, output_messages) + def openai_construct_completion_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]: """Constructs a completion dictionary of form {"text": "...", "finish_reason": "..."} from streamed chunks.""" if not streamed_chunks: diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index d12810564c4..9bc9e4fa887 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -39,6 +39,7 @@ def mock_llmobs_writer(): finally: patcher.stop() + @pytest.fixture def litellm(ddtrace_global_config, ddtrace_config_litellm): global_config = default_global_config() @@ -56,6 +57,7 @@ def litellm(ddtrace_global_config, ddtrace_config_litellm): yield litellm unpatch() + @pytest.fixture def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 570c2d23251..a6c94383811 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,7 +33,7 @@ def test_global_tags(ddtrace_config_litellm, litellm, request_vcr, mock_tracer): def test_litellm_completion(litellm, request_vcr, stream, n): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - resp =litellm.completion( + resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, @@ -60,9 +60,6 @@ async def test_litellm_acompletion(litellm, request_vcr, stream, n): pass - - - @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @pytest.mark.snapshot(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]) def test_litellm_text_completion(litellm, request_vcr, stream, n): From a4a2b24a70b3ad816f80a0a7b0f3f7966e36284c Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Sun, 30 Mar 2025 18:32:41 -0400 Subject: [PATCH 28/61] add tool call tests --- .riot/requirements/45c12de.txt | 25 +-- .riot/requirements/e05a904.txt | 25 +-- .riot/requirements/e8c8851.txt | 25 +-- .riot/requirements/f30dfc2.txt | 25 +-- ...ompletion_multiple_choices_with_tools.yaml | 107 ++++++++++ ...etion_stream_exclude_usage_with_tools.yaml | 130 +++++++++++++ ...iple_choices_exclude_usage_with_tools.yaml | 162 ++++++++++++++++ ...on_stream_multiple_choices_with_tools.yaml | 183 ++++++++++++++++++ .../completion_stream_with_tools.yaml | 151 +++++++++++++++ .../cassettes/completion_with_tools.yaml | 110 +++++++++++ tests/contrib/litellm/test_litellm_llmobs.py | 103 +++++----- tests/contrib/litellm/utils.py | 74 ++++++- 12 files changed, 1011 insertions(+), 109 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml create mode 
100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_with_tools.yaml diff --git a/.riot/requirements/45c12de.txt b/.riot/requirements/45c12de.txt index c2da32fca1e..3f0c6ae7839 100644 --- a/.riot/requirements/45c12de.txt +++ b/.riot/requirements/45c12de.txt @@ -10,8 +10,8 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -35,30 +35,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -67,7 +67,8 @@ sortedcontainers==2.4.0 tiktoken==0.9.0 tokenizers==0.21.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/e05a904.txt b/.riot/requirements/e05a904.txt index 48afc8fdf4a..e46108467ec 100644 --- a/.riot/requirements/e05a904.txt +++ b/.riot/requirements/e05a904.txt @@ -11,8 +11,8 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -37,30 +37,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -70,7 +70,8 @@ tiktoken==0.9.0 tokenizers==0.21.1 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/e8c8851.txt b/.riot/requirements/e8c8851.txt index a209020993c..55dd71876c7 100644 --- a/.riot/requirements/e8c8851.txt +++ b/.riot/requirements/e8c8851.txt @@ -10,8 +10,8 @@ aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.9.0 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 
+botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -35,30 +35,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -67,7 +67,8 @@ sortedcontainers==2.4.0 tiktoken==0.9.0 tokenizers==0.21.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==2.3.0 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/.riot/requirements/f30dfc2.txt b/.riot/requirements/f30dfc2.txt index c9092e0225c..0f57ac61e7a 100644 --- a/.riot/requirements/f30dfc2.txt +++ b/.riot/requirements/f30dfc2.txt @@ -11,8 +11,8 @@ annotated-types==0.7.0 anyio==4.9.0 async-timeout==5.0.1 attrs==25.3.0 -boto3==1.37.17 -botocore==1.37.17 +boto3==1.37.22 +botocore==1.37.22 cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 @@ -37,30 +37,30 @@ jiter==0.9.0 jmespath==1.0.1 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -litellm==1.63.12 +litellm==1.64.1 markupsafe==3.0.2 mock==5.2.0 multidict==6.2.0 -openai==1.68.2 +openai==1.69.0 opentracing==2.4.0 packaging==24.2 pluggy==1.5.0 -propcache==0.3.0 +propcache==0.3.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 -pydantic==2.10.6 -pydantic-core==2.27.2 +pyasn1-modules==0.4.2 +pydantic==2.11.0 +pydantic-core==2.33.0 pytest==8.3.5 -pytest-asyncio==0.25.3 +pytest-asyncio==0.26.0 pytest-cov==6.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 +python-dotenv==1.1.0 pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -rpds-py==0.23.1 +rpds-py==0.24.0 rsa==4.9 s3transfer==0.11.4 six==1.17.0 @@ -70,7 +70,8 @@ tiktoken==0.9.0 tokenizers==0.21.1 tomli==2.2.1 tqdm==4.67.1 -typing-extensions==4.12.2 +typing-extensions==4.13.0 +typing-inspection==0.4.0 urllib3==1.26.20 vcrpy==7.0.0 wrapt==1.17.2 diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml new file mode 100644 index 00000000000..ebbbb24714e --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_with_tools.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '487' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//7FTLbtswELzrK4g924Fkx4/olhRJkNZ12iatkdaBQFNrmTVFEiTVNjD8 + 74UoV5IfBXrooYfqIBA73Nnl7JCbgBDgKcQE2Io6lmvRvbod3X/U4cVlev368eZ81FsjhvcXt0/X + w+kUOmWGWnxF5n5lnTGVa4GOK1nBzCB1WLJGo/N+NI76o9ADuUpRlGmZdt3+2aDrCrNQ3TDqDXaZ + K8UZWojJl4AQQjb+X/YoU/wBMfE8PpKjtTRDiOtNhIBRoowAtZZbR6WDTgMyJR3Ksm1ZCNECnFIi + YVSIpnD1bVrrRigqRPL+avbm3fhusFZTFn64fzsYDqOUPdhWvYr6RfuGloVktUAtvI7HB8UIAUlz + n5uhS1hhDEqXfEfqVmgOaAgBarIiR+nKI8BmDkIxWhLPIZ7DA5XkxlDJuGWqQ15dzmELewzb4NT6 + uSWSwWVhqThWj0qpnK/l5XveIdt6UkJl2qiFPUiFJZfcrhKD1HoB2nMIWhRHJoj+FRPcjacLrj+H + T3z4ic/EYzRDOplM/pvgb5og2LUAxd6oQRuVa5c4tUZfdDyoSKF5jhqwP96BTjkqmnjU63dO0CUp + Osq9FWr3McpWmDapzVNEi5SrFtA273E3p7ir43OZ/Ql9AzCG2mGaaIMpZ/snbrYZLF/r322rRfYN + g0XzjTNMHEdTjiPFJS1EdYXAvliHebLkMkOjDa/vUbANfgIAAP//AwBATheKSwYAAA== + headers: + CF-RAY: + - 9278b28d69eb3ba6-BOS + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 28 Mar 2025 17:02:51 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '540' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7f2afa7681587d4ae31ca5d9d75824f0 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..515680c5d04 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml @@ -0,0 +1,130 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given 
location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ty5BH4ChPTiw8GnzCSqhxhoP","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b292789f3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:51 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '281' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_78940dfd1e163cd37e49e666383b7944 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml new file mode 100644 index 00000000000..fda11e5011a --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml @@ -0,0 +1,162 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '542' + content-type: + - application/json + cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b2960dbd3ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:52 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '406' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7e8b09694a1029b3eb2fecf93deef4a3 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml new file mode 100644 index 00000000000..a68aabafd1d --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_with_tools.yaml @@ -0,0 +1,183 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZLW6rwgBZAlsBPJv2F9O3zhz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OTM3ub2HDuZ1PCj137Me5EGRPt","object":"chat.completion.chunk","created":1743181369,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":43,"total_tokens":128,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28799743ba6-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:50 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=E5.J70d433QZA9Yb..SoyuL46jC1xpIxn4pnxkMjOWc-1743181370-1.0.1.1-bQRVFg.zcyoLYbcsK6DabkiL3ZaPDY.X.mSq2T37uuxnG9X7_mV50crYaQ8tZJdqTZAxOMLe2RYv8mB5jn6GmqPhSgI41BBm4DMMl4lW8FY; + path=/; expires=Fri, 28-Mar-25 17:32:50 GMT; domain=.api.openai.com; HttpOnly; 
+ Secure; SameSite=None + - _cfuvid=wXfAZSge17hyjNvaLx1PDXNyLNpOX59UJ.sov3vRs0U-1743181370249-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '586' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999985' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e2c3786bb1e2c88f639d2f20e45a9e88 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml new file mode 100644 index 00000000000..a14ca18675e --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_with_tools.yaml @@ -0,0 +1,151 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '541' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_HuRWIamjJM7bLsbCamjSgf8e","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + CA"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\",\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"unit"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"c"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"elsius"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BG7OSdeqXVRlIVkBZl8NLocpKOqML","object":"chat.completion.chunk","created":1743181368,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":85,"completion_tokens":24,"total_tokens":109,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 9278b28278948f69-BOS + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Fri, 28 Mar 2025 17:02:49 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=eBHeBQfyYm2koe.GVJdLen4F5mzqqi7jmZT_YvXZOLk-1743181369-1.0.1.1-LT_wEU5NDwWbrNU7lyULsUd_ptgtackPCBbB6I8i.4_taWWP57cHdMtWDz1rfhzKB9f_pKfJzxijQ_Z27_P6iLDT1hf4ioC2b0otZHD3c4o; + path=/; expires=Fri, 28-Mar-25 17:32:49 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=x0zurymIN4SzRny8DSr5RMnqvVD_AwW_LNnMmnuYuRg-1743181369158-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '295' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_941396c1f446305a0aed13f33a158719 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_with_tools.yaml new file mode 100644 index 00000000000..78f08f3a0af --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_with_tools.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get + the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '487' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFNNj9owEL3nV1hzhlUoIGhusOqu1FIJqQekllVk7CHx4tiRPaFdIf57 + FYdNAkul5hBZ8/zevPnwKWIMlISEgcg5iaLUw+Xz7Msi36xkWcnHldLrxXoZfzt8nW2mPxcwqBl2 + 94qC3lkPwhalRlLWNLBwyAlr1dFsMh7N49k4DkBhJeqalpU0HD9Mh1S5nR3Go0/TCzO3SqCHhP2K + GGPsFP61RyPxDyQs6IRIgd7zDCFpLzEGzuo6Atx75YkbgkEHCmsITW3bVFr3ALJWp4Jr3SVuvlPv + 3DWKa52K9fhwHMvv89flLBOb4zE+ztUkX/XyNdJvZTC0r4xoG9TD23hyk4wxMLwI3AwpFZVzaCj9 + jZxydDcyjAF3WVWgoboEOG1BW8Fr4S0kW/jBDXty3AjlhR2wx8UWznClcI7unV96TXK4rzzXH7vH + jbEUcoX2vVyQczspbbPS2Z2/ocJeGeXz1CH3oQH9OUTvRoIFqK5GDaWzRUkp2QOGpPNpIwrdJnbg + 6PMFJEtc9+LxZHBHLpVIXIVVaLdPcJGj7KjdFvJKKtsDol7pH93c027KVyb7H/kOEAJLQpmWDqUS + 1xV31xzWD/Vf19omB8Pg0R2VwJQUunocEve80s0TAv/mCYt0r0yGrnSqfUfROfoLAAD//wMASyVc + NkYEAAA= + headers: + CF-RAY: + - 9278a2ee88a28ff6-BOS + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 28 Mar 2025 16:52:11 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; + path=/; expires=Fri, 28-Mar-25 17:22:11 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '397' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999986' + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f08846e9b273c1b121279f2a187948dc + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index e64c5975c28..c76b63075f2 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -1,26 +1,26 @@ import pytest -from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response +from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools from tests.llmobs._utils import _expected_llmobs_llm_span_event @pytest.mark.parametrize( "ddtrace_global_config", 
[dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="")] ) +@pytest.mark.parametrize( + "stream,n,include_usage", + [ + (True, 1, True), + (True, 2, True), + (False, 1, True), + (False, 2, True), + (True, 1, False), + (True, 2, False), + (False, 1, False), + (False, 2, False), + ], +) class TestLLMObsLiteLLM: - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -51,19 +51,41 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) + def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + if stream and n > 1: + pytest.skip("Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977") + with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): + messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": include_usage}, + tools=tools, + tool_choice="auto", + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + mock_llmobs_writer.enqueue.assert_called_with( + _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}, "tool_choice": "auto"}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + ) + + async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -94,19 +116,6 @@ async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_ ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
@@ -137,19 +146,6 @@ def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tr ) ) - @pytest.mark.parametrize( - "stream,n,include_usage", - [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), - ], - ) async def test_atext_completion( self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage ): @@ -181,3 +177,4 @@ async def test_atext_completion( tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) ) + diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index 508d6a1fd3e..26bbc755a13 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -1,5 +1,6 @@ import vcr import os +import json CASETTE_EXTENSION = ".yaml" @@ -21,7 +22,7 @@ def get_request_vcr(): # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n, include_usage=True): +def get_cassette_name(stream, n, include_usage=True, tools=False): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" # include_usage only affects streamed responses @@ -29,24 +30,27 @@ def get_cassette_name(stream, n, include_usage=True): usage_suffix = "_exclude_usage" else: usage_suffix = "" - return "completion" + stream_suffix + choice_suffix + usage_suffix + CASETTE_EXTENSION + tools_suffix = "_with_tools" if tools else "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + CASETTE_EXTENSION def consume_stream(resp, n, is_completion=False): - output_messages = [{"content": ""} for _ in range(n)] + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] token_metrics = {} role = None for chunk in resp: - role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics async def async_consume_stream(resp, n, is_completion=False): - output_messages = [{"content": ""} for _ in range(n)] + output_messages = [{"content": "", "tool_calls": []} for _ in range(n)] token_metrics = {} role = None async for chunk in resp: - role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -58,6 +62,16 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co if "role" not in output_messages[choice.index] and (choice.get("delta", {}).get("role") or role): role = choice.get("delta", {}).get("role") or role output_messages[choice.index]["role"] = role + if choice.get("delta", {}).get("tool_calls", []): + tool_calls_chunk = choice["delta"]["tool_calls"] + for tool_call in tool_calls_chunk: + while tool_call.index >= len(output_messages[choice.index]["tool_calls"]): + output_messages[choice.index]["tool_calls"].append({}) + arguments = output_messages[choice.index]["tool_calls"][tool_call.index].get("arguments", "") + 
output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) or tool_call.function.name
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = arguments + tool_call.function.arguments
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id
+                    output_messages[choice.index]["tool_calls"][tool_call.index]["type"] = tool_call.type
 
     if "usage" in chunk and chunk["usage"]:
         token_metrics.update(
@@ -68,15 +82,38 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co
             }
         )
 
-    return role
+    return output_messages, token_metrics, role
+
+
+def parse_tool_calls(output_messages):
+    # Parse each tool call's accumulated arguments; drop the tool_calls key from messages that have none
+    for message in output_messages:
+        if message["tool_calls"]:
+            for tool_call in message["tool_calls"]:
+                if "arguments" in tool_call:
+                    tool_call["arguments"] = json.loads(tool_call["arguments"])
+        else:
+            del message["tool_calls"]
+    return output_messages
 
 
 def parse_response(resp, is_completion=False):
     output_messages = []
     for choice in resp.choices:
-        message = {"content": choice.text if is_completion else choice.message.content}
+        content = choice.text if is_completion else choice.message.content
+        message = {"content": content or ""}
         if choice.get("role", None) or choice.get("message", {}).get("role", None):
             message["role"] = choice["role"] if is_completion else choice["message"]["role"]
+        tool_calls = choice.get("message", {}).get("tool_calls", [])
+        if tool_calls:
+            message["tool_calls"] = []
+            for tool_call in tool_calls:
+                message["tool_calls"].append({
+                    "name": tool_call["function"]["name"],
+                    "arguments": json.loads(tool_call["function"]["arguments"]),
+                    "tool_id": tool_call["id"],
+                    "type": tool_call["type"]
+                })
         output_messages.append(message)
     token_metrics = {
         "input_tokens": resp.usage.prompt_tokens,
@@ -84,3 +121,24 @@
         "total_tokens": resp.usage.total_tokens,
     }
     return output_messages, token_metrics
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + }, + } +] \ No newline at end of file From 64f48a1ac7a3a9518b0a374573fb37731c60dd24 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Mon, 31 Mar 2025 15:54:51 -0400 Subject: [PATCH 29/61] add test for case where integrations are enabled --- ddtrace/contrib/internal/litellm/patch.py | 3 +- ddtrace/llmobs/_integrations/litellm.py | 19 ++++++- tests/contrib/litellm/conftest.py | 5 +- tests/contrib/litellm/test_litellm_llmobs.py | 58 +++++++++++++++++++- 4 files changed, 77 insertions(+), 8 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 74515b8e829..c69697519b8 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -36,12 +36,11 @@ def _create_span(litellm, pin, func, instance, args, kwargs): """Helper function to create and configure a traced span.""" model = get_argument_value(args, kwargs, 0, "model", None) integration = litellm._datadog_integration - base_url = kwargs.get("api_base", None) span = integration.trace( pin, "litellm.%s" % func.__name__, model=model, - submit_to_llmobs=integration.should_submit_to_llmobs(base_url), + submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs), ) return span diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c15c7836e4a..65d1e12bc3b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -3,6 +3,7 @@ from typing import List from typing import Optional +import ddtrace from ddtrace.llmobs._constants import ( INPUT_TOKENS_METRIC_KEY, METRICS, @@ -13,7 +14,6 @@ from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._integrations.utils import ( - get_llmobs_metrics_tags, openai_set_meta_tags_from_chat, openai_set_meta_tags_from_completion, ) @@ -68,5 +68,18 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, } - def should_submit_to_llmobs(self, base_url: Optional[str] = None) -> bool: - return base_url is None + def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: + """ + Span should be NOT submitted to LLMObs if: + - base_url is not None + - model provider is Open AI or Azure AND request is not being streamed AND Open AI integration is enabled + """ + base_url = kwargs.get("api_base", None) + if base_url is not None: + return False + stream = kwargs.get("stream", False) + model_lower = model.lower() if model else "" + # model provider is unknown until request completes; therefore, this is a best effort attempt to check if model provider is Open AI or Azure + if ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) and not stream and "openai" in ddtrace._monkey._get_patched_modules(): + return False + return True diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 9bc9e4fa887..1ca4294bd80 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -66,9 +66,10 @@ def mock_tracer(litellm, ddtrace_global_config): pin.tracer._configure() if ddtrace_global_config.get("_llmobs_enabled", False): - # Have to disable and re-enable LLMObs to use to mock tracer. + # Have to disable and re-enable LLMObs to use the mock tracer. 
         LLMObs.disable()
-        LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False)
+        enable_integrations = ddtrace_global_config.get("_integrations_enabled", False)
+        LLMObs.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations)
 
     yield mock_tracer
 
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index c76b63075f2..16f3e61f3a9 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,7 +1,10 @@
+from ddtrace._trace.pin import Pin
+from ddtrace.llmobs._llmobs import LLMObs
 import pytest
 
 from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from tests.utils import DummyTracer
 
 
 @pytest.mark.parametrize(
@@ -84,7 +87,6 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m
                 tags={"ml_app": "", "service": "tests.contrib.litellm"},
             )
         )
-
     async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
        with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -178,3 +180,57 @@ async def test_atext_completion(
             )
         )
 
+    def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
+        if stream:
+            pytest.skip("Streamed OpenAI requests lead to unfinished spans; skip them for now")
+        with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)):
+            LLMObs.disable()
+
+            LLMObs.enable(integrations_enabled=True)
+            mock_tracer = DummyTracer()
+            import litellm
+            import openai
+
+            pin = Pin.get_from(litellm)
+            pin._override(litellm, tracer=mock_tracer)
+            pin._override(openai, tracer=mock_tracer)
+
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": include_usage},
+            )
+            LLMObs.disable()
+            if stream:
+                output_messages, token_metrics = consume_stream(resp, n)
+            else:
+                output_messages, token_metrics = parse_response(resp)
+
+            openai_span = mock_tracer.pop_traces()[0][1]
+            # remove the parent span since the LiteLLM request span will not be submitted to LLMObs
+            openai_span._parent = None
+            assert mock_llmobs_writer.enqueue.call_count == 1
+            mock_llmobs_writer.enqueue.assert_called_with(
+                _expected_llmobs_llm_span_event(
+                    openai_span,
+                    model_name="gpt-3.5-turbo-0125",
+                    model_provider="openai",
+                    input_messages=messages,
+                    output_messages=output_messages,
+                    metadata={
+                        "n": n,
+                        "extra_body": {},
+                        "timeout": 600.0,
+                        "extra_headers": {
+                            "X-Stainless-Raw-Response": "true"
+                        }
+                    },
+                    token_metrics=token_metrics,
+                    tags={"ml_app": "", "service": "tests.contrib.litellm"},
+                )
+            )
+
+    
\ No newline at end of file

From 4dd467c7cb6f909c7e93f0ccf3202c25f3eea077 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Mon, 31 Mar 2025 16:40:17 -0400
Subject: [PATCH 30/61] add tests for proxy requests

---
 .../completion_multiple_choices_proxy.yaml    | 133 ++++++++++
 .../litellm/cassettes/completion_proxy.yaml   | 132 ++++++++++
 ...completion_stream_exclude_usage_proxy.yaml | 144 +++++++++++
 ..._multiple_choices_exclude_usage_proxy.yaml | 240 ++++++++++++++++++
 ...pletion_stream_multiple_choices_proxy.yaml | 186 ++++++++++++++
 .../cassettes/completion_stream_proxy.yaml    | 154 +++++++++++
 tests/contrib/litellm/conftest.py             |   4 +
tests/contrib/litellm/test_litellm_llmobs.py | 19 ++ tests/contrib/litellm/utils.py | 10 +- 9 files changed, 1017 insertions(+), 5 deletions(-) create mode 100644 tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml create mode 100644 tests/contrib/litellm/cassettes/completion_stream_proxy.yaml diff --git a/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml new file mode 100644 index 00000000000..7641313c6a3 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_multiple_choices_proxy.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: '{"id":"chatcmpl-BHGBewTYXrkQYXQ5DUtzsz7lL7gjy","created":1743453498,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not + much, just here to help you with anything you need. How can I assist you today?","role":"assistant","tool_calls":null,"function_call":null}},{"finish_reason":"stop","index":1,"message":{"content":"Not + much, just here to chat and help with anything you need. 
How can I assist + you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":43,"prompt_tokens":13,"total_tokens":56,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}' + headers: + content-length: + - '855' + content-type: + - application/json + date: + - Mon, 31 Mar 2025 20:38:18 GMT + llm_provider-access-control-expose-headers: + - X-Request-ID + llm_provider-alt-svc: + - h3=":443"; ma=86400 + llm_provider-cf-cache-status: + - DYNAMIC + llm_provider-cf-ray: + - 9292a64f29e7c989-IAD + llm_provider-connection: + - keep-alive + llm_provider-content-encoding: + - gzip + llm_provider-content-type: + - application/json + llm_provider-date: + - Mon, 31 Mar 2025 20:38:19 GMT + llm_provider-openai-organization: + - datadog-4 + llm_provider-openai-processing-ms: + - '420' + llm_provider-openai-version: + - '2020-10-01' + llm_provider-server: + - cloudflare + llm_provider-strict-transport-security: + - max-age=31536000; includeSubDomains; preload + llm_provider-transfer-encoding: + - chunked + llm_provider-x-content-type-options: + - nosniff + llm_provider-x-ratelimit-limit-requests: + - '15000' + llm_provider-x-ratelimit-limit-tokens: + - '2000000' + llm_provider-x-ratelimit-remaining-requests: + - '14999' + llm_provider-x-ratelimit-remaining-tokens: + - '1999993' + llm_provider-x-ratelimit-reset-requests: + - 4ms + llm_provider-x-ratelimit-reset-tokens: + - 0s + llm_provider-x-request-id: + - req_743e3e93e074d74f8c2dcdaff378a836 + server: + - uvicorn + x-litellm-attempted-fallbacks: + - '0' + x-litellm-attempted-retries: + - '0' + x-litellm-call-id: + - 7c65af0a-51fe-4b5b-8491-d52aeed495c6 + x-litellm-key-spend: + - '0.0' + x-litellm-model-api-base: + - https://api.openai.com + x-litellm-model-group: + - gpt-3.5-turbo + x-litellm-model-id: + - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753 + x-litellm-overhead-duration-ms: + - '1.125' + x-litellm-response-cost: + - '7.099999999999999e-05' + x-litellm-response-duration-ms: + - '558.881' + x-litellm-version: + - 1.63.11 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999993' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_proxy.yaml b/tests/contrib/litellm/cassettes/completion_proxy.yaml new file mode 100644 index 00000000000..15e7ea403f3 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_proxy.yaml @@ -0,0 +1,132 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 
'{"id":"chatcmpl-BHGAf6WA7lmIL9yuwftXYqc5kADAy","created":1743453437,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":null,"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Not + much, just here to help with any questions or tasks you may have. How can + I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"usage":{"completion_tokens":24,"prompt_tokens":13,"total_tokens":37,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}},"service_tier":"default"}' + headers: + content-length: + - '663' + content-type: + - application/json + date: + - Mon, 31 Mar 2025 20:37:16 GMT + llm_provider-access-control-expose-headers: + - X-Request-ID + llm_provider-alt-svc: + - h3=":443"; ma=86400 + llm_provider-cf-cache-status: + - DYNAMIC + llm_provider-cf-ray: + - 9292a4d20e95057d-IAD + llm_provider-connection: + - keep-alive + llm_provider-content-encoding: + - gzip + llm_provider-content-type: + - application/json + llm_provider-date: + - Mon, 31 Mar 2025 20:37:18 GMT + llm_provider-openai-organization: + - datadog-4 + llm_provider-openai-processing-ms: + - '406' + llm_provider-openai-version: + - '2020-10-01' + llm_provider-server: + - cloudflare + llm_provider-strict-transport-security: + - max-age=31536000; includeSubDomains; preload + llm_provider-transfer-encoding: + - chunked + llm_provider-x-content-type-options: + - nosniff + llm_provider-x-ratelimit-limit-requests: + - '15000' + llm_provider-x-ratelimit-limit-tokens: + - '2000000' + llm_provider-x-ratelimit-remaining-requests: + - '14999' + llm_provider-x-ratelimit-remaining-tokens: + - '1999994' + llm_provider-x-ratelimit-reset-requests: + - 4ms + llm_provider-x-ratelimit-reset-tokens: + - 0s + llm_provider-x-request-id: + - req_6fdefc3db6a6e5b77dae976930efe649 + server: + - uvicorn + x-litellm-attempted-fallbacks: + - '0' + x-litellm-attempted-retries: + - '0' + x-litellm-call-id: + - 30958a62-d9f7-47e6-8971-2b58852f2976 + x-litellm-key-spend: + - '0.0' + x-litellm-model-api-base: + - https://api.openai.com + x-litellm-model-group: + - gpt-3.5-turbo + x-litellm-model-id: + - e748de28e0562f80b594285ded0e720cb492b705ea9ad445427dff7c5b722753 + x-litellm-overhead-duration-ms: + - '1.424' + x-litellm-response-cost: + - '4.25e-05' + x-litellm-response-duration-ms: + - '611.3' + x-litellm-version: + - 1.63.11 + x-ratelimit-limit-requests: + - '15000' + x-ratelimit-limit-tokens: + - '2000000' + x-ratelimit-remaining-requests: + - '14999' + x-ratelimit-remaining-tokens: + - '1999994' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml new file mode 100644 index 00000000000..39cf74be7cb --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml @@ -0,0 +1,144 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + 
x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:18 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - a5a87fc0-874f-4432-b608-91b437b91fb2 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml new file mode 100644 index 00000000000..7cface6a716 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml @@ -0,0 +1,240 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + 
and"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + any"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + questions"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + or"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + tasks"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + may"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + have"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + 
today"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} + + + data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:19 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - b4f152d1-5074-4fb3-a79d-ad0529fa5aa1 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml new file mode 100644 index 00000000000..479f5817089 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_proxy.yaml @@ -0,0 +1,186 @@ +interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + 
+ data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":27,"prompt_tokens":13,"total_tokens":40}} + + + data: {"id":"chatcmpl-3f890568-66a5-4370-9ce0-8de38b24b1aa","created":1743453498,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:17 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 7396b03c-8ab2-4593-8e46-a3e1285bd4d4 + x-litellm-key-spend: + - '0.0' + x-litellm-version: + - 1.63.11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml new file mode 100644 index 00000000000..32e111c1775 --- /dev/null +++ b/tests/contrib/litellm/cassettes/completion_stream_proxy.yaml @@ -0,0 +1,154 @@ 
+interactions: +- request: + body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '143' + content-type: + - application/json + host: + - 0.0.0.0:4000 + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://0.0.0.0:4000/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + much"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + just"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + here"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + to"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + chat"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + and"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + help"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + with"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + anything"}}],"stream_options":{"include_usage":true}} + + + data: 
{"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + need"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + How"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + can"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + I"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + assist"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + you"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" + today"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true}} + + + data: {"id":"chatcmpl-2c9d9d0f-ba63-48ab-ae48-cfc80eabf1ec","created":1743453497,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"stream_options":{"include_usage":true},"usage":{"completion_tokens":22,"prompt_tokens":13,"total_tokens":35,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0}}} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8 + date: + - Mon, 31 Mar 2025 20:38:16 GMT + server: + - uvicorn + transfer-encoding: + - chunked + x-litellm-call-id: + - 
8b484c9d-0eae-4d95-8b0e-fe1bdb114b9a
+      x-litellm-key-spend:
+      - '0.0'
+      x-litellm-version:
+      - 1.63.11
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 1ca4294bd80..c94314d8902 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -79,3 +79,7 @@ def mock_tracer(litellm, ddtrace_global_config):
 @pytest.fixture
 def request_vcr():
     return get_request_vcr()
+
+@pytest.fixture
+def request_vcr_include_localhost():
+    return get_request_vcr(ignore_localhost=False)
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 16f3e61f3a9..a7023042caf 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -232,5 +232,24 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs
                 tags={"ml_app": "", "service": "tests.contrib.litellm"},
             )
         )
+
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)):
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": include_usage},
+                api_base="http://0.0.0.0:4000",
+            )
+            if stream:
+                consume_stream(resp, n)
+
+        # client-side requests made to the proxy are not submitted to LLMObs
+        assert mock_llmobs_writer.enqueue.call_count == 0
+
+
\ No newline at end of file
diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py
index 26bbc755a13..cd5eadb97b2 100644
--- a/tests/contrib/litellm/utils.py
+++ b/tests/contrib/litellm/utils.py
@@ -8,21 +8,20 @@
 # VCR is used to capture and store network requests made to Anthropic.
 # This is done to avoid making real calls to the API which could introduce
 # flakiness and cost.
-def get_request_vcr(): +def get_request_vcr(ignore_localhost=True): return vcr.VCR( cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"), record_mode="once", match_on=["path"], filter_headers=["authorization", "x-api-key", "api-key"], - # Ignore requests to the agent - ignore_localhost=True, + ignore_localhost=ignore_localhost, ) # Get the name of the cassette to use for a given test # All LiteLLM requests that use Open AI get routed to the chat completions endpoint, # so we can reuse the same cassette for each combination of stream and n -def get_cassette_name(stream, n, include_usage=True, tools=False): +def get_cassette_name(stream, n, include_usage=True, tools=False, proxy=False): stream_suffix = "_stream" if stream else "" choice_suffix = "_multiple_choices" if n > 1 else "" # include_usage only affects streamed responses @@ -31,7 +30,8 @@ def get_cassette_name(stream, n, include_usage=True, tools=False): else: usage_suffix = "" tools_suffix = "_with_tools" if tools else "" - return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + CASETTE_EXTENSION + proxy_suffix = "_proxy" if proxy else "" + return "completion" + stream_suffix + choice_suffix + usage_suffix + tools_suffix + proxy_suffix + CASETTE_EXTENSION def consume_stream(resp, n, is_completion=False): From f0b8a721afc615928d097cb14b259cb1d800835b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 1 Apr 2025 10:03:04 -0400 Subject: [PATCH 31/61] add litellm tests to suitespec --- tests/llmobs/suitespec.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/llmobs/suitespec.yml b/tests/llmobs/suitespec.yml index a9d3da34dc5..40ba83b535a 100644 --- a/tests/llmobs/suitespec.yml +++ b/tests/llmobs/suitespec.yml @@ -12,6 +12,9 @@ components: langchain: - ddtrace/contrib/_langchain.py - ddtrace/contrib/internal/langchain/* + litellm: + - ddtrace/contrib/_litellm.py + - ddtrace/contrib/internal/litellm/* llmobs: - ddtrace/llmobs/* openai: @@ -73,6 +76,18 @@ suites: - tests/snapshots/tests.contrib.langchain.* runner: riot snapshot: true + litellm: + paths: + - '@bootstrap' + - '@core' + - '@tracing' + - '@contrib' + - '@litellm' + - '@llmobs' + - tests/contrib/litellm/* + - tests/snapshots/tests.contrib.litellm.* + runner: riot + snapshot: true llmobs: paths: - '@bootstrap' From 8d5d24b77a8fe0cd90458e1566cb5667b782359a Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 10:17:12 -0400 Subject: [PATCH 32/61] consume streams for apm tests --- tests/contrib/litellm/test_litellm.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 5b1c787ddba..43a9a4d4036 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -33,12 +33,16 @@ def test_litellm_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion"): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - litellm.completion( + resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) + if stream: + for _ in resp: + pass + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) @@ -46,36 +50,44 @@ async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, strea with 
snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] - await litellm.acompletion( + resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, ) - - + if stream: + async for _ in resp: + pass + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) def test_litellm_text_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - litellm.text_completion( + resp = litellm.text_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + for _ in resp: + pass @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) async def test_litellm_atext_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): with request_vcr.use_cassette(get_cassette_name(stream, n)): - await litellm.atext_completion( + resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt="Hello world", stream=stream, n=n, ) + if stream: + async for _ in resp: + pass @pytest.mark.parametrize("model", ["command-r", "anthropic/claude-3-5-sonnet-20240620"]) From 377b40d27c021f6ab6060ce6e3e378f3d5cef3bd Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 10:26:38 -0400 Subject: [PATCH 33/61] remove unnecessary snapshot files and riot requirements --- .riot/requirements/45c12de.txt | 76 ------------ .riot/requirements/e05a904.txt | 79 ------------- .riot/requirements/e8c8851.txt | 76 ------------ .riot/requirements/f30dfc2.txt | 79 ------------- ddtrace/contrib/internal/litellm/patch.py | 1 - ddtrace/contrib/internal/litellm/utils.py | 5 - .../sources/min_compatible_versions.csv | 1 - min_compatible_versions.csv | 1 - .../claude-3-5-sonnet-20240620.yaml | 86 -------------- .../completion_vertex_ai/gemini-pro.yaml | 110 ------------------ tests/contrib/litellm/test_litellm_llmobs.py | 2 - ...llm.test_litellm_acompletion[False-1].json | 27 ----- ...llm.test_litellm_acompletion[False-2].json | 27 ----- ...ellm.test_litellm_acompletion[True-2].json | 27 ----- ...est_litellm_atext_completion[False-1].json | 27 ----- ...est_litellm_atext_completion[False-2].json | 27 ----- ...test_litellm_atext_completion[True-2].json | 27 ----- ...ellm.test_litellm_completion[False-1].json | 27 ----- ...ellm.test_litellm_completion[False-2].json | 27 ----- ...tellm.test_litellm_completion[True-2].json | 27 ----- ...t_litellm_completion_different_models.json | 27 ----- ...anthropic_claude-3-5-sonnet-20240620].json | 27 ----- ...test_litellm_text_completion[False-1].json | 27 ----- ...test_litellm_text_completion[False-2].json | 27 ----- ....test_litellm_text_completion[True-2].json | 27 ----- 25 files changed, 894 deletions(-) delete mode 100644 .riot/requirements/45c12de.txt delete mode 100644 .riot/requirements/e05a904.txt delete mode 100644 .riot/requirements/e8c8851.txt delete mode 100644 .riot/requirements/f30dfc2.txt delete mode 100644 tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml delete mode 100644 
tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json delete mode 100644 tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json diff --git a/.riot/requirements/45c12de.txt b/.riot/requirements/45c12de.txt deleted file mode 100644 index 3f0c6ae7839..00000000000 --- a/.riot/requirements/45c12de.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/45c12de.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/e05a904.txt b/.riot/requirements/e05a904.txt deleted file mode 100644 index e46108467ec..00000000000 --- a/.riot/requirements/e05a904.txt +++ /dev/null @@ -1,79 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following 
command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/e05a904.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -async-timeout==5.0.1 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tomli==2.2.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/e8c8851.txt b/.riot/requirements/e8c8851.txt deleted file mode 100644 index 55dd71876c7..00000000000 --- a/.riot/requirements/e8c8851.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/e8c8851.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 -aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==2.3.0 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/.riot/requirements/f30dfc2.txt b/.riot/requirements/f30dfc2.txt deleted file mode 100644 index 0f57ac61e7a..00000000000 --- a/.riot/requirements/f30dfc2.txt +++ /dev/null @@ -1,79 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --allow-unsafe --no-annotate .riot/requirements/f30dfc2.in -# -aiohappyeyeballs==2.6.1 -aiohttp==3.11.14 
-aiosignal==1.3.2 -annotated-types==0.7.0 -anyio==4.9.0 -async-timeout==5.0.1 -attrs==25.3.0 -boto3==1.37.22 -botocore==1.37.22 -cachetools==5.5.2 -certifi==2025.1.31 -charset-normalizer==3.4.1 -click==8.1.8 -coverage[toml]==7.7.1 -distro==1.9.0 -exceptiongroup==1.2.2 -filelock==3.18.0 -frozenlist==1.5.0 -fsspec==2025.3.0 -google-auth==2.38.0 -h11==0.14.0 -httpcore==1.0.7 -httpx==0.28.1 -huggingface-hub==0.29.3 -hypothesis==6.45.0 -idna==3.10 -importlib-metadata==8.6.1 -iniconfig==2.1.0 -jinja2==3.1.6 -jiter==0.9.0 -jmespath==1.0.1 -jsonschema==4.23.0 -jsonschema-specifications==2024.10.1 -litellm==1.64.1 -markupsafe==3.0.2 -mock==5.2.0 -multidict==6.2.0 -openai==1.69.0 -opentracing==2.4.0 -packaging==24.2 -pluggy==1.5.0 -propcache==0.3.1 -pyasn1==0.6.1 -pyasn1-modules==0.4.2 -pydantic==2.11.0 -pydantic-core==2.33.0 -pytest==8.3.5 -pytest-asyncio==0.26.0 -pytest-cov==6.0.0 -pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 -python-dotenv==1.1.0 -pyyaml==6.0.2 -referencing==0.36.2 -regex==2024.11.6 -requests==2.32.3 -rpds-py==0.24.0 -rsa==4.9 -s3transfer==0.11.4 -six==1.17.0 -sniffio==1.3.1 -sortedcontainers==2.4.0 -tiktoken==0.9.0 -tokenizers==0.21.1 -tomli==2.2.1 -tqdm==4.67.1 -typing-extensions==4.13.0 -typing-inspection==0.4.0 -urllib3==1.26.20 -vcrpy==7.0.0 -wrapt==1.17.2 -yarl==1.18.3 -zipp==3.21.0 diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index c00d4c819f5..9aa7b77d89c 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -7,7 +7,6 @@ from ddtrace.contrib.trace_utils import unwrap from ddtrace.contrib.trace_utils import with_traced_module from ddtrace.contrib.trace_utils import wrap -from ddtrace.contrib.internal.litellm.utils import tag_request from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream from ddtrace.llmobs._integrations import LiteLLMIntegration diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 37fdcf01b55..9f813458e57 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -12,11 +12,6 @@ log = get_logger(__name__) -def tag_request(span, kwargs): - if "metadata" in kwargs and "headers" in kwargs["metadata"] and "host" in kwargs["metadata"]["headers"]: - span.set_tag_str("litellm.request.host", kwargs["metadata"]["headers"]["host"]) - - class BaseTracedLiteLLMStream: def __init__(self, generator, integration, span, args, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 diff --git a/lib-injection/sources/min_compatible_versions.csv b/lib-injection/sources/min_compatible_versions.csv index 7617732a2e2..97aa880036f 100644 --- a/lib-injection/sources/min_compatible_versions.csv +++ b/lib-injection/sources/min_compatible_versions.csv @@ -67,7 +67,6 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 -google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 diff --git a/min_compatible_versions.csv b/min_compatible_versions.csv index 7617732a2e2..97aa880036f 100644 --- a/min_compatible_versions.csv +++ b/min_compatible_versions.csv @@ -67,7 +67,6 @@ flask-caching,~=1.10.0 flask-openapi3,0 gevent,~=20.12.0 google-ai-generativelanguage,0 -google-auth,0 google-generativeai,0 googleapis-common-protos,0 graphene,~=3.0.0 diff --git 
a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml b/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml deleted file mode 100644 index a719ac0af83..00000000000 --- a/tests/contrib/litellm/cassettes/completion_anthropic/claude-3-5-sonnet-20240620.yaml +++ /dev/null @@ -1,86 +0,0 @@ -interactions: -- request: - body: '{"model": "claude-3-5-sonnet-20240620", "messages": [{"role": "user", "content": - [{"type": "text", "text": "Hey, what is up?"}]}], "max_tokens": 4096}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - anthropic-version: - - '2023-06-01' - connection: - - keep-alive - content-length: - - '150' - content-type: - - application/json - host: - - api.anthropic.com - user-agent: - - litellm/1.63.12 - method: POST - uri: https://api.anthropic.com/v1/messages - response: - body: - string: !!binary | - H4sIAAAAAAAAA2RQTW/UQAz9K8aXvcyibNoCygWVU5dyQagnhCJ3xiSjztpp7Gm7Wu1/RwlUAnGy - 9L70nk+YE3Z4sKFvdrd37Xhf33+gT5+/yXT7ZUdPX9s7DOjHiRcVm9HAGHDWsgBkls1JHAMeNHHB - DmOhmnh7sb3amoqwb9umvWzetQ0GjCrO4th9P72GOr8s9vV0eMOl6Bu4NiCB6z0UkqHSwLDGB9hD - Utk4jPTEMPFsKlSAXyaeM0tkA53hJ3PJMliA++qw3xxg5JnBFUYuExy1wnP2EUiO8FjZPKusRtcp - R1sEmwQlP6yelC1Ws7dwo88QSWAPv3evQa6Jjh/x/COguU79zGQq2CFL6r3Ogn8I48e6NMROaikB - 6/rK7oRZpuq96wOLYbe7CBgpjtzHmWlp1v8raF75mSn9z2n1v5HLq/P5FwAAAP//AwAsXlFX5AEA - AA== - headers: - CF-RAY: - - 923fd79c4a4a7cfc-EWR - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Fri, 21 Mar 2025 19:26:40 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Robots-Tag: - - none - anthropic-organization-id: - - 0280e0cf-573a-4392-b276-1b73319958fb - anthropic-ratelimit-input-tokens-limit: - - '20000' - anthropic-ratelimit-input-tokens-remaining: - - '20000' - anthropic-ratelimit-input-tokens-reset: - - '2025-03-21T19:26:40Z' - anthropic-ratelimit-output-tokens-limit: - - '4000' - anthropic-ratelimit-output-tokens-remaining: - - '4000' - anthropic-ratelimit-output-tokens-reset: - - '2025-03-21T19:26:40Z' - anthropic-ratelimit-requests-limit: - - '5' - anthropic-ratelimit-requests-remaining: - - '4' - anthropic-ratelimit-requests-reset: - - '2025-03-21T19:26:52Z' - anthropic-ratelimit-tokens-limit: - - '24000' - anthropic-ratelimit-tokens-remaining: - - '24000' - anthropic-ratelimit-tokens-reset: - - '2025-03-21T19:26:40Z' - cf-cache-status: - - DYNAMIC - request-id: - - req_01RRDNDcX3wjQFEMkLiTep47 - via: - - 1.1 google - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml b/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml deleted file mode 100644 index 5e712cce0bb..00000000000 --- a/tests/contrib/litellm/cassettes/completion_vertex_ai/gemini-pro.yaml +++ /dev/null @@ -1,110 +0,0 @@ -interactions: -- request: - body: 
assertion=eyJ0eXAiOiAiSldUIiwgImFsZyI6ICJSUzI1NiIsICJraWQiOiAiZjc0ODI1ZGMzZDE4ZWU2YTY5Y2I2YTE0NmQ5OGUxNTg4YTM5YWU3YyJ9.eyJpYXQiOiAxNzQyNTg1NTY0LCAiZXhwIjogMTc0MjU4OTE2NCwgImlzcyI6ICJsbG1vYnMtdGVzdEBkYXRhZG9nLXNhbmRib3guaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLCAiYXVkIjogImh0dHBzOi8vb2F1dGgyLmdvb2dsZWFwaXMuY29tL3Rva2VuIiwgInNjb3BlIjogImh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL2F1dGgvY2xvdWQtcGxhdGZvcm0ifQ.ZivXu-4DxUTH_3JzIngRTswHdKmxiPR_yFl7T7o7C7FSrp4zf0cHa-fid8jMBiwzOz0ooBny11AezGE0w5b15NvhbrQq3HDYdoHXGooo9yBOnhez7v5EaP8iMfpkcp0EW8DUdSUrs2-y9rYT67rA6KxxWcdQLPFyk15ka-FC3f1BsdF_c0CdoPfKEG0mpBj5OHvmwjE3L5GP-2OLgx75B9loCFs3npkEa74YfCJ5OZXHUAPgONXC9VxiXf7__Secb-sDqZLKnGi2HSwaTZJ7TWkLyVufp71IMWpYaExI9Qw2IPPok3h-tCRJjljjJ1kfFy4N0AZCv1STT3p7w8jxyA&grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '801' - Content-Type: - - application/x-www-form-urlencoded - User-Agent: - - python-requests/2.32.3 - x-goog-api-client: - - gl-python/3.11.10 auth/2.38.0 auth-request-type/at cred-type/sa - method: POST - uri: https://oauth2.googleapis.com/token - response: - body: - string: '{"access_token":"ya29.c.c0ASRK0GZU0bfQUP0GYf4fuNe4BhHzumcFny6u68QFuxIsG64_oIXfd5scBCRKnTn4AavchTw0iXipb8jUGWZnRC-3IuQPmizs1oQAAWKL90jbwCqpwefQVEMkoPi6Sp1qs0RJjm3gjX_KVPPZlleAEHtWA-lxhbPkP56KRmOHaPWIC6z019UAO4wAuylihJSAq0QheNEW42e2E9NA6MAaCfMgARAvvrNhoaJ2NvvFPYTc_B4Ii8J-fdweojRHAn115d6k4LV0hDqdMeuuDmycrZPjXb6_DTRCYbXrbEjuSlHAAYLFqXaq1q1Uv_rzTt5yaGN8fQig3SZ0b6kQ959wq_6MWRrIlip3UnO3kAjMl7HKp8cZUS2l7sLAN385CjUXmxoq36By_bvYQRmavYvqlIxei6d7mym5I6Fnon1xb69jl2c4Ykatuey3yF2I195zhf1Q_nStxax5ikZoa8gztfzUQO8uz7wXt5zawkgYvvg_y4fZsU4J2gjb44encv0oaFlvw-uXS4WI-_meXXVp-vgexUvZaS6yXO31lhyiVe60BzFsanMMQu4UyJycm2bo4pfdOe6fg3uXZS2fvv8-551p9iWcixYswr-o6h_0FrucJQ0yp-wQ791mYaMx7q7a90iSaJ1s4OqeB_Mw4kl5nscbI-xjb1YVMMYd07MfMwS5fg-0Y-7lv2tYqJbk31hq_npBnvgX-96iupo6y2ZiXebMgIgBRgZxoqvQtsgBM4Jn2Wntjebw7Sy5UIqZOaleqz1nZjeXX7bw2VQkqrgFaXF-RdaBoxJF46Us0hhkRSdIkQMMQmbOf692lo6Sjwvc2ZkziieWp1-am1uaezpamYnckZy0g8eQoZY_S6dgMpMtxcS9Jinf5c2qmMxvIwQdzx3hjt3lsedhre3mhfQvf7ypS-j5g1JVxhyxRaw01jF7tycWp4ecbuVccwwWati9M-qpQ254Mtx89v1Rt85Xh7U66lu12cJc0oktJ7nX766QlZSsx-qf1ri","expires_in":3599,"token_type":"Bearer"}' - headers: - Alt-Svc: - - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 - Content-Encoding: - - gzip - Content-Type: - - application/json; charset=UTF-8 - Date: - - Fri, 21 Mar 2025 19:32:45 GMT - Server: - - scaffolding on HTTPServer2 - Transfer-Encoding: - - chunked - Vary: - - Origin - - X-Origin - - Referer - X-Content-Type-Options: - - nosniff - X-Frame-Options: - - SAMEORIGIN - X-XSS-Protection: - - '0' - status: - code: 200 - message: OK -- request: - body: '{"contents":[{"role":"user","parts":[{"text":"Hey, what is up?"}]}],"generationConfig":{"candidate_count":1}}' - headers: - accept: - - '*/*' - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '109' - content-type: - - application/json - host: - - us-central1-aiplatform.googleapis.com - user-agent: - - litellm/1.63.12 - method: POST - uri: https://us-central1-aiplatform.googleapis.com/v1/projects/datadog-sandbox/locations/us-central1/publishers/google/models/gemini-pro:generateContent - response: - body: - string: !!binary | - H4sIAAAAAAAC/7VVXW/aMBR951d4edkLIMf5IOGlYtSCTBQQSbvSqUKGuMFbiFHstEUV/312AjRM - 6sNUlofo5n4c33MdH781ADBWJItZTCQVRhf8VB4A3sq3jvFM0kyqwNGlnDlPqfIYGx7T1Gi+B7Yk - 
l+8g1fNWs1WKpK8azRiyL2DMJdgUq3UT/CqEBGuSJSxLAC8kYBmQawpiljBJUvDC8zRughfCpM6Q - HCwpWNN0+1SkbTDkL4AsddmOF1fgx5rIrwIkvATLNJby5xWIqo3J7sqodbU/2Y8Ha3/kZDyxjIn1 - jBLBM912GE2mJ8aGIE9U7mZEN3XOu85aDVjShOe7kndvdrPo9yI8mMzmi6EyFuEU4/6wNkc9yZwv - yZKlTJZVYzwYBYPg2wh/mBaueK53BbahBVHHsi3vLFXQZ5of4MomQnyHZ0E0X3wEfqyoISPo+y70 - 7VPavvmvlK974wGeTW7DRX8yjvA4uiBx3zNt27Euz9uGyLMQtD7BW331wvDmwoShpdv6Dxtt+h3k - Ou4nCIf4/rY3Gs0X+H46CvrBBXkj3/aR6V3+BzehCTs2Qu+0D9bj6cyT52TEE92WPvEtVeO5jqlO - hoNcz3N8r5qZLiyLjEKQhN5QSZTEkpOQamKbrYz4b5r1eVFKrFstUhPks7CFDnHJlSaehw6jqKOK - a7UmS+u6VNNwpd3kOPsI39c3R+HXmzpqYqM2h79bvNBaFjpfrHH476qb5o7mglUynNCNEuaW2YYt - xbgFISoxjVVOVU8R25T3E4LIaUGrhczI9LsW6tpO24KO7ZsPVXpOxZZnggaxTreWnfjBn4ZYuP2N - H0xzu/Pd6wVGY9/4A9Z/lKYoBwAA - headers: - Alt-Svc: - - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 - Content-Encoding: - - gzip - Content-Type: - - application/json; charset=UTF-8 - Date: - - Fri, 21 Mar 2025 19:32:46 GMT - Server: - - scaffolding on HTTPServer2 - Transfer-Encoding: - - chunked - Vary: - - Origin - - X-Origin - - Referer - X-Content-Type-Options: - - nosniff - X-Frame-Options: - - SAMEORIGIN - X-XSS-Protection: - - '0' - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 0e08d5af1ca..143842b701a 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -181,8 +181,6 @@ async def test_atext_completion( ) def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): - if stream: - pytest.skip("Streamed Open AI requests will lead to unfinished spans; therefore, skip them for now") with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): LLMObs.disable() diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json deleted file mode 100644 index 214afe91718..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11473000, - "start": 1742580260689536000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json deleted file mode 100644 index c0e47e63442..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" 
- }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11410000, - "start": 1742580260717377000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json deleted file mode 100644 index 2edd0a58339..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_acompletion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.acompletion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 10910000, - "start": 1742580260660336000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json deleted file mode 100644 index 4823265c91e..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11645000, - "start": 1742580260892670000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json deleted file mode 100644 index 82aa9e0797b..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 11638000, - "start": 1742580260921802000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json deleted file mode 100644 index 019e6861dd0..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_atext_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - 
"service": "tests.contrib.litellm", - "resource": "litellm.atext_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 10966000, - "start": 1742580260866220000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json deleted file mode 100644 index 70b4b725406..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 64700000, - "start": 1742580260523466000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json deleted file mode 100644 index 70100a1b6fe..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6278000, - "start": 1742580260607014000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json deleted file mode 100644 index 9f99831ec22..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6885000, - "start": 1742580260495830000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json 
b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json deleted file mode 100644 index 3f4326ea8f5..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models.json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddbedc00000000", - "language": "python", - "litellm.request.model": "vertex_ai/gemini-pro", - "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 4385 - }, - "duration": 1211993000, - "start": 1742585564858520000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json deleted file mode 100644 index 852a1cdfd4a..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_completion_different_models[anthropic_claude-3-5-sonnet-20240620].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddbede00000000", - "language": "python", - "litellm.request.model": "anthropic/claude-3-5-sonnet-20240620", - "runtime-id": "0b4c7b69fe1b4d69a2cf9a6cb3449ea9" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 4385 - }, - "duration": 11249000, - "start": 1742585566097702000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json deleted file mode 100644 index 3639ac22839..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-1].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 7357000, - "start": 1742580260791261000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json deleted file mode 100644 index cfa5ca52417..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[False-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, 
- "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 6527000, - "start": 1742580260815275000 - }]] diff --git a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json b/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json deleted file mode 100644 index e68a5d04c10..00000000000 --- a/tests/snapshots/tests.contrib.litellm.test_litellm.test_litellm_text_completion[True-2].json +++ /dev/null @@ -1,27 +0,0 @@ -[[ - { - "name": "litellm.request", - "service": "tests.contrib.litellm", - "resource": "litellm.text_completion", - "trace_id": 0, - "span_id": 1, - "parent_id": 0, - "type": "", - "error": 0, - "meta": { - "_dd.p.dm": "-0", - "_dd.p.tid": "67ddaa2400000000", - "language": "python", - "litellm.request.model": "gpt-3.5-turbo", - "runtime-id": "08aa25bdd7e14886aec0589c876d2f70" - }, - "metrics": { - "_dd.measured": 1, - "_dd.top_level": 1, - "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 1, - "process_id": 44834 - }, - "duration": 5879000, - "start": 1742580260771604000 - }]] From d4185d8cab2d16ff7d9ed8341f026f9259f058ff Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 11:35:36 -0400 Subject: [PATCH 34/61] fix merge conflict bug for openai span linking --- ddtrace/llmobs/_integrations/openai.py | 96 -------------------------- ddtrace/llmobs/_integrations/utils.py | 30 ++++++-- 2 files changed, 26 insertions(+), 100 deletions(-) diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index aa07ff0aff6..c913282a2e5 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -142,102 +142,6 @@ def _llmobs_set_tags( {SPAN_KIND: span_kind, MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics} ) - @staticmethod - def _llmobs_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None: - """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.meta.*" tags.""" - prompt = kwargs.get("prompt", "") - if isinstance(prompt, str): - prompt = [prompt] - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt")} - output_messages = [{"content": ""}] - if not span.error and completions: - choices = getattr(completions, "choices", completions) - output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] - span._set_ctx_items( - { - INPUT_MESSAGES: [{"content": str(p)} for p in prompt], - METADATA: parameters, - OUTPUT_MESSAGES: output_messages, - } - ) - - @staticmethod - def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: - """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" - input_messages = [] - for m in kwargs.get("messages", []): - tool_call_id = m.get("tool_call_id") - if tool_call_id: - core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span)) - input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions")} - span._set_ctx_items({INPUT_MESSAGES: 
input_messages, METADATA: parameters}) - - if span.error or not messages: - span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) - return - if isinstance(messages, list): # streamed response - output_messages = [] - for streamed_message in messages: - message = {"content": streamed_message["content"], "role": streamed_message["role"]} - tool_calls = streamed_message.get("tool_calls", []) - if tool_calls: - message["tool_calls"] = [ - { - "name": tool_call.get("name", ""), - "arguments": json.loads(tool_call.get("arguments", "")), - "tool_id": tool_call.get("tool_id", ""), - "type": tool_call.get("type", ""), - } - for tool_call in tool_calls - ] - output_messages.append(message) - span._set_ctx_item(OUTPUT_MESSAGES, output_messages) - return - choices = _get_attr(messages, "choices", []) - output_messages = [] - for idx, choice in enumerate(choices): - tool_calls_info = [] - choice_message = _get_attr(choice, "message", {}) - role = _get_attr(choice_message, "role", "") - content = _get_attr(choice_message, "content", "") or "" - function_call = _get_attr(choice_message, "function_call", None) - if function_call: - function_name = _get_attr(function_call, "name", "") - arguments = json.loads(_get_attr(function_call, "arguments", "")) - function_call_info = {"name": function_name, "arguments": arguments} - output_messages.append({"content": content, "role": role, "tool_calls": [function_call_info]}) - continue - tool_calls = _get_attr(choice_message, "tool_calls", []) or [] - for tool_call in tool_calls: - tool_args = getattr(tool_call.function, "arguments", "") - tool_name = getattr(tool_call.function, "name", "") - tool_id = getattr(tool_call, "id", "") - tool_call_info = { - "name": tool_name, - "arguments": json.loads(tool_args), - "tool_id": tool_id, - "type": "function", - } - tool_calls_info.append(tool_call_info) - core.dispatch( - DISPATCH_ON_LLM_TOOL_CHOICE, - ( - tool_id, - tool_name, - tool_args, - { - "trace_id": format_trace_id(span.trace_id), - "span_id": str(span.span_id), - }, - ), - ) - if tool_calls_info: - output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) - continue - output_messages.append({"content": content, "role": role}) - span._set_ctx_item(OUTPUT_MESSAGES, output_messages) - @staticmethod def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], resp: Any) -> None: """Extract prompt tags from an embedding and set them as temporary "_ml_obs.meta.*" tags.""" diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 0f1bbb94239..06367d2e677 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -10,6 +10,10 @@ from typing import Tuple from typing import Union from urllib.parse import urlparse +from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE +from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED +from ddtrace.internal import core +from ddtrace.internal.utils.formats import format_trace_id from ddtrace.internal.logger import get_logger from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG @@ -310,6 +314,9 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: """Extract prompt/response tags from a chat completion and set them as temporary "_ml_obs.meta.*" tags.""" input_messages = [] for m in kwargs.get("messages", []): + tool_call_id = m.get("tool_call_id") + if tool_call_id: + core.dispatch(DISPATCH_ON_TOOL_CALL_OUTPUT_USED, (tool_call_id, span)) 
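+        # messages that carry a tool_call_id are tool results; the dispatch above lets
+        # LLMObs link this span back to the LLM span that originally chose the tool call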
input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) parameters = { k: v @@ -358,13 +365,28 @@ def openai_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: continue tool_calls = _get_attr(choice_message, "tool_calls", []) or [] for tool_call in tool_calls: + tool_args = getattr(tool_call.function, "arguments", "") + tool_name = getattr(tool_call.function, "name", "") + tool_id = getattr(tool_call, "id", "") tool_call_info = { - "name": getattr(tool_call.function, "name", ""), - "arguments": json.loads(getattr(tool_call.function, "arguments", "")), - "tool_id": getattr(tool_call, "id", ""), - "type": getattr(tool_call, "type", ""), + "name": tool_name, + "arguments": json.loads(tool_args), + "tool_id": tool_id, + "type": "function", } tool_calls_info.append(tool_call_info) + core.dispatch( + DISPATCH_ON_LLM_TOOL_CHOICE, + ( + tool_id, + tool_name, + tool_args, + { + "trace_id": format_trace_id(span.trace_id), + "span_id": str(span.span_id), + }, + ), + ) if tool_calls_info: output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) continue From 48e57c3138cb6aa943bf5564ddc1b2f0c879a4c0 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 12:56:24 -0400 Subject: [PATCH 35/61] fix integrations enabled test --- tests/contrib/litellm/test_litellm_llmobs.py | 49 +++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 143842b701a..c287caaa32b 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -201,35 +201,50 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs n=n, stream_options={"include_usage": include_usage}, ) - LLMObs.disable() if stream: output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) + + LLMObs.disable() - openai_span = mock_tracer.pop_traces()[0][1] - # remove parent span since LiteLLM request span will not be submitted to LLMObs - openai_span._parent = None + spans = mock_tracer.pop_traces() + # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request + if stream: + span = spans[0][0] + metadata = { + "stream": stream, + "n": n, + "stream_options": { + "include_usage": include_usage + } + } + model_name = "gpt-3.5-turbo" + else: + span = spans[0][1] + # remove parent span since LiteLLM request span will not be submitted to LLMObs + span._parent = None + metadata = { + "n": n, + "extra_body": {}, + "timeout": 600.0, + "extra_headers": { + "X-Stainless-Raw-Response": "true" + } + } + model_name = "gpt-3.5-turbo-0125" assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - openai_span, - model_name="gpt-3.5-turbo-0125", + expected_event = _expected_llmobs_llm_span_event( + span, + model_name=model_name, model_provider="openai", input_messages=messages, output_messages=output_messages, - metadata={ - "n": n, - "extra_body": {}, - "timeout": 600.0, - "extra_headers": { - "X-Stainless-Raw-Response": "true" - } - }, + metadata=metadata, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - ) + mock_llmobs_writer.enqueue.assert_called_with(expected_event) def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, 
stream, n, include_usage): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): From 252f0927cfb592bf01578239510fbd18cf50f96b Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:34:12 -0400 Subject: [PATCH 36/61] add next and anext methods to traced stream classes --- ddtrace/contrib/internal/litellm/patch.py | 2 +- ddtrace/contrib/internal/litellm/utils.py | 33 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 9aa7b77d89c..823e8f12d5b 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -119,7 +119,7 @@ def traced_get_llm_provider(litellm, pin, func, instance, args, kwargs): requested_model = get_argument_value(args, kwargs, 0, "model", None) integration = litellm._datadog_integration model, custom_llm_provider, dynamic_api_key, api_base = func(*args, **kwargs) - # Store the provider information in the integration + # store the model name and provider in the integration integration._model_map[requested_model] = (model, custom_llm_provider) return model, custom_llm_provider, dynamic_api_key, api_base diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 9f813458e57..f4d234b8ae9 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -49,6 +49,23 @@ def __iter__(self): ) self._dd_span.finish() + def __next__(self): + try: + chunk = self._generator.__next__() + _loop_handler(chunk, self._streamed_chunks) + return chunk + except StopIteration: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + raise + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + self._dd_span.finish() + raise + + class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): @@ -74,6 +91,22 @@ async def __aiter__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() + + async def __anext__(self): + try: + chunk = await self._generator.__anext__() + _loop_handler(chunk, self._streamed_chunks) + return chunk + except StopAsyncIteration: + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) + self._dd_span.finish() + raise + except Exception: + self._dd_span.set_exc_info(*sys.exc_info()) + self._dd_span.finish() + raise def _loop_handler(chunk, streamed_chunks): From 07685a83df07c05c7b3dc34f70d8d6a6e0a97718 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:55:22 -0400 Subject: [PATCH 37/61] remove unnecessary config stuff --- ddtrace/llmobs/_integrations/litellm.py | 3 ++- tests/contrib/litellm/conftest.py | 25 +++++++------------- tests/contrib/litellm/test_litellm_llmobs.py | 4 ---- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 5f0a1c93f80..c32a4aa937b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -44,9 +44,10 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") + # get resolved model name and provider model_name, model_provider = self._model_map.get(model_name, 
(model_name, "")) - # response format will match Open AI + # use Open AI helpers since response format will match Open AI if operation == "completion": openai_set_meta_tags_from_completion(span, kwargs, response) else: diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 8b8e615d93d..205d0a65bd8 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -6,7 +6,6 @@ from ddtrace.contrib.internal.litellm.patch import unpatch from tests.utils import DummyTracer from tests.utils import DummyWriter -from tests.utils import override_config from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr from ddtrace.llmobs import LLMObs @@ -21,11 +20,6 @@ def ddtrace_global_config(): return {} -@pytest.fixture -def ddtrace_config_litellm(): - return {} - - @pytest.fixture() def mock_llmobs_writer(): patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") @@ -39,19 +33,18 @@ def mock_llmobs_writer(): @pytest.fixture -def litellm(ddtrace_global_config, ddtrace_config_litellm, monkeypatch): +def litellm(ddtrace_global_config, monkeypatch): global_config = default_global_config() global_config.update(ddtrace_global_config) with override_global_config(global_config): - with override_config("litellm", ddtrace_config_litellm): - monkeypatch.setenv("OPENAI_API_KEY", "") - monkeypatch.setenv("ANTHROPIC_API_KEY", "") - monkeypatch.setenv("COHERE_API_KEY", "") - patch() - import litellm - - yield litellm - unpatch() + monkeypatch.setenv("OPENAI_API_KEY", "") + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + monkeypatch.setenv("COHERE_API_KEY", "") + patch() + import litellm + + yield litellm + unpatch() @pytest.fixture diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index c287caaa32b..8e39f5f3dba 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -262,7 +262,3 @@ def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llm # client side requests made to the proxy are not submitted to LLMObs assert mock_llmobs_writer.enqueue.call_count == 0 - - - - \ No newline at end of file From c273b52525d56fdeb114a764c560518e268c9fe1 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 13:59:10 -0400 Subject: [PATCH 38/61] add release note --- releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml diff --git a/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml new file mode 100644 index 00000000000..5c88002a4a4 --- /dev/null +++ b/releasenotes/notes/feat-llmobs-litellm-7fc73fd28ab88fac.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + LLM Observability: Adds support to automatically submit LiteLLM SDK requests to LLM Observability. 
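\ No newline at end of file

A minimal usage sketch of the behavior this release note describes, assuming DD_API_KEY and an OpenAI API key are set in the environment; the ml_app value and model below are placeholders:

import litellm

from ddtrace.llmobs import LLMObs

# enabling LLM Observability patches litellm.completion, litellm.acompletion,
# litellm.text_completion, and litellm.atext_completion
LLMObs.enable(ml_app="my-ml-app")

# this request is traced as a litellm.request span and submitted to LLM Observability
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hey, what is up?", "role": "user"}],
)
print(resp.choices[0].message.content)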
From 5ee0f038a5c393735690267c222486d34530a3b4 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 15:29:07 -0400 Subject: [PATCH 39/61] fix errored-out streamed requests not setting llmobs tags properly --- ddtrace/contrib/internal/litellm/patch.py | 4 +-- ddtrace/contrib/internal/litellm/utils.py | 18 ++++++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 823e8f12d5b..e5c2ce52296 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -73,7 +73,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion raise finally: # streamed spans will be finished separately once the stream generator is exhausted - if span.error or not stream: + if not stream: if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags( span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" @@ -106,7 +106,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com raise finally: # streamed spans will be finished separately once the stream generator is exhausted - if span.error or not stream: + if not stream: if integration.is_pc_sampled_llmobs(span): integration.llmobs_set_tags( span, args=args, kwargs=kwargs, response=resp, operation="completion" if is_completion else "chat" ) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f4d234b8ae9..e33e3bf5726 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -33,20 +33,17 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._generator.__exit__(exc_type, exc_val, exc_tb) def __iter__(self): - exception_raised = False try: for chunk in self._generator: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: self._dd_span.set_exc_info(*sys.exc_info()) - exception_raised = True raise finally: - if not exception_raised: - _process_finished_stream( - self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion - ) + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) self._dd_span.finish() def __next__(self): @@ -76,20 +73,17 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): await self._generator.__aexit__(exc_type, exc_val, exc_tb) async def __aiter__(self): - exception_raised = False try: async for chunk in self._generator: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: self._dd_span.set_exc_info(*sys.exc_info()) - exception_raised = True raise finally: - if not exception_raised: - _process_finished_stream( - self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion - ) + _process_finished_stream( + self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion + ) self._dd_span.finish() async def __anext__(self):
From a5cdfa5e1bf38f9e09c459d8897759b7cccd06ca Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Tue, 8 Apr 2025 15:43:16 -0400 Subject: [PATCH 40/61] run black --- ddtrace/contrib/internal/litellm/utils.py | 3 +- ddtrace/llmobs/_integrations/litellm.py | 6 +- ddtrace/llmobs/_integrations/utils.py | 2 + tests/contrib/litellm/conftest.py | 1 + tests/contrib/litellm/test_litellm.py | 4 +- tests/contrib/litellm/test_litellm_llmobs.py | 69 ++++++++++++-------- tests/contrib/litellm/utils.py | 36 ++++++---- 7 files changed, 77 insertions(+), 44 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index e33e3bf5726..f9c4463190d 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -61,7 +61,6 @@ def __next__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() - class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): @@ -85,7 +84,7 @@ async def __aiter__(self): self._dd_integration, self._dd_span, self._kwargs, self._streamed_chunks, self._is_completion ) self._dd_span.finish() - + async def __anext__(self): try: chunk = await self._generator.__anext__() diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index c32a4aa937b..0eea298f87d 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -85,6 +85,10 @@ def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, stream = kwargs.get("stream", False) model_lower = model.lower() if model else "" # model provider is unknown until the request completes; this is a best-effort check for OpenAI or Azure if ( - ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) and not stream and "openai" in ddtrace._monkey._get_patched_modules(): + ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) + and not stream + and "openai" in ddtrace._monkey._get_patched_modules() + ): return False return True diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 06367d2e677..c1d600a9713 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -468,6 +468,8 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> message.pop("tool_calls", None) message["content"] = message["content"].strip() return message + + class OaiSpanAdapter: """Adapter for Oai Agents SDK Span objects that the llmobs integration code will use.
This is to consolidate the code where we access oai library types which provides a clear starting point for diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 205d0a65bd8..2e846ddd5e7 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -69,6 +69,7 @@ def mock_tracer(litellm, ddtrace_global_config): def request_vcr(): return get_request_vcr() + @pytest.fixture def request_vcr_include_localhost(): return get_request_vcr(ignore_localhost=False) diff --git a/tests/contrib/litellm/test_litellm.py b/tests/contrib/litellm/test_litellm.py index 43a9a4d4036..5ddee4173e6 100644 --- a/tests/contrib/litellm/test_litellm.py +++ b/tests/contrib/litellm/test_litellm.py @@ -44,7 +44,6 @@ def test_litellm_completion(litellm, snapshot_context, request_vcr, stream, n): pass - @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): @@ -59,7 +58,8 @@ async def test_litellm_acompletion(litellm, snapshot_context, request_vcr, strea if stream: async for _ in resp: pass - + + @pytest.mark.parametrize("stream,n", [(True, 1), (True, 2), (False, 1), (False, 2)]) def test_litellm_text_completion(litellm, snapshot_context, request_vcr, stream, n): with snapshot_context(token="tests.contrib.litellm.test_litellm.test_litellm_completion", ignores=["resource"]): diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 8e39f5f3dba..c68fd103223 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -8,7 +8,15 @@ @pytest.mark.parametrize( - "ddtrace_global_config", [dict(_llmobs_enabled=True, _llmobs_sample_rate=1.0, _llmobs_ml_app="", _dd_api_key="")] + "ddtrace_global_config", + [ + dict( + _llmobs_enabled=True, + _llmobs_sample_rate=1.0, + _llmobs_ml_app="", + _dd_api_key="", + ) + ], ) @pytest.mark.parametrize( "stream,n,include_usage", @@ -54,9 +62,13 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, ) ) - def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion_with_tools( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): if stream and n > 1: - pytest.skip("Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977") + pytest.skip( + "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" + ) with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] resp = litellm.completion( @@ -72,7 +84,7 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) - + span = mock_tracer.pop_traces()[0][0] assert mock_llmobs_writer.enqueue.call_count == 1 mock_llmobs_writer.enqueue.assert_called_with( @@ -82,7 +94,12 @@ def test_completion_with_tools(self, litellm, request_vcr, mock_llmobs_writer, m model_provider="openai", 
input_messages=messages, output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}, "tool_choice": "auto"}, + metadata={ + "stream": stream, + "n": n, + "stream_options": {"include_usage": include_usage}, + "tool_choice": "auto", + }, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) @@ -180,7 +197,9 @@ async def test_atext_completion( ) ) - def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion_integrations_enabled( + self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): LLMObs.disable() @@ -205,20 +224,14 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs output_messages, token_metrics = consume_stream(resp, n) else: output_messages, token_metrics = parse_response(resp) - + LLMObs.disable() spans = mock_tracer.pop_traces() # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: span = spans[0][0] - metadata = { - "stream": stream, - "n": n, - "stream_options": { - "include_usage": include_usage - } - } + metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}} model_name = "gpt-3.5-turbo" else: span = spans[0][1] @@ -228,25 +241,25 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, mock_llmobs "n": n, "extra_body": {}, "timeout": 600.0, - "extra_headers": { - "X-Stainless-Raw-Response": "true" - } + "extra_headers": {"X-Stainless-Raw-Response": "true"}, } model_name = "gpt-3.5-turbo-0125" assert mock_llmobs_writer.enqueue.call_count == 1 expected_event = _expected_llmobs_llm_span_event( - span, - model_name=model_name, - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata=metadata, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + span, + model_name=model_name, + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata=metadata, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) mock_llmobs_writer.enqueue.assert_called_with(expected_event) - - def test_completion_proxy(self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + + def test_completion_proxy( + self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage + ): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py index f41e1098a50..a9b6309770b 100644 --- a/tests/contrib/litellm/utils.py +++ b/tests/contrib/litellm/utils.py @@ -39,7 +39,9 @@ def consume_stream(resp, n, is_completion=False): token_metrics = {} role = None for chunk in resp: - output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -49,7 +51,9 @@ async def 
async_consume_stream(resp, n, is_completion=False): token_metrics = {} role = None async for chunk in resp: - output_messages, token_metrics, role = extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_completion) + output_messages, token_metrics, role = extract_output_from_chunk( + chunk, output_messages, token_metrics, role, is_completion + ) output_messages = parse_tool_calls(output_messages) return output_messages, token_metrics @@ -68,9 +72,16 @@ def extract_output_from_chunk(chunk, output_messages, token_metrics, role, is_co while tool_call.index >= len(output_messages[choice.index]["tool_calls"]): output_messages[choice.index]["tool_calls"].append({}) arguments = output_messages[choice.index]["tool_calls"][tool_call.index].get("arguments", "") - output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) or tool_call.function.name - output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = arguments + tool_call.function.arguments - output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id + output_messages[choice.index]["tool_calls"][tool_call.index]["name"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("name", None) + or tool_call.function.name + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["arguments"] = ( + arguments + tool_call.function.arguments + ) + output_messages[choice.index]["tool_calls"][tool_call.index]["tool_id"] = ( + output_messages[choice.index]["tool_calls"][tool_call.index].get("tool_id", None) or tool_call.id + ) output_messages[choice.index]["tool_calls"][tool_call.index]["type"] = tool_call.type if "usage" in chunk and chunk["usage"]: @@ -108,12 +119,14 @@ def parse_response(resp, is_completion=False): if tool_calls: message["tool_calls"] = [] for tool_call in tool_calls: - message["tool_calls"].append({ - "name": tool_call["function"]["name"], - "arguments": json.loads(tool_call["function"]["arguments"]), - "tool_id": tool_call["id"], - "type": tool_call["type"] - }) + message["tool_calls"].append( + { + "name": tool_call["function"]["name"], + "arguments": json.loads(tool_call["function"]["arguments"]), + "tool_id": tool_call["id"], + "type": tool_call["type"], + } + ) output_messages.append(message) token_metrics = { "input_tokens": resp.usage.prompt_tokens, @@ -122,6 +135,7 @@ def parse_response(resp, is_completion=False): } return output_messages, token_metrics + tools = [ { "type": "function", From 1474258b57696b43941deb321e3ae718165ada80 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 11:41:15 -0400 Subject: [PATCH 41/61] style fixes --- ddtrace/contrib/internal/litellm/patch.py | 12 ++------ ddtrace/contrib/internal/litellm/utils.py | 3 +- ddtrace/contrib/internal/openai/utils.py | 6 ++-- ddtrace/llmobs/_integrations/litellm.py | 29 +++++++++----------- ddtrace/llmobs/_integrations/openai.py | 8 ++---- tests/contrib/litellm/conftest.py | 1 - tests/contrib/litellm/test_litellm_llmobs.py | 6 +++- 7 files changed, 27 insertions(+), 38 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index e5c2ce52296..8275fc0d049 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -14,13 +14,7 @@ from ddtrace.internal.utils import get_argument_value 
-config._add( - "litellm", - { - "span_prompt_completion_sample_rate": float(os.getenv("DD_LITELLM_SPAN_PROMPT_COMPLETION_SAMPLE_RATE", 1.0)), - "span_char_limit": int(os.getenv("DD_LITELLM_SPAN_CHAR_LIMIT", 128)), - }, -) +config._add("litellm", {}) def get_version() -> str: @@ -66,7 +60,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion try: resp = func(*args, **kwargs) if stream: - return TracedLiteLLMStream(resp, integration, span, args, kwargs, is_completion) + return TracedLiteLLMStream(resp, integration, span, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) @@ -99,7 +93,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com try: resp = await func(*args, **kwargs) if stream: - return TracedLiteLLMAsyncStream(resp, integration, span, args, kwargs, is_completion) + return TracedLiteLLMAsyncStream(resp, integration, span, kwargs, is_completion) return resp except Exception: span.set_exc_info(*sys.exc_info()) diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index f9c4463190d..1f36a8331f2 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -13,12 +13,11 @@ class BaseTracedLiteLLMStream: - def __init__(self, generator, integration, span, args, kwargs, is_completion=False): + def __init__(self, generator, integration, span, kwargs, is_completion=False): n = kwargs.get("n", 1) or 1 self._generator = generator self._dd_integration = integration self._dd_span = span - self._args = args self._kwargs = kwargs self._streamed_chunks = [[] for _ in range(n)] self._is_completion = is_completion diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py index ddf0eb37b49..58d3075cbbc 100644 --- a/ddtrace/contrib/internal/openai/utils.py +++ b/ddtrace/contrib/internal/openai/utils.py @@ -6,10 +6,8 @@ from typing import Generator from typing import List -from ddtrace.llmobs._integrations.utils import ( - openai_construct_completion_from_streamed_chunks, - openai_construct_message_from_streamed_chunks, -) +from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks +from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks import wrapt from ddtrace.internal.logger import get_logger diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index 0eea298f87d..e262dca6d3b 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -2,21 +2,19 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple import ddtrace -from ddtrace.llmobs._constants import ( - INPUT_TOKENS_METRIC_KEY, - METRICS, - OUTPUT_TOKENS_METRIC_KEY, - TOTAL_TOKENS_METRIC_KEY, -) +from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY +from ddtrace.llmobs._constants import METRICS from ddtrace.llmobs._constants import MODEL_NAME from ddtrace.llmobs._constants import MODEL_PROVIDER from ddtrace.llmobs._constants import SPAN_KIND -from ddtrace.llmobs._integrations.utils import ( - openai_set_meta_tags_from_chat, - openai_set_meta_tags_from_completion, -) +from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_chat +from 
ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_completion +from ddtrace.llmobs._llmobs import LLMObs from ddtrace.llmobs._utils import _get_attr from ddtrace.trace import Span from ddtrace.llmobs._integrations.base import BaseLLMIntegration @@ -25,7 +23,7 @@ class LiteLLMIntegration(BaseLLMIntegration): _integration_name = "litellm" # maps requested model name to parsed model name and provider - _model_map = {} + _model_map: Dict[str, Tuple[str, str]] = {} def _set_base_span_tags( self, span: Span, model: Optional[str] = None, host: Optional[str] = None, **kwargs: Dict[str, Any] @@ -44,7 +42,6 @@ def _llmobs_set_tags( operation: str = "", ) -> None: model_name = span.get_tag("litellm.request.model") - # get resolved model name and provider model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # use Open AI helpers since response format will match Open AI if operation == "completion": openai_set_meta_tags_from_completion(span, kwargs, response) else: @@ -76,8 +73,8 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: """ Span should NOT be submitted to LLMObs if: - - base_url is not None - - model provider is Open AI or Azure AND request is not being streamed AND Open AI integration is enabled + - base_url is not None: the request targets a proxy, and the downstream LLM request will be captured instead + - the request is not streamed, the model provider is OpenAI or Azure OpenAI, and the OpenAI integration is enabled: the request will be captured by the OpenAI integration instead """ base_url = kwargs.get("api_base", None) if base_url is not None: @@ -86,9 +83,9 @@ def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, model_lower = model.lower() if model else "" # model provider is unknown until the request completes; this is a best-effort check for OpenAI or Azure if ( - ("gpt" in model_lower or "openai" in model_lower or "azure" in model_lower) + any(prefix in model_lower for prefix in ("gpt", "openai", "azure")) and not stream - and "openai" in ddtrace._monkey._get_patched_modules() + and LLMObs._integration_is_enabled("openai") ): return False return True diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index c913282a2e5..87e6a100e9d 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -24,11 +24,9 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._integrations.utils import ( - get_llmobs_metrics_tags, - openai_set_meta_tags_from_chat, - openai_set_meta_tags_from_completion, -) +from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags +from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_chat +from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_completion from ddtrace.llmobs._integrations.utils import is_openai_default_base_url from ddtrace.llmobs._utils import _get_attr from ddtrace.llmobs.utils import Document diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 2e846ddd5e7..3d4d300acd3 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -52,7 +52,6 @@ def mock_tracer(litellm, ddtrace_global_config): pin = Pin.get_from(litellm) mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm,
tracer=mock_tracer) - pin.tracer.configure() if ddtrace_global_config.get("_llmobs_enabled", False): # Have to disable and re-enable LLMObs to use the mock tracer. diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index c68fd103223..baac526c941 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -2,7 +2,11 @@ from ddtrace.llmobs._llmobs import LLMObs import pytest -from tests.contrib.litellm.utils import async_consume_stream, get_cassette_name, consume_stream, parse_response, tools +from tests.contrib.litellm.utils import async_consume_stream +from tests.contrib.litellm.utils import get_cassette_name +from tests.contrib.litellm.utils import consume_stream +from tests.contrib.litellm.utils import parse_response +from tests.contrib.litellm.utils import tools from tests.llmobs._utils import _expected_llmobs_llm_span_event from tests.utils import DummyTracer From c9ba90a6461e39f50972bfe3007566857b45fc35 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 12:02:13 -0400 Subject: [PATCH 42/61] use wrapt.ObjectProxy for traced streams --- ddtrace/contrib/internal/litellm/patch.py | 4 ++-- ddtrace/contrib/internal/litellm/utils.py | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py index 8275fc0d049..02157c1690d 100644 --- a/ddtrace/contrib/internal/litellm/patch.py +++ b/ddtrace/contrib/internal/litellm/patch.py @@ -133,7 +133,7 @@ def patch(): wrap("litellm", "text_completion", traced_text_completion(litellm)) wrap("litellm", "atext_completion", traced_atext_completion(litellm)) wrap("litellm", "get_llm_provider", traced_get_llm_provider(litellm)) - wrap("litellm", "litellm.main.get_llm_provider", traced_get_llm_provider(litellm)) + wrap("litellm", "main.get_llm_provider", traced_get_llm_provider(litellm)) def unpatch(): @@ -147,6 +147,6 @@ def unpatch(): unwrap(litellm, "text_completion") unwrap(litellm, "atext_completion") unwrap(litellm, "get_llm_provider") - unwrap(litellm.litellm.main, "get_llm_provider") + unwrap(litellm.main, "get_llm_provider") delattr(litellm, "_datadog_integration") diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py index 1f36a8331f2..59c94b7d192 100644 --- a/ddtrace/contrib/internal/litellm/utils.py +++ b/ddtrace/contrib/internal/litellm/utils.py @@ -2,6 +2,7 @@ from typing import Any from typing import Dict from typing import List +import wrapt from ddtrace.internal.logger import get_logger from ddtrace.llmobs._integrations.utils import ( @@ -12,10 +13,10 @@ log = get_logger(__name__) -class BaseTracedLiteLLMStream: - def __init__(self, generator, integration, span, kwargs, is_completion=False): +class BaseTracedLiteLLMStream(wrapt.ObjectProxy): + def __init__(self, wrapped, integration, span, kwargs, is_completion=False): + super().__init__(wrapped) n = kwargs.get("n", 1) or 1 - self._generator = generator self._dd_integration = integration self._dd_span = span self._kwargs = kwargs @@ -25,15 +26,15 @@ def __init__(self, generator, integration, span, kwargs, is_completion=False): class TracedLiteLLMStream(BaseTracedLiteLLMStream): def __enter__(self): - self._generator.__enter__() + self.__wrapped__.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): - self._generator.__exit__(exc_type, exc_val, exc_tb) + self.__wrapped__.__exit__(exc_type, 
exc_val, exc_tb) def __iter__(self): try: - for chunk in self._generator: + for chunk in self.__wrapped__: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: @@ -47,7 +48,7 @@ def __iter__(self): def __next__(self): try: - chunk = self._generator.__next__() + chunk = self.__wrapped__.__next__() _loop_handler(chunk, self._streamed_chunks) return chunk except StopIteration: @@ -64,15 +65,15 @@ def __next__(self): class TracedLiteLLMAsyncStream(BaseTracedLiteLLMStream): async def __aenter__(self): - await self._generator.__aenter__() + await self.__wrapped__.__aenter__() return self async def __aexit__(self, exc_type, exc_val, exc_tb): - await self._generator.__aexit__(exc_type, exc_val, exc_tb) + await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb) async def __aiter__(self): try: - async for chunk in self._generator: + async for chunk in self.__wrapped__: yield chunk _loop_handler(chunk, self._streamed_chunks) except Exception: @@ -86,7 +87,7 @@ async def __aiter__(self): async def __anext__(self): try: - chunk = await self._generator.__anext__() + chunk = await self.__wrapped__.__anext__() _loop_handler(chunk, self._streamed_chunks) return chunk except StopAsyncIteration: From b2e8f59c41cd03c701d2daf436c8777ce00cd382 Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 12:27:22 -0400 Subject: [PATCH 43/61] small improvements to model name and metric extraction --- ddtrace/llmobs/_integrations/litellm.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py index e262dca6d3b..b40df3a1b18 100644 --- a/ddtrace/llmobs/_integrations/litellm.py +++ b/ddtrace/llmobs/_integrations/litellm.py @@ -5,6 +5,7 @@ from typing import Tuple import ddtrace +from ddtrace.internal.utils import get_argument_value from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY @@ -41,7 +42,7 @@ def _llmobs_set_tags( response: Optional[Any] = None, operation: str = "", ) -> None: - model_name = span.get_tag("litellm.request.model") + model_name = get_argument_value(args, kwargs, 0, "model", None) model_name, model_provider = self._model_map.get(model_name, (model_name, "")) # use Open AI helpers since response format will match Open AI @@ -57,18 +58,21 @@ def _llmobs_set_tags( @staticmethod def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]: + if not resp: + return {} if isinstance(resp, list): token_usage = _get_attr(resp[0], "usage", None) else: token_usage = _get_attr(resp, "usage", None) - if token_usage is not None: - prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) - completion_tokens = _get_attr(token_usage, "completion_tokens", 0) - return { - INPUT_TOKENS_METRIC_KEY: prompt_tokens, - OUTPUT_TOKENS_METRIC_KEY: completion_tokens, - TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, - } + if token_usage is None: + return {} + prompt_tokens = _get_attr(token_usage, "prompt_tokens", 0) + completion_tokens = _get_attr(token_usage, "completion_tokens", 0) + return { + INPUT_TOKENS_METRIC_KEY: prompt_tokens, + OUTPUT_TOKENS_METRIC_KEY: completion_tokens, + TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens, + } def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool: """ From 4b149210f176eceafde8789859c8e16e255fec90 Mon Sep 17 00:00:00 2001 From: Nicole 
Cybul Date: Thu, 10 Apr 2025 14:09:29 -0400 Subject: [PATCH 44/61] cleaned up configuration for litellm tests --- tests/contrib/litellm/conftest.py | 59 ++++--- tests/contrib/litellm/test_litellm_llmobs.py | 161 ++++++++----------- 2 files changed, 100 insertions(+), 120 deletions(-) diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 3d4d300acd3..12311e95de4 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -1,15 +1,21 @@ -import mock - +from ddtrace.llmobs._writer import LLMObsSpanWriter import pytest from ddtrace.contrib.internal.litellm.patch import patch from ddtrace.trace import Pin from ddtrace.contrib.internal.litellm.patch import unpatch from tests.utils import DummyTracer -from tests.utils import DummyWriter from tests.utils import override_global_config from tests.contrib.litellm.utils import get_request_vcr -from ddtrace.llmobs import LLMObs +from ddtrace.llmobs import LLMObs as llmobs_service +from ddtrace.llmobs._constants import AGENTLESS_BASE_URL + +class TestLLMObsSpanWriter(LLMObsSpanWriter): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.events = [] + def enqueue(self, event): + self.events.append(event) def default_global_config(): return {} @@ -19,17 +25,15 @@ def default_global_config(): def ddtrace_global_config(): return {} +@pytest.fixture +def llmobs_span_writer(): + agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com") + yield TestLLMObsSpanWriter(is_agentless=True, agentless_url=agentless_url, interval=1.0, timeout=1.0) -@pytest.fixture() -def mock_llmobs_writer(): - patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") - try: - LLMObsSpanWriterMock = patcher.start() - m = mock.MagicMock() - LLMObsSpanWriterMock.return_value = m - yield m - finally: - patcher.stop() + +@pytest.fixture +def llmobs_events(litellm_llmobs, llmobs_span_writer): + return llmobs_span_writer.events @pytest.fixture @@ -46,23 +50,28 @@ def litellm(ddtrace_global_config, monkeypatch): yield litellm unpatch() +@pytest.fixture +def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_config, monkeypatch): + llmobs_service.disable() + with override_global_config( + { + "_llmobs_ml_app": "", + "_dd_api_key": "", + } + ): + enable_integrations = ddtrace_global_config.get("_llmobs_integrations_enabled", False) + llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations) + llmobs_service._instance._llmobs_span_writer = llmobs_span_writer + yield llmobs_service + llmobs_service.disable() @pytest.fixture -def mock_tracer(litellm, ddtrace_global_config): +def mock_tracer(litellm): + mock_tracer = DummyTracer() pin = Pin.get_from(litellm) - mock_tracer = DummyTracer(writer=DummyWriter(trace_flush_enabled=False)) pin._override(litellm, tracer=mock_tracer) - - if ddtrace_global_config.get("_llmobs_enabled", False): - # Have to disable and re-enable LLMObs to use the mock tracer. 
- LLMObs.disable() - enable_integrations = ddtrace_global_config.get("_integrations_enabled", False) - LLMObs.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations) - yield mock_tracer - LLMObs.disable() - @pytest.fixture def request_vcr(): diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index baac526c941..258cfab86d5 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -11,17 +11,6 @@ from tests.utils import DummyTracer -@pytest.mark.parametrize( - "ddtrace_global_config", - [ - dict( - _llmobs_enabled=True, - _llmobs_sample_rate=1.0, - _llmobs_ml_app="", - _dd_api_key="", - ) - ], -) @pytest.mark.parametrize( "stream,n,include_usage", [ @@ -36,7 +25,7 @@ ], ) class TestLLMObsLiteLLM: - def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( @@ -52,22 +41,20 @@ def test_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) def test_completion_with_tools( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): if stream and n > 1: pytest.skip( @@ -90,26 +77,24 @@ def test_completion_with_tools( output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={ - "stream": stream, - "n": n, - "stream_options": {"include_usage": include_usage}, - "tool_choice": "auto", - }, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={ + "stream": stream, + "n": n, + "stream_options": {"include_usage": include_usage}, + "tool_choice": "auto", + }, + token_metrics=token_metrics, + tags={"ml_app": "", "service": 
"tests.contrib.litellm"}, ) - async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = await litellm.acompletion( @@ -125,21 +110,19 @@ async def test_acompletion(self, litellm, request_vcr, mock_llmobs_writer, mock_ output_messages, token_metrics = parse_response(resp) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=messages, - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage): + def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" resp = litellm.text_completion( @@ -155,22 +138,20 @@ def test_text_completion(self, litellm, request_vcr, mock_llmobs_writer, mock_tr output_messages, token_metrics = parse_response(resp, is_completion=True) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=[{"content": prompt}], - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) async def test_atext_completion( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
@@ -187,33 +168,25 @@ async def test_atext_completion( output_messages, token_metrics = parse_response(resp, is_completion=True) span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - mock_llmobs_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - model_name="gpt-3.5-turbo", - model_provider="openai", - input_messages=[{"content": prompt}], - output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, - token_metrics=token_metrics, - tags={"ml_app": "", "service": "tests.contrib.litellm"}, - ) + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=[{"content": prompt}], + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) + @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)]) def test_completion_integrations_enabled( - self, litellm, request_vcr, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): - LLMObs.disable() - - LLMObs.enable(integrations_enabled=True) - mock_tracer = DummyTracer() - import litellm import openai - pin = Pin.get_from(litellm) - pin._override(litellm, tracer=mock_tracer) pin._override(openai, tracer=mock_tracer) messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -229,8 +202,6 @@ def test_completion_integrations_enabled( else: output_messages, token_metrics = parse_response(resp) - LLMObs.disable() - spans = mock_tracer.pop_traces() # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: @@ -248,7 +219,7 @@ def test_completion_integrations_enabled( "extra_headers": {"X-Stainless-Raw-Response": "true"}, } model_name = "gpt-3.5-turbo-0125" - assert mock_llmobs_writer.enqueue.call_count == 1 + assert len(llmobs_events) == 1 expected_event = _expected_llmobs_llm_span_event( span, model_name=model_name, @@ -259,10 +230,10 @@ def test_completion_integrations_enabled( token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - mock_llmobs_writer.enqueue.assert_called_with(expected_event) + assert llmobs_events[0] == expected_event def test_completion_proxy( - self, litellm, request_vcr_include_localhost, mock_llmobs_writer, mock_tracer, stream, n, include_usage + self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n, include_usage ): with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, include_usage, proxy=True)): messages = [{"content": "Hey, what is up?", "role": "user"}] @@ -278,4 +249,4 @@ def test_completion_proxy( consume_stream(resp, n) # client side requests made to the proxy are not submitted to LLMObs - assert mock_llmobs_writer.enqueue.call_count == 0 + assert len(llmobs_events) == 0 From 73417dbfc8504fc3bc5ee2ed12e3c84160e5e75f Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 14:17:49 -0400 Subject: [PATCH 45/61] style fixes --- tests/contrib/litellm/conftest.py | 7 ++++++- tests/contrib/litellm/test_litellm_llmobs.py | 9 +++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git 
a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py index 12311e95de4..9415c45d682 100644 --- a/tests/contrib/litellm/conftest.py +++ b/tests/contrib/litellm/conftest.py @@ -9,6 +9,7 @@ from ddtrace.llmobs import LLMObs as llmobs_service from ddtrace.llmobs._constants import AGENTLESS_BASE_URL + class TestLLMObsSpanWriter(LLMObsSpanWriter): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -17,6 +18,7 @@ def __init__(self, *args, **kwargs): def enqueue(self, event): self.events.append(event) + def default_global_config(): return {} @@ -25,6 +27,7 @@ def default_global_config(): def ddtrace_global_config(): return {} + @pytest.fixture def llmobs_span_writer(): agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com") @@ -50,8 +53,9 @@ def litellm(ddtrace_global_config, monkeypatch): yield litellm unpatch() + @pytest.fixture -def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_config, monkeypatch): +def litellm_llmobs(mock_tracer, llmobs_span_writer, ddtrace_global_config): llmobs_service.disable() with override_global_config( { @@ -65,6 +69,7 @@ def litellm_llmobs(tracer, mock_tracer, llmobs_span_writer, ddtrace_global_confi yield llmobs_service llmobs_service.disable() + @pytest.fixture def mock_tracer(litellm): mock_tracer = DummyTracer() diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 258cfab86d5..167cd0b0428 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -53,9 +53,7 @@ def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stre tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_completion_with_tools( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage - ): + def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): if stream and n > 1: pytest.skip( "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" @@ -150,9 +148,7 @@ def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_atext_completion( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage - ): + async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): prompt = "Hey, what is up?" 
resp = await litellm.atext_completion( @@ -186,6 +182,7 @@ def test_completion_integrations_enabled( ): with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): import openai + pin = Pin.get_from(litellm) pin._override(openai, tracer=mock_tracer) From f163063ec938ca2813eb5aeae978ec87fd7c897e Mon Sep 17 00:00:00 2001 From: Nicole Cybul Date: Thu, 10 Apr 2025 14:32:59 -0400 Subject: [PATCH 46/61] make test specifically for excluding usage --- ...completion_stream_exclude_usage_proxy.yaml | 144 ----------- ...etion_stream_exclude_usage_with_tools.yaml | 130 ---------- ..._multiple_choices_exclude_usage_proxy.yaml | 240 ------------------ ...iple_choices_exclude_usage_with_tools.yaml | 162 ------------ tests/contrib/litellm/test_litellm_llmobs.py | 97 ++++--- 5 files changed, 61 insertions(+), 712 deletions(-) delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml delete mode 100644 tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml deleted file mode 100644 index 39cf74be7cb..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_proxy.yaml +++ /dev/null @@ -1,144 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '144' - content-type: - - application/json - host: - - 0.0.0.0:4000 - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: http://0.0.0.0:4000/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - anything"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - need"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - 
today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-3d9ae57d-5122-40ae-b390-7c0350f13703","created":1743453499,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} - - - data: [DONE] - - - ' - headers: - content-type: - - text/event-stream; charset=utf-8 - date: - - Mon, 31 Mar 2025 20:38:18 GMT - server: - - uvicorn - transfer-encoding: - - chunked - x-litellm-call-id: - - a5a87fc0-874f-4432-b608-91b437b91fb2 - x-litellm-key-spend: - - '0.0' - x-litellm-version: - - 1.63.11 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml deleted file mode 100644 index 515680c5d04..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_exclude_usage_with_tools.yaml +++ /dev/null @@ -1,130 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":1,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get - the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The - city and state, e.g. San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '542' - content-type: - - application/json - cookie: - - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ty5BH4ChPTiw8GnzCSqhxhoP","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OV3VY2xonnrNbpldoMhzUGrJwD","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 9278b292789f3ba6-BOS - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Fri, 28 Mar 2025 17:02:51 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '281' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - 
x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999985' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_78940dfd1e163cd37e49e666383b7944 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml deleted file mode 100644 index 7cface6a716..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_proxy.yaml +++ /dev/null @@ -1,240 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"Hey, what is up?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '144' - content-type: - - application/json - host: - - 0.0.0.0:4000 - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: http://0.0.0.0:4000/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Not","role":"assistant"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"Not"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - much"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":","}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - just"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - here"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - to"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - chat"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - and"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - with"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - any"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - anything"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - questions"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - 
you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - or"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - need"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - tasks"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - may"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - have"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"."}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - assist"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - How"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: 
{"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - can"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" - today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - I"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - help"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - you"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":" - today"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"index":1,"delta":{"content":"?"}}],"stream_options":{"include_usage":false}} - - - data: {"id":"chatcmpl-0b587184-f589-4c2f-a646-1d85ad5659cd","created":1743453500,"model":"gpt-3.5-turbo","object":"chat.completion.chunk","choices":[{"finish_reason":"stop","index":0,"delta":{}}],"stream_options":{"include_usage":false}} - - - data: [DONE] - - - ' - headers: - content-type: - - text/event-stream; charset=utf-8 - date: - - Mon, 31 Mar 2025 20:38:19 GMT - server: - - uvicorn - transfer-encoding: - - chunked - x-litellm-call-id: - - b4f152d1-5074-4fb3-a79d-ad0529fa5aa1 - x-litellm-key-spend: - - '0.0' - x-litellm-version: - - 1.63.11 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml b/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml deleted file mode 100644 index fda11e5011a..00000000000 --- a/tests/contrib/litellm/cassettes/completion_stream_multiple_choices_exclude_usage_with_tools.yaml +++ /dev/null @@ -1,162 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"content":"What is the weather like in San Francisco, CA?","role":"user"}],"model":"gpt-3.5-turbo","n":2,"stream":true,"stream_options":{"include_usage":false},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_weather","description":"Get - the current weather in a given location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The - city and state, e.g. 
San Francisco, CA"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '542' - content-type: - - application/json - cookie: - - __cf_bm=fqRH4xQOdW97Lq48GENmqsxwBCogcBHyvs.wSdl_6s4-1743180731-1.0.1.1-B_IVCrWmMwz_73BA_ofPEYkGsvcpni7cwm0XECIoeyWGVbSzoqhwBVGKxzZq48KMqHJlXRKdFEOxh.ePotuzhSToDh1DIWbl56MScFCUe7A; - _cfuvid=o05iXMWuodXr6cP.2CnR.WYEJDQ3TAkCZDq3J3cQ_7c-1743180731202-0.0.1.1-604800000 - host: - - api.openai.com - user-agent: - - OpenAI/Python 1.68.2 - x-stainless-arch: - - arm64 - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.68.2 - x-stainless-raw-response: - - 'true' - x-stainless-read-timeout: - - '600.0' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.11.10 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_bWoRSYt75lffFv8JkMI4uDdz","type":"function","function":{"name":"get_current_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - Francisco"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" - CA"}}]},"logprobs":null,"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: {"id":"chatcmpl-BG7OVIN7NH5bgKFuLulrtbKkiTHiW","object":"chat.completion.chunk","created":1743181371,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":1,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]} - - - data: [DONE] - - - ' - headers: - CF-RAY: - - 9278b2960dbd3ba6-BOS - Connection: - - keep-alive - Content-Type: - - text/event-stream; charset=utf-8 - Date: - - Fri, 28 Mar 2025 17:02:52 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - datadog-4 - openai-processing-ms: - - '406' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '15000' - x-ratelimit-limit-tokens: - - '2000000' - x-ratelimit-remaining-requests: - - '14999' - x-ratelimit-remaining-tokens: - - '1999985' - x-ratelimit-reset-requests: - - 4ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_7e8b09694a1029b3eb2fecf93deef4a3 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py index 167cd0b0428..2d715c7b21b 100644 --- a/tests/contrib/litellm/test_litellm_llmobs.py +++ b/tests/contrib/litellm/test_litellm_llmobs.py @@ -12,28 +12,24 @@ @pytest.mark.parametrize( - "stream,n,include_usage", + "stream,n", [ - (True, 1, True), - (True, 2, True), - (False, 1, True), - (False, 2, True), - (True, 1, False), - (True, 2, False), - (False, 1, False), - (False, 2, False), + (True, 1), + (True, 2), + (False, 1), + (False, 2), ], ) class TestLLMObsLiteLLM: - def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n) @@ -48,24 +44,53 @@ def test_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stre model_provider="openai", input_messages=messages, output_messages=output_messages, - 
metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): + def test_completion_exclude_usage(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n, False)): + messages = [{"content": "Hey, what is up?", "role": "user"}] + resp = litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + stream=stream, + n=n, + stream_options={"include_usage": False}, + ) + if stream: + output_messages, token_metrics = consume_stream(resp, n) + else: + output_messages, token_metrics = parse_response(resp) + + span = mock_tracer.pop_traces()[0][0] + assert len(llmobs_events) == 1 + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + model_name="gpt-3.5-turbo", + model_provider="openai", + input_messages=messages, + output_messages=output_messages, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": False}}, + token_metrics=token_metrics, + tags={"ml_app": "", "service": "tests.contrib.litellm"}, + ) + + + def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): if stream and n > 1: pytest.skip( "Streamed responses with multiple completions and tool calls are not supported: see open issue https://github.com/BerriAI/litellm/issues/8977" ) - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage, tools=True)): + with request_vcr.use_cassette(get_cassette_name(stream, n, tools=True)): messages = [{"content": "What is the weather like in San Francisco, CA?", "role": "user"}] resp = litellm.completion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, tools=tools, tool_choice="auto", ) @@ -85,22 +110,22 @@ def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_t metadata={ "stream": stream, "n": n, - "stream_options": {"include_usage": include_usage}, + "stream_options": {"include_usage": True}, "tool_choice": "auto", }, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): messages = [{"content": "Hey, what is up?", "role": "user"}] resp = await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = await async_consume_stream(resp, n) @@ -115,20 +140,20 @@ async def test_acompletion(self, litellm, request_vcr, llmobs_events, mock_trace model_provider="openai", input_messages=messages, output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": 
"tests.contrib.litellm"}, ) - def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): prompt = "Hey, what is up?" resp = litellm.text_completion( model="gpt-3.5-turbo", prompt=prompt, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n, is_completion=True) @@ -143,20 +168,20 @@ def test_text_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, model_provider="openai", input_messages=[{"content": prompt}], output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) - async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n): + with request_vcr.use_cassette(get_cassette_name(stream, n)): prompt = "Hey, what is up?" resp = await litellm.atext_completion( model="gpt-3.5-turbo", prompt=prompt, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = await async_consume_stream(resp, n, is_completion=True) @@ -171,16 +196,16 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_ model_provider="openai", input_messages=[{"content": prompt}], output_messages=output_messages, - metadata={"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}}, + metadata={"stream": stream, "n": n, "stream_options": {"include_usage": True}}, token_metrics=token_metrics, tags={"ml_app": "", "service": "tests.contrib.litellm"}, ) @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)]) def test_completion_integrations_enabled( - self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, include_usage + self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n ): - with request_vcr.use_cassette(get_cassette_name(stream, n, include_usage)): + with request_vcr.use_cassette(get_cassette_name(stream, n)): import openai pin = Pin.get_from(litellm) @@ -192,7 +217,7 @@ def test_completion_integrations_enabled( messages=messages, stream=stream, n=n, - stream_options={"include_usage": include_usage}, + stream_options={"include_usage": True}, ) if stream: output_messages, token_metrics = consume_stream(resp, n) @@ -203,7 +228,7 @@ def test_completion_integrations_enabled( # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request if stream: span = spans[0][0] - metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": include_usage}} + metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}} model_name = "gpt-3.5-turbo" else: span = spans[0][1] @@ -230,16 +255,16 @@ def test_completion_integrations_enabled( assert llmobs_events[0] == expected_event def test_completion_proxy( - self, 
From 59bc4d4dd45a89023a036df4b08c0482e59c1cfa Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 14:59:23 -0400
Subject: [PATCH 47/61] style and type fixes

---
 ddtrace/contrib/internal/litellm/patch.py    | 4 ++--
 ddtrace/llmobs/_integrations/litellm.py      | 5 ++---
 ddtrace/llmobs/_integrations/utils.py        | 2 +-
 tests/contrib/litellm/test_litellm_llmobs.py | 9 ++-------
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index 02157c1690d..541faef1bbe 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -53,7 +53,7 @@ def _traced_completion(litellm, pin, func, instance, args, kwargs, is_completion
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs),
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
     stream = kwargs.get("stream", False)
     resp = None
@@ -86,7 +86,7 @@ async def _traced_acompletion(litellm, pin, func, instance, args, kwargs, is_com
         func.__name__,
         model=model,
         host=host,
-        submit_to_llmobs=integration.should_submit_to_llmobs(model, kwargs),
+        submit_to_llmobs=integration.should_submit_to_llmobs(kwargs, model),
     )
     stream = kwargs.get("stream", False)
     resp = None
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py
index b40df3a1b18..cad49cd1cb9 100644
--- a/ddtrace/llmobs/_integrations/litellm.py
+++ b/ddtrace/llmobs/_integrations/litellm.py
@@ -4,7 +4,6 @@ from typing import Optional
 from typing import Tuple

-import ddtrace
 from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
@@ -42,7 +41,7 @@ def _llmobs_set_tags(
         response: Optional[Any] = None,
         operation: str = "",
     ) -> None:
-        model_name = get_argument_value(args, kwargs, 0, "model", None)
+        model_name = get_argument_value(args, kwargs, 0, "model", False) or ""
         model_name, model_provider = self._model_map.get(model_name, (model_name, ""))

         # use Open AI helpers since response format will match Open AI
@@ -74,7 +73,7 @@ def _extract_llmobs_metrics(resp: Any) -> Dict[str, Any]:
             TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
         }

-    def should_submit_to_llmobs(self, model: Optional[str] = None, kwargs: Dict[str, Any] = None) -> bool:
+    def should_submit_to_llmobs(self, kwargs: Dict[str, Any], model: Optional[str] = None) -> bool:
         """
         Span should NOT be submitted to LLMObs if:
         - base_url is not None: is a proxy request and we will capture the LLM request downstream
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index c1d600a9713..dcd8edd24ba 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -433,7 +433,7 @@ def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_
         stored_tool_calls[list_idx]["arguments"] += getattr(function_call, "arguments", "")


-def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, str]:
+def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) -> Dict[str, Any]:
     """Constructs a chat completion message dictionary from streamed chunks.

     The resulting message dictionary is of form:
     {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 2d715c7b21b..eaa87a49bc8 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -77,7 +77,6 @@ def test_completion_exclude_usage(self, litellm, request_vcr, llmobs_events, moc
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )

-
     def test_completion_with_tools(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         if stream and n > 1:
             pytest.skip(
@@ -202,9 +201,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

@@ -254,9 +251,7 @@ def test_completion_integrations_enabled(
             )
             assert llmobs_events[0] == expected_event

-    def test_completion_proxy(
-        self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
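Note: after this patch, should_submit_to_llmobs takes the request kwargs as its required first argument and the model as an optional second, matching the call sites in patch.py. A hypothetical call under the new order (the kwarg values here are invented for illustration):

    # kwargs first (required), model second (optional)
    integration.should_submit_to_llmobs({"stream": True, "api_base": None}, "gpt-3.5-turbo")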
From 65297a3157ef756e033d447289ada2c525c8e740 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 16:09:52 -0400
Subject: [PATCH 48/61] add typing for message variable

---
 ddtrace/llmobs/_integrations/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index dcd8edd24ba..464480bd643 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -438,7 +438,7 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: List[Any]) ->
     The resulting message dictionary is of form:
     {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
     """
-    message = {"content": "", "tool_calls": []}
+    message: Dict[str, Any] = {"content": "", "tool_calls": []}
     for chunk in streamed_chunks:
         if getattr(chunk, "usage", None):
             message["usage"] = chunk.usage
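Note: the Dict[str, Any] annotations added in patches 47 and 48 reflect the actual shape of the accumulated message, which mixes value types. Per the docstring above, a fully populated message looks roughly like this (the field values are invented):

    message = {
        "content": "Hello!",        # str, concatenated from chunk deltas
        "role": "assistant",        # str
        "tool_calls": [],           # list of tool-call dicts
        "finish_reason": "stop",    # str
    }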
From dd01f975f145e44f57a41eaf25698664beffb623 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 10 Apr 2025 16:36:37 -0400
Subject: [PATCH 49/61] ruff fixes

---
 ddtrace/contrib/internal/litellm/patch.py    |  7 +++----
 ddtrace/contrib/internal/litellm/utils.py    | 11 ++++-------
 ddtrace/contrib/internal/openai/utils.py     |  7 ++-----
 ddtrace/llmobs/_integrations/litellm.py      | 12 +++++++-----
 ddtrace/llmobs/_integrations/openai.py       |  9 +--------
 ddtrace/llmobs/_integrations/utils.py        | 15 +++++++++------
 tests/contrib/litellm/conftest.py            | 11 ++++++-----
 tests/contrib/litellm/test_litellm_llmobs.py |  6 ++----
 tests/contrib/litellm/utils.py               |  6 ++++--
 9 files changed, 38 insertions(+), 46 deletions(-)

diff --git a/ddtrace/contrib/internal/litellm/patch.py b/ddtrace/contrib/internal/litellm/patch.py
index 541faef1bbe..6aed01be783 100644
--- a/ddtrace/contrib/internal/litellm/patch.py
+++ b/ddtrace/contrib/internal/litellm/patch.py
@@ -1,17 +1,16 @@
-import os
 import sys

 import litellm

 from ddtrace import config
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
+from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
 from ddtrace.contrib.trace_utils import unwrap
 from ddtrace.contrib.trace_utils import with_traced_module
 from ddtrace.contrib.trace_utils import wrap
-from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMStream
-from ddtrace.contrib.internal.litellm.utils import TracedLiteLLMAsyncStream
+from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._integrations import LiteLLMIntegration
 from ddtrace.trace import Pin
-from ddtrace.internal.utils import get_argument_value

 config._add("litellm", {})
diff --git a/ddtrace/contrib/internal/litellm/utils.py b/ddtrace/contrib/internal/litellm/utils.py
index 59c94b7d192..11b996891a4 100644
--- a/ddtrace/contrib/internal/litellm/utils.py
+++ b/ddtrace/contrib/internal/litellm/utils.py
@@ -1,14 +1,11 @@
 import sys
-from typing import Any
-from typing import Dict
-from typing import List
+
 import wrapt

 from ddtrace.internal.logger import get_logger
-from ddtrace.llmobs._integrations.utils import (
-    openai_construct_completion_from_streamed_chunks,
-    openai_construct_message_from_streamed_chunks,
-)
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
+

 log = get_logger(__name__)
diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py
index 58d3075cbbc..dca02cb8ed9 100644
--- a/ddtrace/contrib/internal/openai/utils.py
+++ b/ddtrace/contrib/internal/openai/utils.py
@@ -1,16 +1,13 @@
 import re
 import sys
-from typing import Any
 from typing import AsyncGenerator
-from typing import Dict
 from typing import Generator
-from typing import List

-from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
-from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 import wrapt

 from ddtrace.internal.logger import get_logger
+from ddtrace.llmobs._integrations.utils import openai_construct_completion_from_streamed_chunks
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 from ddtrace.llmobs._utils import _get_attr
diff --git a/ddtrace/llmobs/_integrations/litellm.py b/ddtrace/llmobs/_integrations/litellm.py
index cad49cd1cb9..80107500afe 100644
--- a/ddtrace/llmobs/_integrations/litellm.py
+++ b/ddtrace/llmobs/_integrations/litellm.py
@@ -6,18 +6,18 @@
 from ddtrace.internal.utils import get_argument_value
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
-from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
-from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
 from ddtrace.llmobs._constants import MODEL_PROVIDER
+from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import SPAN_KIND
+from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
+from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_chat
 from ddtrace.llmobs._integrations.openai import openai_set_meta_tags_from_completion
 from ddtrace.llmobs._llmobs import LLMObs
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.trace import Span
-from ddtrace.llmobs._integrations.base import BaseLLMIntegration


 class LiteLLMIntegration(BaseLLMIntegration):
@@ -77,14 +77,16 @@ def should_submit_to_llmobs(self, kwargs: Dict[str, Any], model: Optional[str] =
         """
         Span should NOT be submitted to LLMObs if:
         - base_url is not None: is a proxy request and we will capture the LLM request downstream
-        - non-streamed request and model provider is OpenAI/AzureOpenAI and the OpenAI integration is enabled: this request will be captured in the OpenAI integration instead
+        - non-streamed request and model provider is OpenAI/AzureOpenAI and the OpenAI integration
+          is enabled: this request will be captured in the OpenAI integration instead
         """
         base_url = kwargs.get("api_base", None)
         if base_url is not None:
             return False
         stream = kwargs.get("stream", False)
         model_lower = model.lower() if model else ""
-        # model provider is unknown until request completes; therefore, this is a best effort attempt to check if model provider is Open AI or Azure
+        # model provider is unknown until request completes; therefore, this is a best effort attempt to check
+        # if model provider is OpenAI or Azure
         if (
             any(prefix in model_lower for prefix in ("gpt", "openai", "azure"))
             and not stream
diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py
index 87e6a100e9d..aebe8ae3207 100644
--- a/ddtrace/llmobs/_integrations/openai.py
+++ b/ddtrace/llmobs/_integrations/openai.py
@@ -1,33 +1,26 @@
-import json
 from typing import Any
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Tuple

-from ddtrace.internal import core
 from ddtrace.internal.constants import COMPONENT
-from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.internal.utils.version import parse_version
-from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
-from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
-from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
 from ddtrace.llmobs._constants import MODEL_NAME
 from ddtrace.llmobs._constants import MODEL_PROVIDER
-from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import OUTPUT_VALUE
 from ddtrace.llmobs._constants import SPAN_KIND
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
+from ddtrace.llmobs._integrations.utils import is_openai_default_base_url
 from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_chat
 from ddtrace.llmobs._integrations.utils import openai_set_meta_tags_from_completion
-from ddtrace.llmobs._integrations.utils import is_openai_default_base_url
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs.utils import Document
 from ddtrace.trace import Pin
diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index 464480bd643..104b9c63f7c 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -10,15 +10,18 @@
 from typing import Tuple
 from typing import Union
 from urllib.parse import urlparse

-from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
-from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
-from ddtrace.internal import core
-from ddtrace.internal.utils.formats import format_trace_id
+from ddtrace._trace.span import Span
+from ddtrace.internal import core
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import format_trace_id
+from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
+from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+from ddtrace.llmobs._constants import INPUT_MESSAGES
+from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
-from ddtrace._trace.span import Span
-from ddtrace.llmobs._constants import INPUT_MESSAGES, INPUT_TOKENS_METRIC_KEY, METADATA, OUTPUT_MESSAGES
+from ddtrace.llmobs._constants import OUTPUT_MESSAGES
 from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._utils import _get_attr
diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 9415c45d682..7d6d7684914 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -1,13 +1,14 @@
-from ddtrace.llmobs._writer import LLMObsSpanWriter
 import pytest
+
 from ddtrace.contrib.internal.litellm.patch import patch
-from ddtrace.trace import Pin
 from ddtrace.contrib.internal.litellm.patch import unpatch
-from tests.utils import DummyTracer
-from tests.utils import override_global_config
-from tests.contrib.litellm.utils import get_request_vcr
 from ddtrace.llmobs import LLMObs as llmobs_service
 from ddtrace.llmobs._constants import AGENTLESS_BASE_URL
+from ddtrace.llmobs._writer import LLMObsSpanWriter
+from ddtrace.trace import Pin
+from tests.contrib.litellm.utils import get_request_vcr
+from tests.utils import DummyTracer
+from tests.utils import override_global_config


 class TestLLMObsSpanWriter(LLMObsSpanWriter):
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index eaa87a49bc8..764886d981a 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,14 +1,12 @@
-from ddtrace._trace.pin import Pin
-from ddtrace.llmobs._llmobs import LLMObs
 import pytest

+from ddtrace._trace.pin import Pin
 from tests.contrib.litellm.utils import async_consume_stream
-from tests.contrib.litellm.utils import get_cassette_name
 from tests.contrib.litellm.utils import consume_stream
+from tests.contrib.litellm.utils import get_cassette_name
 from tests.contrib.litellm.utils import parse_response
 from tests.contrib.litellm.utils import tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
-from tests.utils import DummyTracer


 @pytest.mark.parametrize(
diff --git a/tests/contrib/litellm/utils.py b/tests/contrib/litellm/utils.py
index a9b6309770b..1779f5dfda9 100644
---
 a/tests/contrib/litellm/utils.py
+++ b/tests/contrib/litellm/utils.py
@@ -1,6 +1,8 @@
-import vcr
-import os
 import json
+import os
+
+import vcr
+

 CASETTE_EXTENSION = ".yaml"
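Note: with patch 49 applied, the submission gate in LiteLLMIntegration encodes the dedup rules spelled out in its docstring. Hypothetical calls and the outcomes the docstring implies — the exact return values depend on which integrations are enabled, so treat this as a sketch, not a spec:

    integration.should_submit_to_llmobs({"api_base": "http://0.0.0.0:4000"}, "gpt-3.5-turbo")
    # -> False: proxy request; the downstream LLM call is captured instead
    integration.should_submit_to_llmobs({"stream": False}, "gpt-3.5-turbo")
    # -> False when the OpenAI integration is enabled: OpenAI owns the span
    integration.should_submit_to_llmobs({"stream": True}, "gpt-3.5-turbo")
    # -> True: streamed requests are captured by LiteLLM itself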
From 72a6fe5f29840843aa327cfee75bf03a02ee400c Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Fri, 11 Apr 2025 13:25:46 -0400
Subject: [PATCH 50/61] add mock tracer to openai pin instead of litellm

---
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 764886d981a..5e75cb00be2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -203,7 +203,7 @@ def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_even
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

-            pin = Pin.get_from(litellm)
+            pin = Pin.get_from(openai)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]

From 5b2e62e368e06e22fb595dc42d45554e959fdac2 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Sun, 20 Apr 2025 17:55:31 +0200
Subject: [PATCH 51/61] add argument for parametrized config

---
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 5e75cb00be2..631a0f2cf45 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,7 +199,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
+    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai

From a8ec73551e02cc2f027b978ae37c645140defc03 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Sun, 20 Apr 2025 18:28:09 +0200
Subject: [PATCH 52/61] run black

---
 tests/contrib/litellm/test_litellm_llmobs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 631a0f2cf45..331c2051c2d 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,7 +199,9 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         )

     @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config):
+    def test_completion_integrations_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n, ddtrace_global_config
+    ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             import openai
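Note: patches 50-52 all adjust the same wiring: the test tracer must be attached to the library whose spans the test asserts on. The Pin pattern they rely on, sketched here with the fixtures from conftest.py standing in for real objects:

    import openai
    from ddtrace.trace import Pin
    from tests.utils import DummyTracer

    mock_tracer = DummyTracer()
    pin = Pin.get_from(openai)                 # Pin placed on the module when the integration patched it
    pin._override(openai, tracer=mock_tracer)  # reroute spans so the test can pop and inspect them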
From bcfbd4ec1d972a5fef6c1caa38e31d51a472a0c5 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Mon, 21 Apr 2025 15:46:39 +0200
Subject: [PATCH 53/61] patch openai manually

---
 tests/contrib/litellm/conftest.py            | 5 ++---
 tests/contrib/litellm/test_litellm_llmobs.py | 7 ++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 7d6d7684914..8cc6417386e 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -56,7 +56,7 @@ def litellm(ddtrace_global_config, monkeypatch):


 @pytest.fixture
-def litellm_llmobs(mock_tracer, llmobs_span_writer, ddtrace_global_config):
+def litellm_llmobs(mock_tracer, llmobs_span_writer):
     llmobs_service.disable()
     with override_global_config(
         {
@@ -64,8 +64,7 @@ def litellm_llmobs(mock_tracer, llmobs_span_writer):
             "_dd_api_key": "",
         }
     ):
-        enable_integrations = ddtrace_global_config.get("_llmobs_integrations_enabled", False)
-        llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=enable_integrations)
+        llmobs_service.enable(_tracer=mock_tracer, integrations_enabled=False)
         llmobs_service._instance._llmobs_span_writer = llmobs_span_writer
         yield llmobs_service
     llmobs_service.disable()
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 331c2051c2d..a7b6c7b9a3f 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,6 +1,7 @@
 import pytest

 from ddtrace._trace.pin import Pin
+from ddtrace._monkey import patch
 from tests.contrib.litellm.utils import async_consume_stream
 from tests.contrib.litellm.utils import consume_stream
@@ -198,11 +199,11 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_openai_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
             import openai

             pin = Pin.get_from(openai)
From 22b052de58a1cba41801e6e5459a95637ce46fb4 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 13:54:45 +0200
Subject: [PATCH 54/61] try moving flaky test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 49 ++++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a7b6c7b9a3f..17757c56dce 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,8 +199,34 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
+            messages = [{"content": "Hey, what is up?", "role": "user"}]
+            resp = litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                stream=stream,
+                n=n,
+                stream_options={"include_usage": True},
+                api_base="http://0.0.0.0:4000",
+            )
+            if stream:
+                consume_stream(resp, n)
+
+            # client side requests made to the proxy are not submitted to LLMObs
+            assert len(llmobs_events) == 0
+
+@pytest.mark.parametrize(
+    "stream,n",
+    [
+        (True, 1),
+        (True, 2),
+        (False, 1),
+        (False, 2),
-    ],
+)
+def test_completion_openai_enabled(
     litellm, request_vcr, llmobs_events, mock_tracer, stream, n
 ):
     with request_vcr.use_cassette(get_cassette_name(stream, n)):
         patch(openai=True)
@@ -250,21 +276,4 @@ def test_completion_openai_enabled(
             token_metrics=token_metrics,
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )
-        assert llmobs_events[0] == expected_event
-
-    def test_completion_proxy(
-        self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n
-    ):
-        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
-            messages = [{"content": "Hey, what is up?", "role": "user"}]
-            resp = litellm.completion(
-                model="gpt-3.5-turbo",
-                messages=messages,
-                stream=stream,
-                n=n,
-                stream_options={"include_usage": True},
-                api_base="http://0.0.0.0:4000",
-            )
-            if stream:
-                consume_stream(resp, n)
-
-            # client side requests made to the proxy are not submitted to LLMObs
-            assert len(llmobs_events) == 0
+        assert llmobs_events[0] == expected_event
\ No newline at end of file
From e5c18f03d9d815b0d5b3c5d586a93d6ebb4099ce Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 14:12:39 +0200
Subject: [PATCH 55/61] simplify flaky openai enabled test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 79 ++++++--------------
 1 file changed, 21 insertions(+), 58 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 17757c56dce..a081bd5daa2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,8 +199,16 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
-        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
+    def test_completion_openai_enabled(
+        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
+    ):
+        with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
+            import openai
+
+            pin = Pin.get_from(openai)
+            pin._override(openai, tracer=mock_tracer)
+
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
                 model="gpt-3.5-turbo",
@@ -208,33 +216,16 @@ def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_e
                 stream=stream,
                 n=n,
                 stream_options={"include_usage": True},
-                api_base="http://0.0.0.0:4000",
             )
             if stream:
-                consume_stream(resp, n)
-
-        # client side requests made to the proxy are not submitted to LLMObs
-        assert len(llmobs_events) == 0
-
-@pytest.mark.parametrize(
-    "stream,n",
-    [
-        (True, 1),
-        (True, 2),
-        (False, 1),
-        (False, 2),
-    ],
-)
-def test_completion_openai_enabled(
-    litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-):
-    with request_vcr.use_cassette(get_cassette_name(stream, n)):
-        patch(openai=True)
-        import openai
-
-        pin = Pin.get_from(openai)
-        pin._override(openai, tracer=mock_tracer)
+                for _ in resp:
+                    pass

+            assert len(llmobs_events) == 1
+            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
+
+    def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
+        with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
@@ -242,38 +233,10 @@ def test_completion_openai_enabled(
                 stream=stream,
                 n=n,
                 stream_options={"include_usage": True},
+                api_base="http://0.0.0.0:4000",
             )
             if stream:
-                output_messages, token_metrics = consume_stream(resp, n)
-            else:
-                output_messages, token_metrics = parse_response(resp)
+                consume_stream(resp, n)

-            spans = mock_tracer.pop_traces()
-            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
-            if stream:
-                span = spans[0][0]
-                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
-                model_name = "gpt-3.5-turbo"
-            else:
-                span = spans[0][1]
-                # remove parent span since LiteLLM request span will not be submitted to LLMObs
-                span._parent = None
-                metadata = {
-                    "n": n,
-                    "extra_body": {},
-                    "timeout": 600.0,
-                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
-                }
-                model_name = "gpt-3.5-turbo-0125"
-            assert len(llmobs_events) == 1
-            expected_event = _expected_llmobs_llm_span_event(
-                span,
-                model_name=model_name,
-                model_provider="openai",
-                input_messages=messages,
-                output_messages=output_messages,
-                metadata=metadata,
-                token_metrics=token_metrics,
-                tags={"ml_app": "", "service": "tests.contrib.litellm"},
-            )
-            assert llmobs_events[0] == expected_event
+            # client side requests made to the proxy are not submitted to LLMObs
+            assert len(llmobs_events) == 0
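Note: the single-line assertion introduced in patch 55 needs the conditional parenthesized (as shown above) so it compares against a name rather than asserting a truthy string when streaming. It reads the same long-hand; both forms below are equivalent:

    expected_name = "litellm.request" if stream else "OpenAI.createChatCompletion"
    assert llmobs_events[0]["name"] == expected_name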
From 80ee6c676fd67678a630a6874417ca1d89c2172f Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 18:50:58 +0200
Subject: [PATCH 56/61] revert to passing test

---
 tests/contrib/litellm/test_litellm_llmobs.py | 54 ++++++++++++++++----
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a081bd5daa2..1114c285382 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -11,12 +11,16 @@
 @pytest.mark.parametrize(
-    "stream,n",
+    "stream,n, include_usage",
     [
-        (True, 1),
-        (True, 2),
-        (False, 1),
-        (False, 2),
+        (True, 1, True),
+        (True, 2, True),
+        (False, 1, True),
+        (False, 2, True),
+        (True, 1, False),
+        (True, 2, False),
+        (False, 1, False),
+        (False, 2, False),
     ],
 )
 class TestLLMObsLiteLLM:
@@ -203,14 +203,14 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
+    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
+    def test_completion_integrations_enabled(
         self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
     ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
-            patch(openai=True)
             import openai

-            pin = Pin.get_from(openai)
+            pin = Pin.get_from(litellm)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -218,11 +222,39 @@ def test_completion_integrations_enabled(
                 stream_options={"include_usage": True},
             )
             if stream:
-                for _ in resp:
-                    pass
+                output_messages, token_metrics = consume_stream(resp, n)
+            else:
+                output_messages, token_metrics = parse_response(resp)

+            spans = mock_tracer.pop_traces()
+            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
+            if stream:
+                span = spans[0][0]
+                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
+                model_name = "gpt-3.5-turbo"
+            else:
+                span = spans[0][1]
+                # remove parent span since LiteLLM request span will not be submitted to LLMObs
+                span._parent = None
+                metadata = {
+                    "n": n,
+                    "extra_body": {},
+                    "timeout": 600.0,
+                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
+                }
+                model_name = "gpt-3.5-turbo-0125"
             assert len(llmobs_events) == 1
+            expected_event = _expected_llmobs_llm_span_event(
+                span,
+                model_name=model_name,
+                model_provider="openai",
+                input_messages=messages,
+                output_messages=output_messages,
+                metadata=metadata,
-            token_metrics=token_metrics,
             tags={"ml_app": "", "service": "tests.contrib.litellm"},
         )
-            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
+                token_metrics=token_metrics,
+                tags={"ml_app": "", "service": "tests.contrib.litellm"},
+            )
+            assert llmobs_events[0] == expected_event

     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(

From 7813adabf06c2630908f3be59718f599a93e300b Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Tue, 22 Apr 2025 19:03:16 +0200
Subject: [PATCH 57/61] Revert "revert to passing test"

This reverts commit 80ee6c676fd67678a630a6874417ca1d89c2172f.
---
 tests/contrib/litellm/test_litellm_llmobs.py | 54 ++++----------------
 1 file changed, 11 insertions(+), 43 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 1114c285382..a081bd5daa2 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -11,16 +11,12 @@
 @pytest.mark.parametrize(
-    "stream,n, include_usage",
+    "stream,n",
     [
-        (True, 1, True),
-        (True, 2, True),
-        (False, 1, True),
-        (False, 2, True),
-        (True, 1, False),
-        (True, 2, False),
-        (False, 1, False),
-        (False, 2, False),
+        (True, 1),
+        (True, 2),
+        (False, 1),
+        (False, 2),
     ],
 )
 class TestLLMObsLiteLLM:
@@ -203,14 +203,14 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    @pytest.mark.parametrize("ddtrace_global_config", [dict(_llmobs_integrations_enabled=True)])
-    def test_completion_integrations_enabled(
+    def test_completion_openai_enabled(
         self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
     ):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
+            patch(openai=True)
             import openai

-            pin = Pin.get_from(litellm)
+            pin = Pin.get_from(openai)
             pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
@@ -222,39 +218,11 @@ def test_completion_openai_enabled(
                 stream_options={"include_usage": True},
             )
             if stream:
-                output_messages, token_metrics = consume_stream(resp, n)
-            else:
-                output_messages, token_metrics = parse_response(resp)
+                for _ in resp:
+                    pass

-            spans = mock_tracer.pop_traces()
-            # if streaming, grab the LiteLLM request, otherwise, grab the OpenAI request
-            if stream:
-                span = spans[0][0]
-                metadata = {"stream": stream, "n": n, "stream_options": {"include_usage": True}}
-                model_name = "gpt-3.5-turbo"
-            else:
-                span = spans[0][1]
-                # remove parent span since LiteLLM request span will not be submitted to LLMObs
-                span._parent = None
-                metadata = {
-                    "n": n,
-                    "extra_body": {},
-                    "timeout": 600.0,
-                    "extra_headers": {"X-Stainless-Raw-Response": "true"},
-                }
-                model_name = "gpt-3.5-turbo-0125"
             assert len(llmobs_events) == 1
-            expected_event = _expected_llmobs_llm_span_event(
-                span,
-                model_name=model_name,
-                model_provider="openai",
-                input_messages=messages,
-                output_messages=output_messages,
-                metadata=metadata,
-                token_metrics=token_metrics,
-                tags={"ml_app": "", "service": "tests.contrib.litellm"},
-            )
-            assert llmobs_events[0] == expected_event
+            assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")

     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):

From efefbb2097ad8096247bf4908a417b8bd5469b44 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Wed, 23 Apr 2025 13:25:54 +0200
Subject: [PATCH 58/61] use common test llmobs span writer

---
 tests/contrib/litellm/conftest.py | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index 8cc6417386e..e89039adbbf 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -3,21 +3,11 @@
 from ddtrace.contrib.internal.litellm.patch import patch
 from ddtrace.contrib.internal.litellm.patch import unpatch
 from ddtrace.llmobs import LLMObs as llmobs_service
-from ddtrace.llmobs._constants import AGENTLESS_BASE_URL
-from ddtrace.llmobs._writer import LLMObsSpanWriter
 from ddtrace.trace import Pin
 from tests.contrib.litellm.utils import get_request_vcr
 from tests.utils import DummyTracer
 from tests.utils import override_global_config
-
-
-class TestLLMObsSpanWriter(LLMObsSpanWriter):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.events = []
-
-    def enqueue(self, event):
-        self.events.append(event)
+from tests.llmobs._utils import TestLLMObsSpanWriter


 def default_global_config():
@@ -31,8 +21,7 @@ def ddtrace_global_config():

 @pytest.fixture
 def llmobs_span_writer():
-    agentless_url = "{}.{}".format(AGENTLESS_BASE_URL, "datad0g.com")
-    yield TestLLMObsSpanWriter(is_agentless=True, agentless_url=agentless_url, interval=1.0, timeout=1.0)
+    yield TestLLMObsSpanWriter(is_agentless=True, interval=1.0, timeout=1.0)


 @pytest.fixture
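Note: the local TestLLMObsSpanWriter removed from conftest.py in patch 58 captured events in memory instead of shipping them, and the shared helper in tests/llmobs/_utils.py presumably follows the same idea. The removed class, for reference:

    class TestLLMObsSpanWriter(LLMObsSpanWriter):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.events = []

        def enqueue(self, event):
            self.events.append(event)  # captured for the llmobs_events fixture to assert on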
From 83b68fced34bb74e325906e4362358f1a4f62b5a Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 11:08:30 +0200
Subject: [PATCH 59/61] run black

---
 tests/contrib/litellm/test_litellm_llmobs.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index a081bd5daa2..5e64fc33e91 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -199,9 +199,7 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(
-        self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n
-    ):
+    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             patch(openai=True)
             import openai
@@ -223,7 +221,7 @@ def test_completion_openai_enabled(
             assert len(llmobs_events) == 1
             assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
-
+
     def test_completion_proxy(self, litellm, request_vcr_include_localhost, llmobs_events, mock_tracer, stream, n):
         with request_vcr_include_localhost.use_cassette(get_cassette_name(stream, n, proxy=True)):
             messages = [{"content": "Hey, what is up?", "role": "user"}]

From e982630fdcdcc9bc713e14520e0878b2ebf16911 Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 11:13:35 +0200
Subject: [PATCH 60/61] run ruff

---
 tests/contrib/litellm/conftest.py            | 2 +-
 tests/contrib/litellm/test_litellm_llmobs.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/contrib/litellm/conftest.py b/tests/contrib/litellm/conftest.py
index e89039adbbf..bd7be20fd48 100644
--- a/tests/contrib/litellm/conftest.py
+++ b/tests/contrib/litellm/conftest.py
@@ -5,9 +5,9 @@
 from ddtrace.llmobs import LLMObs as llmobs_service
 from ddtrace.trace import Pin
 from tests.contrib.litellm.utils import get_request_vcr
+from tests.llmobs._utils import TestLLMObsSpanWriter
 from tests.utils import DummyTracer
 from tests.utils import override_global_config
-from tests.llmobs._utils import TestLLMObsSpanWriter


 def default_global_config():
diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index 5e64fc33e91..d68597730b7 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -1,7 +1,7 @@
 import pytest

-from ddtrace._trace.pin import Pin
 from ddtrace._monkey import patch
+from ddtrace._trace.pin import Pin
 from tests.contrib.litellm.utils import async_consume_stream
 from tests.contrib.litellm.utils import consume_stream

From a6d8500a22aee82d2b523029bdb32b29758b2e9a Mon Sep 17 00:00:00 2001
From: Nicole Cybul
Date: Thu, 24 Apr 2025 16:57:31 +0200
Subject: [PATCH 61/61] manually override tracer for litellm and openai

---
 tests/contrib/litellm/test_litellm_llmobs.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/contrib/litellm/test_litellm_llmobs.py b/tests/contrib/litellm/test_litellm_llmobs.py
index d68597730b7..587b7837eaa 100644
--- a/tests/contrib/litellm/test_litellm_llmobs.py
+++ b/tests/contrib/litellm/test_litellm_llmobs.py
@@ -8,6 +8,7 @@
 from tests.contrib.litellm.utils import parse_response
 from tests.contrib.litellm.utils import tools
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from ddtrace.llmobs._llmobs import LLMObs


 @pytest.mark.parametrize(
@@ -199,13 +200,18 @@ async def test_atext_completion(self, litellm, request_vcr, llmobs_events, mock_
         tags={"ml_app": "", "service": "tests.contrib.litellm"},
     )

-    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mock_tracer, stream, n):
+    def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_span_writer, llmobs_events, mock_tracer, stream, n):
         with request_vcr.use_cassette(get_cassette_name(stream, n)):
             patch(openai=True)
+            LLMObs.enable(integrations_enabled=True, _tracer=mock_tracer)
+            LLMObs._instance._llmobs_span_writer = llmobs_span_writer
             import openai
+            import litellm

-            pin = Pin.get_from(openai)
-            pin._override(openai, tracer=mock_tracer)
+            litellm_pin = Pin.get_from(litellm)
+            litellm_pin._override(litellm, tracer=mock_tracer)
+            openai_pin = Pin.get_from(openai)
+            openai_pin._override(openai, tracer=mock_tracer)

             messages = [{"content": "Hey, what is up?", "role": "user"}]
             resp = litellm.completion(
@@ -218,6 +224,7 @@ def test_completion_openai_enabled(self, litellm, request_vcr, llmobs_events, mo
             if stream:
                 for _ in resp:
                     pass
+            LLMObs.disable()

             assert len(llmobs_events) == 1
             assert llmobs_events[0]["name"] == ("OpenAI.createChatCompletion" if not stream else "litellm.request")
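Note: condensed for orientation, the final shape of the flaky test's setup after patch 61 — LLMObs is enabled with integrations on, its writer is swapped for the capturing one, and both libraries' Pins point at the same test tracer before the request is made (the loop below is a condensation of the four Pin lines in the diff above):

    patch(openai=True)
    LLMObs.enable(integrations_enabled=True, _tracer=mock_tracer)
    LLMObs._instance._llmobs_span_writer = llmobs_span_writer
    for lib in (litellm, openai):
        Pin.get_from(lib)._override(lib, tracer=mock_tracer)
    # ... issue the request, drain the stream if needed ...
    LLMObs.disable()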