65 changes: 35 additions & 30 deletions litellm/integrations/opentelemetry.py
@@ -59,6 +59,7 @@ def _get_litellm_resource():
while maintaining backward compatibility with LiteLLM-specific environment variables.
"""
from opentelemetry.sdk.resources import OTELResourceDetector, Resource
from opentelemetry.semconv.schemas import Schemas

# Create base resource attributes with LiteLLM-specific defaults
# These will be overridden by OTEL_RESOURCE_ATTRIBUTES if present
@@ -72,7 +73,7 @@ def _get_litellm_resource():
}

# Create base resource with LiteLLM-specific defaults
base_resource = Resource.create(base_attributes) # type: ignore
base_resource = Resource.create(base_attributes, Schemas.V1_25_0.value) # type: ignore

# Create resource from OTEL_RESOURCE_ATTRIBUTES using the detector
otel_resource_detector = OTELResourceDetector()
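For context, a minimal sketch (not part of the diff, assuming the standard `opentelemetry-sdk` and `opentelemetry-semantic-conventions` packages) of what the new second argument to `Resource.create` does — it pins the resource to a semantic-conventions schema URL:

```python
# Standalone sketch: Schemas.V1_25_0.value is the schema URL string shipped
# with opentelemetry-semantic-conventions; Resource.create accepts it as the
# optional second (schema_url) argument.
from opentelemetry.sdk.resources import Resource
from opentelemetry.semconv.schemas import Schemas

resource = Resource.create(
    {"service.name": "litellm", "deployment.environment": "production"},
    Schemas.V1_25_0.value,
)
print(resource.schema_url)  # e.g. "https://opentelemetry.io/schemas/1.25.0"
```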
@@ -557,9 +558,9 @@ def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer:

def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]:
"""Extract dynamic headers from kwargs if available."""
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params")
)
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params")

if not standard_callback_dynamic_params:
return None
@@ -764,34 +765,34 @@ def _to_timestamp(val: Optional[Union[datetime, float, str]]) -> Optional[float]:
return float(val)
# isinstance(val, str) - parse datetime string (with or without microseconds)
try:
return datetime.strptime(val, '%Y-%m-%d %H:%M:%S.%f').timestamp()
return datetime.strptime(val, "%Y-%m-%d %H:%M:%S.%f").timestamp()
except ValueError:
try:
return datetime.strptime(val, '%Y-%m-%d %H:%M:%S').timestamp()
return datetime.strptime(val, "%Y-%m-%d %H:%M:%S").timestamp()
except ValueError:
return None

def _record_time_to_first_token_metric(self, kwargs: dict, common_attrs: dict):
"""Record Time to First Token (TTFT) metric for streaming requests."""
optional_params = kwargs.get("optional_params", {})
is_streaming = optional_params.get("stream", False)

if not (self._time_to_first_token_histogram and is_streaming):
return

# Use api_call_start_time for precision (matches Prometheus implementation)
# This excludes LiteLLM overhead and measures pure LLM API latency
api_call_start_time = kwargs.get("api_call_start_time", None)
completion_start_time = kwargs.get("completion_start_time", None)

if api_call_start_time is not None and completion_start_time is not None:
# Convert to timestamps if needed (handles datetime, float, and string)
api_call_start_ts = self._to_timestamp(api_call_start_time)
completion_start_ts = self._to_timestamp(completion_start_time)

if api_call_start_ts is None or completion_start_ts is None:
return # Skip recording if conversion failed

time_to_first_token_seconds = completion_start_ts - api_call_start_ts
self._time_to_first_token_histogram.record(
time_to_first_token_seconds, attributes=common_attrs
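Standalone illustration (not from the PR) of the timestamp-normalization pattern above: `_to_timestamp` accepts a datetime, a float, or a formatted string, and TTFT is then the difference between the API-call start and the first-token arrival. The helper below mirrors that behavior under those assumptions:

```python
# Sketch of the conversion logic the diff relies on; handles datetime, float,
# and string inputs, returning None when a string cannot be parsed.
from datetime import datetime
from typing import Optional, Union


def to_timestamp(val: Optional[Union[datetime, float, str]]) -> Optional[float]:
    if val is None:
        return None
    if isinstance(val, datetime):
        return val.timestamp()
    if isinstance(val, (int, float)):
        return float(val)
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(val, fmt).timestamp()
        except ValueError:
            continue
    return None


# TTFT = first-token time minus API-call start time, in seconds.
api_call_start = to_timestamp("2024-01-01 12:00:00")
first_token = to_timestamp("2024-01-01 12:00:00.850000")
if api_call_start is not None and first_token is not None:
    print(first_token - api_call_start)  # ~0.85
```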
@@ -806,38 +807,40 @@ def _record_time_per_output_token_metric(
common_attrs: dict,
):
"""Record Time Per Output Token (TPOT) metric.

Calculated as: generation_time / completion_tokens
- For streaming: uses end_time - completion_start_time (time to generate all tokens after first)
- For non-streaming: uses end_time - api_call_start_time (total generation time)
"""
if not self._time_per_output_token_histogram:
return

# Get completion tokens from response_obj
completion_tokens = None
if response_obj and (usage := response_obj.get("usage")):
completion_tokens = usage.get("completion_tokens")

if completion_tokens is None or completion_tokens <= 0:
return

# Calculate generation time
completion_start_time = kwargs.get("completion_start_time", None)
api_call_start_time = kwargs.get("api_call_start_time", None)

# Convert end_time to timestamp (handles datetime, float, and string)
end_time_ts = self._to_timestamp(end_time)
if end_time_ts is None:
# Fallback to duration_s if conversion failed
generation_time_seconds = duration_s
if generation_time_seconds > 0:
time_per_output_token_seconds = generation_time_seconds / completion_tokens
time_per_output_token_seconds = (
generation_time_seconds / completion_tokens
)
self._time_per_output_token_histogram.record(
time_per_output_token_seconds, attributes=common_attrs
)
return

if completion_start_time is not None:
# Streaming: use completion_start_time (when first token arrived)
# This measures time to generate all tokens after the first one
@@ -858,7 +861,7 @@ def _record_time_per_output_token_metric(
else:
# Fallback: use duration_s (already calculated as (end_time - start_time).total_seconds())
generation_time_seconds = duration_s

if generation_time_seconds > 0:
time_per_output_token_seconds = generation_time_seconds / completion_tokens
self._time_per_output_token_histogram.record(
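A worked example (hypothetical numbers, not from the PR) of the TPOT arithmetic described in the docstring above:

```python
# Hypothetical streaming case: 20 completion tokens produced in the 2.0 s
# between the first token arriving and the stream completing.
completion_start_ts = 100.0   # when the first token arrived
end_ts = 102.0                # when the stream completed
completion_tokens = 20

generation_time_seconds = end_ts - completion_start_ts            # 2.0
time_per_output_token_seconds = generation_time_seconds / completion_tokens
print(time_per_output_token_seconds)  # 0.1 seconds per output token
```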
@@ -872,37 +875,37 @@ def _record_response_duration_metric(
common_attrs: dict,
):
"""Record Total Generation Time (response duration) metric.

Measures pure LLM API generation time: end_time - api_call_start_time
This excludes LiteLLM overhead and measures only the LLM provider's response time.
Works for both streaming and non-streaming requests.

Mirrors Prometheus's litellm_llm_api_latency_metric.
Uses kwargs.get("end_time") with fallback to parameter for consistency with Prometheus.
"""
if not self._response_duration_histogram:
return

api_call_start_time = kwargs.get("api_call_start_time", None)
if api_call_start_time is None:
return

# Use end_time from kwargs if available (matches Prometheus), otherwise use parameter
# For streaming: end_time is when the stream completes (final chunk received)
# For non-streaming: end_time is when the response is received
_end_time = kwargs.get("end_time") or end_time
if _end_time is None:
_end_time = datetime.now()

# Convert to timestamps if needed (handles datetime, float, and string)
api_call_start_ts = self._to_timestamp(api_call_start_time)
end_time_ts = self._to_timestamp(_end_time)

if api_call_start_ts is None or end_time_ts is None:
return # Skip recording if conversion failed

response_duration_seconds = end_time_ts - api_call_start_ts

if response_duration_seconds > 0:
self._response_duration_histogram.record(
response_duration_seconds, attributes=common_attrs
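For reference, a minimal sketch (assumed instrument name and attributes, not LiteLLM's actual identifiers) of how an OpenTelemetry histogram such as `_response_duration_histogram` is created and recorded with attributes via the public metrics API:

```python
# Sketch using the public OpenTelemetry metrics API; without a configured
# MeterProvider these calls go to a no-op meter, so the snippet is safe to run.
from opentelemetry import metrics

meter = metrics.get_meter("litellm.sketch")
response_duration_histogram = meter.create_histogram(
    name="llm.response.duration",  # assumed name for illustration
    unit="s",
    description="Pure LLM API generation time (end_time - api_call_start_time)",
)

response_duration_histogram.record(
    1.42,  # seconds
    attributes={"gen_ai.request.model": "gpt-4o"},  # illustrative attributes
)
```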
@@ -1263,7 +1266,9 @@ def set_attributes( # noqa: PLR0915
)
return
elif self.callback_name == "weave_otel":
from litellm.integrations.weave.weave_otel import set_weave_otel_attributes
from litellm.integrations.weave.weave_otel import (
set_weave_otel_attributes,
)

set_weave_otel_attributes(span, kwargs, response_obj)
return
@@ -1407,7 +1412,7 @@ def set_attributes( # noqa: PLR0915
value=usage.get("prompt_tokens"),
)

########################################################################
########################################################################
########## LLM Request Medssages / tools / content Attributes ###########
#########################################################################

@@ -1994,7 +1999,7 @@ def create_litellm_proxy_request_started_span(
"""
Create a span for the received proxy server request.
"""

return self.tracer.start_span(
name="Received Proxy Server Request",
start_time=self._to_ns(start_time),
7 changes: 4 additions & 3 deletions tests/test_litellm/integrations/test_opentelemetry.py
@@ -15,6 +15,7 @@
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from opentelemetry.semconv.schemas import Schemas

from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
@@ -448,7 +449,7 @@ def test_get_litellm_resource_with_defaults(
"deployment.environment": "production",
"model_id": "litellm",
}
mock_resource_create.assert_called_once_with(expected_attributes)
mock_resource_create.assert_called_once_with(expected_attributes, Schemas.V1_25_0.value)
mock_detector.detect.assert_called_once()
mock_base_resource.merge.assert_called_once_with(mock_env_resource)
self.assertEqual(result, mock_merged_resource)
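The test updates above all follow one pattern: the mocked `Resource.create` must now be asserted with the schema URL as a second positional argument. A trimmed sketch of that assertion shape (illustrative names, not the real test fixtures):

```python
# Minimal shape of the updated assertion; the real tests patch the SDK symbols
# imported inside _get_litellm_resource and run under unittest.TestCase.
from unittest.mock import MagicMock

from opentelemetry.semconv.schemas import Schemas

mock_resource_create = MagicMock()

# ... the code under test would call the (patched) Resource.create here ...
mock_resource_create({"service.name": "litellm"}, Schemas.V1_25_0.value)

mock_resource_create.assert_called_once_with(
    {"service.name": "litellm"}, Schemas.V1_25_0.value
)
```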
@@ -493,7 +494,7 @@ def test_get_litellm_resource_with_litellm_env_vars(
"deployment.environment": "staging",
"model_id": "test-model",
}
mock_resource_create.assert_called_once_with(expected_attributes)
mock_resource_create.assert_called_once_with(expected_attributes, Schemas.V1_25_0.value)
mock_detector.detect.assert_called_once()
mock_base_resource.merge.assert_called_once_with(mock_env_resource)
self.assertEqual(result, mock_merged_resource)
@@ -539,7 +540,7 @@ def test_get_litellm_resource_with_otel_resource_attributes(
"deployment.environment": "production",
"model_id": "should-be-overridden",
}
mock_resource_create.assert_called_once_with(expected_attributes)
mock_resource_create.assert_called_once_with(expected_attributes, Schemas.V1_25_0.value)
mock_detector.detect.assert_called_once()
mock_base_resource.merge.assert_called_once_with(mock_env_resource)
self.assertEqual(result, mock_merged_resource)