From f29f8dd646223d2023c293645dfcf59af5de1546 Mon Sep 17 00:00:00 2001
From: Liudmila Molkova
Date: Mon, 25 Aug 2025 23:25:46 -0700
Subject: [PATCH 1/5] first draft

---
 .../examples/manual/main.py                  |   47 +-
 .../instrumentation/openai_v2/__init__.py    |   17 +-
 .../instrumentation/openai_v2/patch.py       |  249 ++-
 .../instrumentation/openai_v2/utils.py       |  302 ++-
 .../tests/conftest.py                        |   48 +-
 .../tests/test_async_chat_completions.py     | 1694 ++++++++++------
 .../tests/test_chat_completions.py           | 1731 +++++++++++------
 .../tests/test_chat_metrics.py               |  301 +--
 8 files changed, 2869 insertions(+), 1520 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py
index 4b0c121b7a..1f642ca264 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py
@@ -4,10 +4,13 @@
 from openai import OpenAI
 
 # NOTE: OpenTelemetry Python Logs and Events APIs are in beta
-from opentelemetry import _events, _logs, trace
+from opentelemetry import _events, _logs, metrics, trace
 from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (
     OTLPLogExporter,
 )
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
+    OTLPMetricExporter,
+)
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
     OTLPSpanExporter,
 )
@@ -15,6 +18,8 @@
 from opentelemetry.sdk._events import EventLoggerProvider
 from opentelemetry.sdk._logs import LoggerProvider
 from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 
@@ -31,22 +36,42 @@
 )
 _events.set_event_logger_provider(EventLoggerProvider())
 
+# configure metrics
+metrics.set_meter_provider(
+    MeterProvider(
+        metric_readers=[
+            PeriodicExportingMetricReader(
+                OTLPMetricExporter(),
+            ),
+        ]
+    )
+)
+
+from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
+
+HTTPXClientInstrumentor().instrument()
+
 # instrument OpenAI
 OpenAIInstrumentor().instrument()
 
+tracer = trace.get_tracer(__name__)
+
 
 def main():
     client = OpenAI()
-    chat_completion = client.chat.completions.create(
-        model=os.getenv("CHAT_MODEL", "gpt-4o-mini"),
-        messages=[
-            {
-                "role": "user",
-                "content": "Write a short poem on OpenTelemetry.",
-            },
-        ],
-    )
-    print(chat_completion.choices[0].message.content)
+
+    for u in range(10):
+        with tracer.start_as_current_span("main"):
+            chat_completion = client.chat.completions.create(
+                model=os.getenv("CHAT_MODEL", "gpt-4o-mini"),
+                messages=[
+                    {
+                        "role": "user",
+                        "content": "Write a haiku on OpenTelemetry.",
+                    },
+                ],
+            )
+            print(chat_completion.choices[0].message.content)
 
 
 if __name__ == "__main__":
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
index ab4b6f9d7b..15b6b8b1ef 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
@@ -47,7 +47,10 @@
 from opentelemetry._events import get_event_logger
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.openai_v2.package import _instruments
-from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
+from opentelemetry.instrumentation.openai_v2.utils import (
+    is_content_enabled,
+    is_latest_experimental_enabled,
+)
 from opentelemetry.instrumentation.utils import unwrap
 from opentelemetry.metrics import get_meter
 from opentelemetry.semconv.schemas import Schemas
@@ -94,7 +97,11 @@ def _instrument(self, **kwargs):
             module="openai.resources.chat.completions",
             name="Completions.create",
             wrapper=chat_completions_create(
-                tracer, event_logger, instruments, is_content_enabled()
+                tracer,
+                event_logger,
+                instruments,
+                is_content_enabled(),
+                is_latest_experimental_enabled(),
             ),
         )
 
@@ -102,7 +109,11 @@ def _instrument(self, **kwargs):
             module="openai.resources.chat.completions",
             name="AsyncCompletions.create",
             wrapper=async_chat_completions_create(
-                tracer, event_logger, instruments, is_content_enabled()
+                tracer,
+                event_logger,
+                instruments,
+                is_content_enabled(),
+                is_latest_experimental_enabled(),
             ),
         )
 
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index 072365abb7..dee44648be 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -13,6 +13,7 @@
 # limitations under the License.
+import json from timeit import default_timer from typing import Optional @@ -29,11 +30,15 @@ from .instruments import Instruments from .utils import ( - choice_to_event, + DataclassEncoder, + OutputMessage, + TextPart, + ToolCallRequestPart, get_llm_request_attributes, handle_span_exception, is_streaming, - message_to_event, + record_input_messages, + record_output_messages, set_span_attribute, ) @@ -43,11 +48,19 @@ def chat_completions_create( event_logger: EventLogger, instruments: Instruments, capture_content: bool, + latest_experimental_enabled: bool, ): """Wrap the `create` method of the `ChatCompletion` class to trace it.""" def traced_method(wrapped, instance, args, kwargs): - span_attributes = {**get_llm_request_attributes(kwargs, instance)} + span_attributes = { + **get_llm_request_attributes( + kwargs, + instance, + GenAIAttributes.GenAiOperationNameValues.CHAT.value, + latest_experimental_enabled, + ) + } span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}" with tracer.start_as_current_span( @@ -56,8 +69,13 @@ def traced_method(wrapped, instance, args, kwargs): attributes=span_attributes, end_on_exit=False, ) as span: - for message in kwargs.get("messages", []): - event_logger.emit(message_to_event(message, capture_content)) + record_input_messages( + kwargs.get("messages", []), + capture_content, + latest_experimental_enabled, + span, + event_logger, + ) start = default_timer() result = None @@ -66,15 +84,24 @@ def traced_method(wrapped, instance, args, kwargs): result = wrapped(*args, **kwargs) if is_streaming(kwargs): return StreamWrapper( - result, span, event_logger, capture_content + result, + span, + event_logger, + capture_content, + latest_experimental_enabled, ) if span.is_recording(): _set_response_attributes( - span, result, event_logger, capture_content + span, result, latest_experimental_enabled ) - for choice in getattr(result, "choices", []): - event_logger.emit(choice_to_event(choice, capture_content)) + record_output_messages( + getattr(result, "choices", []), + capture_content, + latest_experimental_enabled, + span, + event_logger, + ) span.end() return result @@ -91,6 +118,7 @@ def traced_method(wrapped, instance, args, kwargs): result, span_attributes, error_type, + latest_experimental_enabled, ) return traced_method @@ -101,11 +129,19 @@ def async_chat_completions_create( event_logger: EventLogger, instruments: Instruments, capture_content: bool, + latest_experimental_enabled: bool, ): """Wrap the `create` method of the `AsyncChatCompletion` class to trace it.""" async def traced_method(wrapped, instance, args, kwargs): - span_attributes = {**get_llm_request_attributes(kwargs, instance)} + span_attributes = { + **get_llm_request_attributes( + kwargs, + instance, + GenAIAttributes.GenAiOperationNameValues.CHAT.value, + latest_experimental_enabled, + ) + } span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}" with tracer.start_as_current_span( @@ -114,8 +150,13 @@ async def traced_method(wrapped, instance, args, kwargs): attributes=span_attributes, end_on_exit=False, ) as span: - for message in kwargs.get("messages", []): - event_logger.emit(message_to_event(message, capture_content)) + record_input_messages( + kwargs.get("messages", []), + capture_content, + latest_experimental_enabled, + span, + event_logger, + ) start = default_timer() result = None @@ -124,15 +165,24 @@ async def traced_method(wrapped, 
instance, args, kwargs): result = await wrapped(*args, **kwargs) if is_streaming(kwargs): return StreamWrapper( - result, span, event_logger, capture_content + result, + span, + event_logger, + capture_content, + latest_experimental_enabled, ) if span.is_recording(): _set_response_attributes( - span, result, event_logger, capture_content + span, result, latest_experimental_enabled ) - for choice in getattr(result, "choices", []): - event_logger.emit(choice_to_event(choice, capture_content)) + record_output_messages( + getattr(result, "choices", []), + capture_content, + latest_experimental_enabled, + span, + event_logger, + ) span.end() return result @@ -149,6 +199,7 @@ async def traced_method(wrapped, instance, args, kwargs): result, span_attributes, error_type, + latest_experimental_enabled, ) return traced_method @@ -160,10 +211,16 @@ def _record_metrics( result, span_attributes: dict, error_type: Optional[str], + latest_experimental_enabled: bool, ): + provider_name_attr_name = ( + "gen_ai.provider.name" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_SYSTEM + ) common_attributes = { GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value, - GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value, + provider_name_attr_name: GenAIAttributes.GenAiSystemValues.OPENAI.value, GenAIAttributes.GEN_AI_REQUEST_MODEL: span_attributes[ GenAIAttributes.GEN_AI_REQUEST_MODEL ], @@ -175,13 +232,21 @@ def _record_metrics( if result and getattr(result, "model", None): common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model + service_tier_attr_key = ( + "openai.response.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER + ) if result and getattr(result, "service_tier", None): - common_attributes[ - GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER - ] = result.service_tier + common_attributes[service_tier_attr_key] = result.service_tier + system_fingerprint_attr_key = ( + "openai.response.system_fingerprint" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT + ) if result and getattr(result, "system_fingerprint", None): - common_attributes["gen_ai.openai.response.system_fingerprint"] = ( + common_attributes[system_fingerprint_attr_key] = ( result.system_fingerprint ) @@ -220,9 +285,7 @@ def _record_metrics( ) -def _set_response_attributes( - span, result, event_logger: EventLogger, capture_content: bool -): +def _set_response_attributes(span, result, latest_experimental_enabled: bool): set_span_attribute( span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, result.model ) @@ -241,10 +304,15 @@ def _set_response_attributes( if getattr(result, "id", None): set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, result.id) + service_tier_attr_key = ( + "openai.response.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER + ) if getattr(result, "service_tier", None): set_span_attribute( span, - GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER, + service_tier_attr_key, result.service_tier, ) @@ -313,12 +381,14 @@ def __init__( span: Span, event_logger: EventLogger, capture_content: bool, + latest_experimental_enabled: bool, ): self.stream = stream self.span = span self.choice_buffers = [] self._span_started = False self.capture_content = capture_content + self.latest_experimental_enabled = latest_experimental_enabled self.event_logger = event_logger self.setup() @@ 
-355,9 +425,14 @@ def cleanup(self): self.completion_tokens, ) + service_tier_attr_key = ( + "openai.response.service_tier" + if self.latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER + ) set_span_attribute( self.span, - GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER, + service_tier_attr_key, self.service_tier, ) @@ -367,48 +442,90 @@ def cleanup(self): self.finish_reasons, ) - for idx, choice in enumerate(self.choice_buffers): - message = {"role": "assistant"} - if self.capture_content and choice.text_content: - message["content"] = "".join(choice.text_content) - if choice.tool_calls_buffers: - tool_calls = [] - for tool_call in choice.tool_calls_buffers: - function = {"name": tool_call.function_name} - if self.capture_content: - function["arguments"] = "".join( - tool_call.arguments + if self.latest_experimental_enabled: + if not self.capture_content: + pass + else: + output_messages = [] + for choice in self.choice_buffers: + message = OutputMessage( + finish_reason=choice.finish_reason or "error", + role="assistant", + ) + output_messages.append(message) + + if self.capture_content and choice.text_content: + message.parts.append( + TextPart(content="".join(choice.text_content)) ) - tool_call_dict = { - "id": tool_call.tool_call_id, - "type": "function", - "function": function, - } - tool_calls.append(tool_call_dict) - message["tool_calls"] = tool_calls - - body = { - "index": idx, - "finish_reason": choice.finish_reason or "error", - "message": message, - } - - event_attributes = { - GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value - } - - # this span is not current, so we need to manually set the context on event - span_ctx = self.span.get_span_context() - self.event_logger.emit( - Event( - name="gen_ai.choice", - attributes=event_attributes, - body=body, - trace_id=span_ctx.trace_id, - span_id=span_ctx.span_id, - trace_flags=span_ctx.trace_flags, + if choice.tool_calls_buffers: + for tool_call in choice.tool_calls_buffers: + part = ToolCallRequestPart( + name=tool_call.function_name, + id=tool_call.tool_call_id, + ) + arguments = "".join(tool_call.arguments) + if arguments: + try: + part.arguments = json.loads(arguments) + except json.JSONDecodeError: + part.arguments = arguments + + message.parts.append(part) + # TODO: config between spans and events + # also if spans and span is not recording, let's not do it all + if self.span.is_recording(): + self.span.set_attribute( + "gen_ai.output.messages", + json.dumps( + output_messages, + ensure_ascii=False, + cls=DataclassEncoder, + ), + ) + else: + for idx, choice in enumerate(self.choice_buffers): + message = {"role": "assistant"} + if self.capture_content and choice.text_content: + message["content"] = "".join(choice.text_content) + if choice.tool_calls_buffers: + tool_calls = [] + for tool_call in choice.tool_calls_buffers: + function = {"name": tool_call.function_name} + if self.capture_content: + function["arguments"] = "".join( + tool_call.arguments + ) + tool_call_dict = { + "id": tool_call.tool_call_id, + "type": "function", + "function": function, + } + tool_calls.append(tool_call_dict) + message["tool_calls"] = tool_calls + + body = { + "index": idx, + "finish_reason": choice.finish_reason or "error", + "message": message, + } + + event_attributes = { + GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value + } + + # this span is not current, so we need to manually set the context on event + span_ctx = self.span.get_span_context() + 
self.event_logger.emit( + Event( + name="gen_ai.choice", + attributes=event_attributes, + body=body, + trace_id=span_ctx.trace_id, + span_id=span_ctx.span_id, + trace_flags=span_ctx.trace_flags, + ) ) - ) self.span.end() self._span_started = False diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py index f8a837259e..322966ca5f 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import dataclasses +import json +from collections.abc import Iterable +from dataclasses import dataclass, field +from enum import Enum from os import environ -from typing import Mapping, Optional, Union +from typing import Any, List, Mapping, Optional, Union from urllib.parse import urlparse from httpx import URL from openai import NOT_GIVEN -from opentelemetry._events import Event +from opentelemetry._events import Event, EventLogger from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) @@ -29,11 +34,14 @@ from opentelemetry.semconv.attributes import ( error_attributes as ErrorAttributes, ) +from opentelemetry.trace import Span from opentelemetry.trace.status import Status, StatusCode OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT = ( "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" ) +# TODO: reuse common code +OTEL_SEMCONV_STABILITY_OPT_IN = "OTEL_SEMCONV_STABILITY_OPT_IN" def is_content_enabled() -> bool: @@ -44,7 +52,16 @@ def is_content_enabled() -> bool: return capture_content.lower() == "true" -def extract_tool_calls(item, capture_content): +def is_latest_experimental_enabled() -> bool: + stability_opt_in = environ.get(OTEL_SEMCONV_STABILITY_OPT_IN, None) + + return ( + stability_opt_in is not None + and stability_opt_in.lower() == "gen_ai_latest_experimental" + ) + + +def extract_tool_calls_old(item, capture_content): tool_calls = get_property_value(item, "tool_calls") if tool_calls is None: return None @@ -78,6 +95,33 @@ def extract_tool_calls(item, capture_content): return calls +def extract_tool_calls_new(tool_calls) -> list["ToolCallRequestPart"]: + parts = [] + for tool_call in tool_calls: + tool_call_part = ToolCallRequestPart() + call_id = get_property_value(tool_call, "id") + if call_id: + tool_call_part.id = call_id + + func = get_property_value(tool_call, "function") + if func: + tool_call_part.function = {} + name = get_property_value(func, "name") + if name: + tool_call_part.name = name + + arguments = get_property_value(func, "arguments") + if arguments: + try: + tool_call_part.arguments = json.loads(arguments) + except json.JSONDecodeError: + tool_call_part.arguments = arguments + + # TODO: support custom + parts.append(tool_call_part) + return parts + + def set_server_address_and_port(client_instance, attributes): base_client = getattr(client_instance, "_client", None) base_url = getattr(base_client, "base_url", None) @@ -104,7 +148,113 @@ def get_property_value(obj, property_name): return getattr(obj, property_name, None) -def message_to_event(message, capture_content): +def record_input_messages( + messages, + capture_content: bool, + 
latest_experimental_enabled: bool, + span: Span, + event_logger: EventLogger, +): + if latest_experimental_enabled: + if not capture_content: + return + + chat_messages = [] + for message in messages: + role = get_property_value(message, "role") + chat_message = ChatMessage(role=role, parts=[]) + chat_messages.append(chat_message) + + content = get_property_value(message, "content") + + if role == "assistant": + tool_calls = get_property_value(message, "tool_calls") + if tool_calls: + chat_message.parts += extract_tool_calls_new(tool_calls) + if _is_text_part(content): + chat_message.parts.append(TextPart(content=content)) + + elif role == "tool": + tool_call_id = get_property_value(message, "tool_call_id") + chat_message.parts.append( + ToolCallResponsePart(id=tool_call_id, response=content) + ) + + else: + # system, developer, user, fallback + if _is_text_part(content): + chat_message.parts.append(TextPart(content=content)) + # continue? + + # TODO: config between spans and events + # also if spans and span is not recording, let's not do it all + if span.is_recording(): + span.set_attribute( + "gen_ai.input.messages", + json.dumps( + chat_messages, ensure_ascii=False, cls=DataclassEncoder + ), + ) + + else: + for message in messages: + event_logger.emit(_message_to_event(message, capture_content)) + + +def _is_text_part(content: Any) -> bool: + return isinstance(content, str) or ( + isinstance(content, Iterable) + and all(isinstance(part, str) for part in content) + ) + + +def record_output_messages( + choices, + capture_content: bool, + latest_experimental_enabled: bool, + span: Span, + event_logger: EventLogger, +): + if latest_experimental_enabled: + if not capture_content: + return + + output_messages = [] + for choice in choices: + message = OutputMessage( + finish_reason=choice.finish_reason or "error", + role=( + choice.message.role + if choice.message and choice.message.role + else None + ), + ) + output_messages.append(message) + + if choice.message: + tool_calls = get_property_value(choice.message, "tool_calls") + if tool_calls: + message.parts += extract_tool_calls_new(tool_calls) + content = get_property_value(choice.message, "content") + if _is_text_part(content): + message.parts.append(TextPart(content=content)) + + # TODO: config between spans and events + # also if spans and span is not recording, let's not do it all + if span.is_recording(): + span.set_attribute( + "gen_ai.output.messages", + json.dumps( + output_messages, ensure_ascii=False, cls=DataclassEncoder + ), + ) + + else: + for choice in choices: + event_logger.emit(_choice_to_event(choice, capture_content)) + + +def _message_to_event(message, capture_content): attributes = { GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value } @@ -115,7 +265,7 @@ def message_to_event(message, capture_content): if capture_content and content: body["content"] = content if role == "assistant": - tool_calls = extract_tool_calls(message, capture_content) + tool_calls = extract_tool_calls_old(message, capture_content) if tool_calls: body = {"tool_calls": tool_calls} elif role == "tool": @@ -130,7 +280,7 @@ def message_to_event(message, capture_content): ) -def choice_to_event(choice, capture_content): +def _choice_to_event(choice, capture_content): attributes = { GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value } @@ -148,7 +298,7 @@ def choice_to_event(choice, capture_content): else None ) } - tool_calls = extract_tool_calls(choice.message, capture_content) + tool_calls = 
extract_tool_calls_old(choice.message, capture_content) if tool_calls: message["tool_calls"] = tool_calls content = get_property_value(choice.message, "content") @@ -184,13 +334,21 @@ def non_numerical_value_is_set(value: Optional[Union[bool, str]]): def get_llm_request_attributes( - kwargs, - client_instance, - operation_name=GenAIAttributes.GenAiOperationNameValues.CHAT.value, + kwargs, client_instance, operation_name, latest_experimental_enabled ): + provider_name_attr_key = ( + "gen_ai.provider.name" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_SYSTEM + ) + request_seed_attr_key = ( + "gen_ai.request.seed" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED + ) attributes = { GenAIAttributes.GEN_AI_OPERATION_NAME: operation_name, - GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value, + provider_name_attr_key: GenAIAttributes.GenAiSystemValues.OPENAI.value, GenAIAttributes.GEN_AI_REQUEST_MODEL: kwargs.get("model"), GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE: kwargs.get("temperature"), GenAIAttributes.GEN_AI_REQUEST_TOP_P: kwargs.get("p") @@ -202,26 +360,52 @@ def get_llm_request_attributes( GenAIAttributes.GEN_AI_REQUEST_FREQUENCY_PENALTY: kwargs.get( "frequency_penalty" ), - GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED: kwargs.get("seed"), + request_seed_attr_key: kwargs.get("seed"), } + output_type_attr_key = ( + "gen_ai.output.type" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT + ) if (response_format := kwargs.get("response_format")) is not None: # response_format may be string or object with a string in the `type` key if isinstance(response_format, Mapping): if ( response_format_type := response_format.get("type") ) is not None: - attributes[ - GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT - ] = response_format_type + if response_format_type == "text": + attributes[output_type_attr_key] = ( + "text" # TODO there should be an enum + ) + elif ( + response_format_type == "json_schema" + or response_format_type == "json_object" + ): + attributes[output_type_attr_key] = "json" + else: + # should never happen with chat completion API + # TODO: internal log + pass else: - attributes[ - GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT - ] = response_format + # should never happen with chat completion API + attributes[output_type_attr_key] = response_format set_server_address_and_port(client_instance, attributes) - service_tier = kwargs.get("service_tier") - attributes[GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER] = ( + + service_tier_attribute_key = ( + "openai.request.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER + ) + + extra_body = kwargs.get("extra_body", None) + if extra_body and isinstance(extra_body, dict): + service_tier = extra_body.get("service_tier", None) + else: + service_tier = kwargs.get("service_tier", None) + + attributes[service_tier_attribute_key] = ( service_tier if service_tier != "auto" else None ) @@ -236,3 +420,81 @@ def handle_span_exception(span, error): ErrorAttributes.ERROR_TYPE, type(error).__qualname__ ) span.end() + + +@dataclass +class TextPart: + type: str = "text" + content: str = None + + +@dataclass +class ToolCallRequestPart: + type: str = "tool_call" + id: Optional[str] = None + name: str = "" + arguments: Any = None + + +@dataclass +class ToolCallResponsePart: + type: str = "tool_call_response" + id: Optional[str] = None + response: Any = None + + 
+@dataclass
+class GenericPart:
+    type: str = ""
+
+
+MessagePart = Union[
+    TextPart,
+    ToolCallRequestPart,
+    ToolCallResponsePart,
+    GenericPart,
+]
+
+
+class Role(str, Enum):
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+    TOOL = "tool"
+
+
+@dataclass
+class ChatMessage:
+    role: Union[Role, str]
+    parts: List[MessagePart] = field(default_factory=list)
+
+
+@dataclass
+class InputMessages:
+    messages: List[ChatMessage] = field(default_factory=list)
+
+
+class FinishReason(str, Enum):
+    STOP = "stop"
+    LENGTH = "length"
+    CONTENT_FILTER = "content_filter"
+    TOOL_CALL = "tool_call"
+    ERROR = "error"
+
+
+@dataclass
+class OutputMessage(ChatMessage):
+    finish_reason: Union[FinishReason, str] = ""
+
+
+@dataclass
+class OutputMessages:
+    messages: List[OutputMessage] = field(default_factory=list)
+
+
+class DataclassEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if dataclasses.is_dataclass(obj):
+            return dataclasses.asdict(obj)
+        else:
+            return super(DataclassEncoder, self).default(obj)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
index 87505046aa..c1242929fe 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py
@@ -10,6 +10,7 @@
 from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor
 from opentelemetry.instrumentation.openai_v2.utils import (
     OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
+    OTEL_SEMCONV_STABILITY_OPT_IN,
 )
 from opentelemetry.sdk._events import EventLoggerProvider
 from opentelemetry.sdk._logs import LoggerProvider
@@ -104,14 +105,30 @@ def vcr_config():
     }
 
 
+@pytest.fixture(scope="function", params=[True, False])
+def latest_experimental_enabled(request):
+    return request.param
+
+
 @pytest.fixture(scope="function")
 def instrument_no_content(
-    tracer_provider, event_logger_provider, meter_provider
+    tracer_provider,
+    event_logger_provider,
+    meter_provider,
+    latest_experimental_enabled,
 ):
     os.environ.update(
         {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
     )
+    os.environ.update(
+        {
+            OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental"
+            if latest_experimental_enabled
+            else ""
+        }
+    )
+
 
     instrumentor = OpenAIInstrumentor()
     instrumentor.instrument(
         tracer_provider=tracer_provider,
@@ -121,16 +138,28 @@ def instrument_no_content(
 
     yield instrumentor
     os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None)
+    os.environ.pop(OTEL_SEMCONV_STABILITY_OPT_IN, None)
     instrumentor.uninstrument()
 
 
 @pytest.fixture(scope="function")
 def instrument_with_content(
-    tracer_provider, event_logger_provider, meter_provider
+    tracer_provider,
+    event_logger_provider,
+    meter_provider,
+    latest_experimental_enabled,
 ):
     os.environ.update(
         {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
     )
+
+    os.environ.update(
+        {
+            OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental"
+            if latest_experimental_enabled
+            else ""
+        }
+    )
     instrumentor = OpenAIInstrumentor()
     instrumentor.instrument(
         tracer_provider=tracer_provider,
@@ -140,17 +169,29 @@ def instrument_with_content(
 
     yield instrumentor
     os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None)
+    os.environ.pop(OTEL_SEMCONV_STABILITY_OPT_IN, None)
     instrumentor.uninstrument()
 
 
 @pytest.fixture(scope="function")
 def instrument_with_content_unsampled(
-    span_exporter, event_logger_provider,
meter_provider + span_exporter, + event_logger_provider, + meter_provider, + latest_experimental_enabled, ): os.environ.update( {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"} ) + os.environ.update( + { + OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental" + if latest_experimental_enabled + else "" + } + ) + tracer_provider = TracerProvider(sampler=ALWAYS_OFF) tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter)) @@ -163,6 +204,7 @@ def instrument_with_content_unsampled( yield instrumentor os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None) + os.environ.pop(OTEL_SEMCONV_STABILITY_OPT_IN, None) instrumentor.uninstrument() diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py index 468caa232c..617beaa442 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py @@ -13,13 +13,13 @@ # limitations under the License. # pylint: disable=too-many-locals -from typing import Optional import pytest from openai import APIConnectionError, AsyncOpenAI, NotFoundError -from openai.resources.chat.completions import ChatCompletion -from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.instrumentation.openai_v2.utils import ( + is_latest_experimental_enabled, +) from opentelemetry.semconv._incubating.attributes import ( error_attributes as ErrorAttributes, ) @@ -32,222 +32,388 @@ from opentelemetry.semconv._incubating.attributes import ( server_attributes as ServerAttributes, ) +from tests.test_utils import ( + assert_all_attributes, + assert_completion_attributes, + assert_log_parent, + assert_messages_attribute, + get_current_weather_tool_definition, + remove_none_values, +) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_with_content( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_with_content.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) - - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + response = await async_openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - user_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[0].message.content, - }, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, 
spans[0]) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[0].message.content, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_no_content( - span_exporter, log_exporter, async_openai_client, instrument_no_content + span_exporter, + log_exporter, + async_openai_client, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_no_content.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) + response = await async_openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + logs = log_exporter.get_finished_logs() + if latest_experimental_enabled: + assert len(logs) == 0 + assert "gen_ai.input.messages" not in spans[0].attributes + assert "gen_ai.output.messages" not in spans[0].attributes + else: + assert len(logs) == 2 - assert_message_in_logs(logs[0], "gen_ai.user.message", None, spans[0]) + assert_message_in_logs( + logs[0], "gen_ai.user.message", None, spans[0] + ) - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": {"role": "assistant"}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": {"role": "assistant"}, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.asyncio() async def test_async_chat_completion_bad_endpoint( - span_exporter, instrument_no_content + span_exporter, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - client = AsyncOpenAI(base_url="http://localhost:4242") - - with pytest.raises(APIConnectionError): - await client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - timeout=0.1, + with vcr.use_cassette("test_async_chat_completion_bad_endpoint.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = 
"gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + client = AsyncOpenAI(base_url="http://localhost:4242") + + with pytest.raises(APIConnectionError): + await client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + timeout=0.1, + ) + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + server_address="localhost", + ) + assert 4242 == spans[0].attributes[ServerAttributes.SERVER_PORT] + assert ( + "APIConnectionError" + == spans[0].attributes[ErrorAttributes.ERROR_TYPE] ) - - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], llm_model_value, server_address="localhost" - ) - assert 4242 == spans[0].attributes[ServerAttributes.SERVER_PORT] - assert ( - "APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] - ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_404( - span_exporter, async_openai_client, instrument_no_content + span_exporter, + async_openai_client, + instrument_no_content, + vcr, ): - llm_model_value = "this-model-does-not-exist" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_404.yaml"): + llm_model_value = "this-model-does-not-exist" + messages_value = [{"role": "user", "content": "Say this is a test"}] - with pytest.raises(NotFoundError): - await async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - ) + with pytest.raises(NotFoundError): + await async_openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + ) - spans = span_exporter.get_finished_spans() + spans = span_exporter.get_finished_spans() - assert_all_attributes(spans[0], llm_model_value) - assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] + assert_all_attributes( + spans[0], llm_model_value, is_latest_experimental_enabled() + ) + assert ( + "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_extra_params( - span_exporter, async_openai_client, instrument_no_content + span_exporter, + async_openai_client, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_extra_params.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = await async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - seed=42, - temperature=0.5, - max_tokens=50, - stream=False, - extra_body={"service_tier": "default"}, - response_format={"type": "text"}, - ) + response = await async_openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + seed=42, + temperature=0.5, + max_tokens=50, + stream=False, + extra_body={"service_tier": "default"}, + response_format={"type": "text"}, + ) - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED] == 42 - ) - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.5 - ) - assert 
spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 50 - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER] - == "default" - ) - assert ( - spans[0].attributes[ - GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT - ] - == "text" - ) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) + + request_seed_attr_key = ( + "gen_ai.request.seed" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED + ) + assert spans[0].attributes[request_seed_attr_key] == 42 + assert ( + spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] + == 0.5 + ) + assert ( + spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] + == 50 + ) + + service_tier_attr_key = ( + "openai.request.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER + ) + assert spans[0].attributes[service_tier_attr_key] == "default" + + output_type_attr_key = ( + "gen_ai.output.type" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT + ) + assert spans[0].attributes[output_type_attr_key] == "text" @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_multiple_choices( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_multiple_choices.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, n=2, stream=False - ) - - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 3 # 1 user message + 2 choice messages - - user_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + response = await async_openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, n=2, stream=False + ) - choice_event_0 = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[0].message.content, - }, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event_0, spans[0]) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) - choice_event_1 = { - "index": 1, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[1].message.content, - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event_1, spans[0]) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[1].message.content, + } + ], + "finish_reason": "stop", + }, + ], + ) + 
else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 3 # 1 user message + 2 choice messages + + user_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event_0 = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[0].message.content, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event_0, spans[0] + ) + + choice_event_1 = { + "index": 1, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[1].message.content, + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event_1, spans[0] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_tool_calls_with_content( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - await chat_completion_tool_call( - span_exporter, log_exporter, async_openai_client, True - ) + with vcr.use_cassette( + "test_async_chat_completion_tool_calls_with_content.yaml" + ): + await chat_completion_tool_call( + span_exporter, + log_exporter, + async_openai_client, + True, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_tool_calls_no_content( - span_exporter, log_exporter, async_openai_client, instrument_no_content + span_exporter, + log_exporter, + async_openai_client, + instrument_no_content, + vcr, ): - await chat_completion_tool_call( - span_exporter, log_exporter, async_openai_client, False - ) + with vcr.use_cassette( + "test_async_chat_completion_tool_calls_no_content.yaml" + ): + await chat_completion_tool_call( + span_exporter, + log_exporter, + async_openai_client, + False, + is_latest_experimental_enabled(), + ) async def chat_completion_tool_call( - span_exporter, log_exporter, async_openai_client, expect_content + span_exporter, + log_exporter, + async_openai_client, + expect_content, + latest_experimental_enabled, ): llm_model_value = "gpt-4o-mini" messages_value = [ @@ -302,335 +468,636 @@ async def chat_completion_tool_call( # validate both calls spans = span_exporter.get_finished_spans() assert len(spans) == 2 - assert_completion_attributes(spans[0], llm_model_value, response_0) - assert_completion_attributes(spans[1], llm_model_value, response_1) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 9 # 3 logs for first completion, 6 for second - - # call one - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) - - user_message = ( - {"content": messages_value[1]["content"]} if expect_content else None + assert_completion_attributes( + spans[0], llm_model_value, response_0, latest_experimental_enabled ) - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] + assert_completion_attributes( + spans[1], llm_model_value, response_1, latest_experimental_enabled ) - function_call_0 = {"name": "get_current_weather"} - function_call_1 = {"name": "get_current_weather"} - if expect_content: - function_call_0["arguments"] = ( - response_0.choices[0] - .message.tool_calls[0] - .function.arguments.replace("\n", "") - ) - function_call_1["arguments"] = ( - response_0.choices[0] - .message.tool_calls[1] - 
.function.arguments.replace("\n", "") - ) + if latest_experimental_enabled: + if not expect_content: + pass + else: + # first call + first_input = [ + { + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + }, + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[1]["content"], + } + ], + }, + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], first_input + ) - choice_event = { - "index": 0, - "finish_reason": "tool_calls", - "message": { - "role": "assistant", - "tool_calls": [ + first_output = [ { - "id": response_0.choices[0].message.tool_calls[0].id, - "type": "function", - "function": function_call_0, + "role": "assistant", + "parts": [ + { + "type": "tool_call", + "id": response_0.choices[0] + .message.tool_calls[0] + .id, + "name": "get_current_weather", + "arguments": {"location": "Seattle, WA"}, + }, + { + "type": "tool_call", + "id": response_0.choices[0] + .message.tool_calls[1] + .id, + "name": "get_current_weather", + "arguments": {"location": "San Francisco, CA"}, + }, + ], + "finish_reason": "tool_calls", + } + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], first_output + ) + + # second call + del first_output[0]["finish_reason"] + second_input = [] + second_input += first_input + second_input += first_output + second_input += [ + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": response_0.choices[0] + .message.tool_calls[0] + .id, + "response": tool_call_result_0["content"], + } + ], }, { - "id": response_0.choices[0].message.tool_calls[1].id, - "type": "function", - "function": function_call_1, + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": response_0.choices[0] + .message.tool_calls[1] + .id, + "response": tool_call_result_1["content"], + } + ], }, - ], - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event, spans[0]) + ] + + assert_messages_attribute( + spans[1].attributes["gen_ai.input.messages"], second_input + ) + + assert_messages_attribute( + spans[1].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response_1.choices[ + 0 + ].message.content, + }, + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 9 # 3 logs for first completion, 6 for second + + # call one + system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) - # call two - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[3], "gen_ai.system.message", system_message, spans[1] - ) + user_message = ( + {"content": messages_value[1]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) - user_message = ( - {"content": messages_value[1]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[4], "gen_ai.user.message", user_message, spans[1] - ) + function_call_0 = {"name": "get_current_weather"} + function_call_1 = {"name": "get_current_weather"} + if expect_content: + function_call_0["arguments"] = ( + response_0.choices[0] + .message.tool_calls[0] + .function.arguments.replace("\n", "") + ) + function_call_1["arguments"] = ( + response_0.choices[0] + 
.message.tool_calls[1] + .function.arguments.replace("\n", "") + ) + + choice_event = { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": response_0.choices[0].message.tool_calls[0].id, + "type": "function", + "function": function_call_0, + }, + { + "id": response_0.choices[0].message.tool_calls[1].id, + "type": "function", + "function": function_call_1, + }, + ], + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event, spans[0] + ) - assistant_tool_call = {"tool_calls": messages_value[2]["tool_calls"]} - if not expect_content: - assistant_tool_call["tool_calls"][0]["function"]["arguments"] = None - assistant_tool_call["tool_calls"][1]["function"]["arguments"] = None + # call two + system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[3], "gen_ai.system.message", system_message, spans[1] + ) - assert_message_in_logs( - logs[5], "gen_ai.assistant.message", assistant_tool_call, spans[1] - ) + user_message = ( + {"content": messages_value[1]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[4], "gen_ai.user.message", user_message, spans[1] + ) - tool_message_0 = { - "id": tool_call_result_0["tool_call_id"], - "content": tool_call_result_0["content"] if expect_content else None, - } + assistant_tool_call = {"tool_calls": messages_value[2]["tool_calls"]} + if not expect_content: + assistant_tool_call["tool_calls"][0]["function"]["arguments"] = ( + None + ) + assistant_tool_call["tool_calls"][1]["function"]["arguments"] = ( + None + ) + + assert_message_in_logs( + logs[5], "gen_ai.assistant.message", assistant_tool_call, spans[1] + ) - assert_message_in_logs( - logs[6], "gen_ai.tool.message", tool_message_0, spans[1] - ) + tool_message_0 = { + "id": tool_call_result_0["tool_call_id"], + "content": tool_call_result_0["content"] + if expect_content + else None, + } - tool_message_1 = { - "id": tool_call_result_1["tool_call_id"], - "content": tool_call_result_1["content"] if expect_content else None, - } + assert_message_in_logs( + logs[6], "gen_ai.tool.message", tool_message_0, spans[1] + ) - assert_message_in_logs( - logs[7], "gen_ai.tool.message", tool_message_1, spans[1] - ) + tool_message_1 = { + "id": tool_call_result_1["tool_call_id"], + "content": tool_call_result_1["content"] + if expect_content + else None, + } - message = { - "role": "assistant", - "content": response_1.choices[0].message.content - if expect_content - else None, - } - choice = { - "index": 0, - "finish_reason": "stop", - "message": message, - } - assert_message_in_logs(logs[8], "gen_ai.choice", choice, spans[1]) + assert_message_in_logs( + logs[7], "gen_ai.tool.message", tool_message_1, spans[1] + ) + + message = { + "role": "assistant", + "content": response_1.choices[0].message.content + if expect_content + else None, + } + choice = { + "index": 0, + "finish_reason": "stop", + "message": message, + } + assert_message_in_logs(logs[8], "gen_ai.choice", choice, spans[1]) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_streaming( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - kwargs = { - "model": llm_model_value, - "messages": messages_value, - "stream": True, - "stream_options": 
{"include_usage": True}, - } - - response_stream_usage = None - response_stream_model = None - response_stream_id = None - response_stream_result = "" - response = await async_openai_client.chat.completions.create(**kwargs) - async for chunk in response: - if chunk.choices: - response_stream_result += chunk.choices[0].delta.content or "" - - # get the last chunk - if getattr(chunk, "usage", None): - response_stream_usage = chunk.usage - response_stream_model = chunk.model - response_stream_id = chunk.id - - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], - llm_model_value, - response_stream_id, - response_stream_model, - response_stream_usage.prompt_tokens, - response_stream_usage.completion_tokens, - ) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 - - user_message = {"content": "Say this is a test"} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + with vcr.use_cassette("test_async_chat_completion_streaming.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + kwargs = { + "model": llm_model_value, + "messages": messages_value, + "stream": True, + "stream_options": {"include_usage": True}, + } - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": {"role": "assistant", "content": response_stream_result}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + response_stream_usage = None + response_stream_model = None + response_stream_id = None + response_stream_result = "" + response = await async_openai_client.chat.completions.create(**kwargs) + async for chunk in response: + if chunk.choices: + response_stream_result += chunk.choices[0].delta.content or "" + + # get the last chunk + if getattr(chunk, "usage", None): + response_stream_usage = chunk.usage + response_stream_model = chunk.model + response_stream_id = chunk.id + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + response_stream_usage.prompt_tokens, + response_stream_usage.completion_tokens, + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + {"type": "text", "content": response_stream_result} + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": "Say this is a test"} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response_stream_result, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_streaming_not_complete( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] - 
- kwargs = { - "model": llm_model_value, - "messages": messages_value, - "stream": True, - } - - response_stream_model = None - response_stream_id = None - response_stream_result = "" - response = await async_openai_client.chat.completions.create(**kwargs) - idx = 0 - async for chunk in response: - if chunk.choices: - response_stream_result += chunk.choices[0].delta.content or "" - if idx == 1: - # fake a stop - break - - if chunk.model: - response_stream_model = chunk.model - if chunk.id: - response_stream_id = chunk.id - idx += 1 - - response.close() - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], llm_model_value, response_stream_id, response_stream_model - ) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 - - user_message = {"content": "Say this is a test"} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + with vcr.use_cassette( + "test_async_chat_completion_streaming_not_complete.yaml" + ): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + kwargs = { + "model": llm_model_value, + "messages": messages_value, + "stream": True, + } - choice_event = { - "index": 0, - "finish_reason": "error", - "message": {"role": "assistant", "content": response_stream_result}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + response_stream_model = None + response_stream_id = None + response_stream_result = "" + response = await async_openai_client.chat.completions.create(**kwargs) + idx = 0 + async for chunk in response: + if chunk.choices: + response_stream_result += chunk.choices[0].delta.content or "" + if idx == 1: + # fake a stop + break + + if chunk.model: + response_stream_model = chunk.model + if chunk.id: + response_stream_id = chunk.id + idx += 1 + + response.close() + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + {"type": "text", "content": response_stream_result} + ], + "finish_reason": "error", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": "Say this is a test"} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "error", + "message": { + "role": "assistant", + "content": response_stream_result, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_multiple_choices_streaming( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] - - response_0 = await 
async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - n=2, - stream=True, - stream_options={"include_usage": True}, - ) - - # two strings for each choice - response_stream_result = ["", ""] - finish_reasons = ["", ""] - async for chunk in response_0: - if chunk.choices: - for choice in chunk.choices: - response_stream_result[choice.index] += ( - choice.delta.content or "" - ) - if choice.finish_reason: - finish_reasons[choice.index] = choice.finish_reason - - # get the last chunk - if getattr(chunk, "usage", None): - response_stream_usage = chunk.usage - response_stream_model = chunk.model - response_stream_id = chunk.id - - # sanity check - assert "stop" == finish_reasons[0] - - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], - llm_model_value, - response_stream_id, - response_stream_model, - response_stream_usage.prompt_tokens, - response_stream_usage.completion_tokens, - ) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 4 - - system_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) + with vcr.use_cassette( + "test_async_chat_completion_multiple_choices_streaming.yaml" + ): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [ + {"role": "system", "content": "You're a helpful assistant."}, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?", + }, + ] - user_message = { - "content": "What's the weather in Seattle and San Francisco today?" - } - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] - ) + response_0 = await async_openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + n=2, + stream=True, + stream_options={"include_usage": True}, + ) - choice_event_0 = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": "".join(response_stream_result[0]), - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event_0, spans[0]) + # two strings for each choice + response_stream_result = ["", ""] + finish_reasons = ["", ""] + async for chunk in response_0: + if chunk.choices: + for choice in chunk.choices: + response_stream_result[choice.index] += ( + choice.delta.content or "" + ) + if choice.finish_reason: + finish_reasons[choice.index] = choice.finish_reason + + # get the last chunk + if getattr(chunk, "usage", None): + response_stream_usage = chunk.usage + response_stream_model = chunk.model + response_stream_id = chunk.id + + # sanity check + assert "stop" == finish_reasons[0] + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + response_stream_usage.prompt_tokens, + response_stream_usage.completion_tokens, + ) - choice_event_1 = { - "index": 1, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": "".join(response_stream_result[1]), - }, - } - assert_message_in_logs(logs[3], "gen_ai.choice", choice_event_1, spans[0]) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + }, + { + "role": "user", + "parts": [ + { + "type": "text", + "content": 
messages_value[1]["content"], + } + ], + }, + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[0]), + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[1]), + } + ], + "finish_reason": "stop", + }, + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 4 + + system_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) + + user_message = { + "content": "What's the weather in Seattle and San Francisco today?" + } + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event_0 = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "".join(response_stream_result[0]), + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event_0, spans[0] + ) + + choice_event_1 = { + "index": 1, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "".join(response_stream_result[1]), + }, + } + assert_message_in_logs( + logs[3], "gen_ai.choice", choice_event_1, spans[0] + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_multiple_tools_streaming_with_content( - span_exporter, log_exporter, async_openai_client, instrument_with_content + span_exporter, + log_exporter, + async_openai_client, + instrument_with_content, + vcr, ): - await async_chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, async_openai_client, True - ) + with vcr.use_cassette( + "test_async_chat_completion_multiple_tools_streaming_with_content.yaml" + ): + await async_chat_completion_multiple_tools_streaming( + span_exporter, + log_exporter, + async_openai_client, + True, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_multiple_tools_streaming_no_content( - span_exporter, log_exporter, async_openai_client, instrument_no_content + span_exporter, + log_exporter, + async_openai_client, + instrument_no_content, + vcr, ): - await async_chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, async_openai_client, False - ) + with vcr.use_cassette( + "test_async_chat_completion_multiple_tools_streaming_no_content.yaml" + ): + await async_chat_completion_multiple_tools_streaming( + span_exporter, + log_exporter, + async_openai_client, + False, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() @@ -640,50 +1107,73 @@ async def test_async_chat_completion_streaming_unsampled( log_exporter, async_openai_client, instrument_with_content_unsampled, + vcr, ): - llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - kwargs = { - "model": llm_model_value, - "messages": messages_value, - "stream": True, - "stream_options": {"include_usage": True}, - } - - response_stream_result = "" - response = await async_openai_client.chat.completions.create(**kwargs) - async for chunk in response: - if chunk.choices: - response_stream_result += chunk.choices[0].delta.content or "" - - spans = span_exporter.get_finished_spans() - assert len(spans) == 0 + with vcr.use_cassette( + "test_async_chat_completion_streaming_unsampled.yaml" + ): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value 
= "gpt-4" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + kwargs = { + "model": llm_model_value, + "messages": messages_value, + "stream": True, + "stream_options": {"include_usage": True}, + } - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + response_stream_result = "" + response = await async_openai_client.chat.completions.create(**kwargs) + async for chunk in response: + if chunk.choices: + response_stream_result += chunk.choices[0].delta.content or "" - user_message = {"content": "Say this is a test"} - assert_message_in_logs(logs[0], "gen_ai.user.message", user_message, None) + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": {"role": "assistant", "content": response_stream_result}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, None) + logs = log_exporter.get_finished_logs() + if latest_experimental_enabled: + assert len(logs) == 0 + # TODO: new event + else: + assert len(logs) == 2 + + user_message = {"content": "Say this is a test"} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, None + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response_stream_result, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, None + ) - assert logs[0].log_record.trace_id is not None - assert logs[0].log_record.span_id is not None - assert logs[0].log_record.trace_flags == 0 + assert logs[0].log_record.trace_id is not None + assert logs[0].log_record.span_id is not None + assert logs[0].log_record.trace_flags == 0 - assert logs[0].log_record.trace_id == logs[1].log_record.trace_id - assert logs[0].log_record.span_id == logs[1].log_record.span_id - assert logs[0].log_record.trace_flags == logs[1].log_record.trace_flags + assert logs[0].log_record.trace_id == logs[1].log_record.trace_id + assert logs[0].log_record.span_id == logs[1].log_record.span_id + assert ( + logs[0].log_record.trace_flags + == logs[1].log_record.trace_flags + ) async def async_chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, async_openai_client, expect_content + span_exporter, + log_exporter, + async_openai_client, + expect_content, + latest_experimental_enabled, ): llm_model_value = "gpt-4o-mini" messages_value = [ @@ -735,68 +1225,127 @@ async def async_chat_completion_multiple_tools_streaming( assert_all_attributes( spans[0], llm_model_value, + latest_experimental_enabled, response_stream_id, response_stream_model, response_stream_usage.prompt_tokens, response_stream_usage.completion_tokens, ) - logs = log_exporter.get_finished_logs() - assert len(logs) == 3 + if latest_experimental_enabled: + if expect_content: + # first call + first_input = [ + { + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + }, + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[1]["content"], + } + ], + }, + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], first_input + ) - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) + first_output = [ + { + "role": "assistant", + "parts": [ + { + "type": "tool_call", + "id": tool_call_ids[0], + "name": "get_current_weather", + "arguments": {"location": "Seattle, 
WA"}, + }, + { + "type": "tool_call", + "id": tool_call_ids[1], + "name": "get_current_weather", + "arguments": {"location": "San Francisco, CA"}, + }, + ], + "finish_reason": "tool_calls", + } + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], first_output + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 3 - user_message = ( - {"content": "What's the weather in Seattle and San Francisco today?"} - if expect_content - else None - ) - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] - ) + system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) - choice_event = { - "index": 0, - "finish_reason": "tool_calls", - "message": { - "role": "assistant", - "tool_calls": [ - { - "id": tool_call_ids[0], - "type": "function", - "function": { - "name": tool_names[0], - "arguments": ( - tool_args[0].replace("\n", "") - if expect_content - else None - ), + user_message = ( + { + "content": "What's the weather in Seattle and San Francisco today?" + } + if expect_content + else None + ) + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": tool_call_ids[0], + "type": "function", + "function": { + "name": tool_names[0], + "arguments": ( + tool_args[0].replace("\n", "") + if expect_content + else None + ), + }, }, - }, - { - "id": tool_call_ids[1], - "type": "function", - "function": { - "name": tool_names[1], - "arguments": ( - tool_args[1].replace("\n", "") - if expect_content - else None - ), + { + "id": tool_call_ids[1], + "type": "function", + "function": { + "name": tool_names[1], + "arguments": ( + tool_args[1].replace("\n", "") + if expect_content + else None + ), + }, }, - }, - ], - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event, spans[0]) + ], + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event, spans[0] + ) def assert_message_in_logs(log, event_name, expected_content, parent_span): + # TODO: switch to top-level eventName under latest-experimental flag assert log.log_record.attributes[EventAttributes.EVENT_NAME] == event_name assert ( log.log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM] @@ -811,124 +1360,3 @@ def assert_message_in_logs(log, event_name, expected_content, parent_span): expected_content ) assert_log_parent(log, parent_span) - - -def remove_none_values(body): - result = {} - for key, value in body.items(): - if value is None: - continue - if isinstance(value, dict): - result[key] = remove_none_values(value) - elif isinstance(value, list): - result[key] = [remove_none_values(i) for i in value] - else: - result[key] = value - return result - - -def assert_completion_attributes( - span: ReadableSpan, - request_model: str, - response: ChatCompletion, - operation_name: str = "chat", - server_address: str = "api.openai.com", -): - return assert_all_attributes( - span, - request_model, - response.id, - response.model, - response.usage.prompt_tokens, - response.usage.completion_tokens, - operation_name, - server_address, - ) - - -def assert_all_attributes( - span: ReadableSpan, - request_model: str, - response_id: str = None, - response_model: str = None, - input_tokens: Optional[int] = None, - output_tokens: Optional[int] = None, - operation_name: str = 
"chat", - server_address: str = "api.openai.com", -): - assert span.name == f"{operation_name} {request_model}" - assert ( - operation_name - == span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] - ) - assert ( - GenAIAttributes.GenAiSystemValues.OPENAI.value - == span.attributes[GenAIAttributes.GEN_AI_SYSTEM] - ) - assert ( - request_model == span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] - ) - if response_model: - assert ( - response_model - == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] - ) - else: - assert GenAIAttributes.GEN_AI_RESPONSE_MODEL not in span.attributes - - if response_id: - assert ( - response_id == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID] - ) - else: - assert GenAIAttributes.GEN_AI_RESPONSE_ID not in span.attributes - - if input_tokens: - assert ( - input_tokens - == span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] - ) - else: - assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS not in span.attributes - - if output_tokens: - assert ( - output_tokens - == span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] - ) - else: - assert ( - GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS not in span.attributes - ) - - assert server_address == span.attributes[ServerAttributes.SERVER_ADDRESS] - - -def assert_log_parent(log, span): - if span: - assert log.log_record.trace_id == span.get_span_context().trace_id - assert log.log_record.span_id == span.get_span_context().span_id - assert ( - log.log_record.trace_flags == span.get_span_context().trace_flags - ) - - -def get_current_weather_tool_definition(): - return { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. Boston, MA", - }, - }, - "required": ["location"], - "additionalProperties": False, - }, - }, - } diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py index 914d5b5b98..b676f37c68 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py @@ -13,13 +13,13 @@ # limitations under the License. 
# pylint: disable=too-many-locals -from typing import Optional import pytest from openai import APIConnectionError, NotFoundError, OpenAI -from openai.resources.chat.completions import ChatCompletion -from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.instrumentation.openai_v2.utils import ( + is_latest_experimental_enabled, +) from opentelemetry.semconv._incubating.attributes import ( error_attributes as ErrorAttributes, ) @@ -33,254 +33,419 @@ server_attributes as ServerAttributes, ) from opentelemetry.semconv._incubating.metrics import gen_ai_metrics +from tests.test_utils import ( + assert_all_attributes, + assert_completion_attributes, + assert_log_parent, + assert_messages_attribute, + get_current_weather_tool_definition, +) @pytest.mark.vcr() def test_chat_completion_with_content( - span_exporter, log_exporter, openai_client, instrument_with_content + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_chat_completion_with_content.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) - - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + response = openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - user_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[0].message.content, - }, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[0].message.content, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() def test_chat_completion_no_content( - span_exporter, log_exporter, openai_client, instrument_no_content + span_exporter, + log_exporter, + openai_client, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with 
vcr.use_cassette("test_chat_completion_no_content.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) + response = openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + logs = log_exporter.get_finished_logs() + if latest_experimental_enabled: + assert len(logs) == 0 + assert "gen_ai.input.messages" not in spans[0].attributes + assert "gen_ai.output.messages" not in spans[0].attributes + else: + assert len(logs) == 2 - assert_message_in_logs(logs[0], "gen_ai.user.message", None, spans[0]) + assert_message_in_logs( + logs[0], "gen_ai.user.message", None, spans[0] + ) - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": {"role": "assistant"}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": {"role": "assistant"}, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) def test_chat_completion_bad_endpoint( - span_exporter, metric_reader, instrument_no_content + span_exporter, + metric_reader, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - client = OpenAI(base_url="http://localhost:4242") - - with pytest.raises(APIConnectionError): - client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - timeout=0.1, + with vcr.use_cassette("test_chat_completion_bad_endpoint.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + client = OpenAI(base_url="http://localhost:4242") + + with pytest.raises(APIConnectionError): + client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + timeout=0.1, + ) + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + server_address="localhost", + ) + assert 4242 == spans[0].attributes[ServerAttributes.SERVER_PORT] + assert ( + "APIConnectionError" + == spans[0].attributes[ErrorAttributes.ERROR_TYPE] ) - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], llm_model_value, server_address="localhost" - ) - assert 4242 == spans[0].attributes[ServerAttributes.SERVER_PORT] - assert ( - "APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] - ) - - metrics = metric_reader.get_metrics_data().resource_metrics - assert len(metrics) == 1 - - metric_data = metrics[0].scope_metrics[0].metrics - duration_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION - ), - None, - ) - assert duration_metric is not None - assert duration_metric.data.data_points[0].sum > 0 - assert ( - duration_metric.data.data_points[0].attributes[ - ErrorAttributes.ERROR_TYPE - ] - == 
"APIConnectionError" - ) + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 + + metric_data = metrics[0].scope_metrics[0].metrics + duration_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION + ), + None, + ) + assert duration_metric is not None + assert duration_metric.data.data_points[0].sum > 0 + assert ( + duration_metric.data.data_points[0].attributes[ + ErrorAttributes.ERROR_TYPE + ] + == "APIConnectionError" + ) @pytest.mark.vcr() def test_chat_completion_404( - span_exporter, openai_client, metric_reader, instrument_no_content + span_exporter, + openai_client, + metric_reader, + instrument_no_content, + vcr, ): - llm_model_value = "this-model-does-not-exist" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_chat_completion_404.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "this-model-does-not-exist" + messages_value = [{"role": "user", "content": "Say this is a test"}] - with pytest.raises(NotFoundError): - openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - ) + with pytest.raises(NotFoundError): + openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + ) - spans = span_exporter.get_finished_spans() + spans = span_exporter.get_finished_spans() - assert_all_attributes(spans[0], llm_model_value) - assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] - - metrics = metric_reader.get_metrics_data().resource_metrics - assert len(metrics) == 1 + assert_all_attributes( + spans[0], llm_model_value, latest_experimental_enabled + ) + assert ( + "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] + ) - metric_data = metrics[0].scope_metrics[0].metrics - duration_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION - ), - None, - ) - assert duration_metric is not None - assert duration_metric.data.data_points[0].sum > 0 - assert ( - duration_metric.data.data_points[0].attributes[ - ErrorAttributes.ERROR_TYPE - ] - == "NotFoundError" - ) + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 + + metric_data = metrics[0].scope_metrics[0].metrics + duration_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION + ), + None, + ) + assert duration_metric is not None + assert duration_metric.data.data_points[0].sum > 0 + assert ( + duration_metric.data.data_points[0].attributes[ + ErrorAttributes.ERROR_TYPE + ] + == "NotFoundError" + ) @pytest.mark.vcr() def test_chat_completion_extra_params( - span_exporter, openai_client, instrument_no_content + span_exporter, + openai_client, + instrument_no_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_chat_completion_extra_params.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - response = openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - seed=42, - temperature=0.5, - max_tokens=50, - stream=False, - extra_body={"service_tier": "default"}, - response_format={"type": "text"}, - ) - - spans = span_exporter.get_finished_spans() - 
assert_completion_attributes(spans[0], llm_model_value, response) - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED] == 42 - ) - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.5 - ) - assert spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 50 - assert ( - spans[0].attributes[GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER] - == "default" - ) - assert ( - spans[0].attributes[ - GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT - ] - == "text" - ) + response = openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + seed=42, + temperature=0.5, + max_tokens=50, + stream=False, + extra_body={"service_tier": "default"}, + response_format={"type": "text"}, + ) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], llm_model_value, response, latest_experimental_enabled + ) -@pytest.mark.vcr() -def test_chat_completion_multiple_choices( - span_exporter, log_exporter, openai_client, instrument_with_content -): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + request_seed_attr_key = ( + "gen_ai.request.seed" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SEED + ) + assert spans[0].attributes[request_seed_attr_key] == 42 + assert ( + spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] + == 0.5 + ) + assert ( + spans[0].attributes[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] + == 50 + ) - response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, n=2, stream=False - ) + service_tier_attr_key = ( + "openai.request.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_SERVICE_TIER + ) + assert spans[0].attributes[service_tier_attr_key] == "default" - spans = span_exporter.get_finished_spans() - assert_completion_attributes(spans[0], llm_model_value, response) + output_type_attr_key = ( + "gen_ai.output.type" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT + ) + assert spans[0].attributes[output_type_attr_key] == "text" - logs = log_exporter.get_finished_logs() - assert len(logs) == 3 # 1 user message + 2 choice messages - user_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) +@pytest.mark.vcr() +def test_chat_completion_multiple_choices( + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, +): + with vcr.use_cassette("test_chat_completion_multiple_choices.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - choice_event_0 = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[0].message.content, - }, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event_0, spans[0]) + response = openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, n=2, stream=False + ) - choice_event_1 = { - "index": 1, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[1].message.content, - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event_1, spans[0]) + spans = span_exporter.get_finished_spans() + assert_completion_attributes( + spans[0], 
llm_model_value, response, latest_experimental_enabled + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[1].message.content, + } + ], + "finish_reason": "stop", + }, + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 3 # 1 user message + 2 choice messages + + user_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event_0 = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[0].message.content, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event_0, spans[0] + ) + + choice_event_1 = { + "index": 1, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[1].message.content, + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event_1, spans[0] + ) @pytest.mark.vcr() def test_chat_completion_tool_calls_with_content( - span_exporter, log_exporter, openai_client, instrument_with_content + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, ): - chat_completion_tool_call(span_exporter, log_exporter, openai_client, True) + with vcr.use_cassette("test_chat_completion_tool_calls_with_content.yaml"): + chat_completion_tool_call( + span_exporter, + log_exporter, + openai_client, + True, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() def test_chat_completion_tool_calls_no_content( - span_exporter, log_exporter, openai_client, instrument_no_content + span_exporter, + log_exporter, + openai_client, + instrument_no_content, + vcr, ): - chat_completion_tool_call( - span_exporter, log_exporter, openai_client, False - ) + with vcr.use_cassette("test_chat_completion_tool_calls_no_content.yaml"): + chat_completion_tool_call( + span_exporter, + log_exporter, + openai_client, + False, + is_latest_experimental_enabled(), + ) def chat_completion_tool_call( - span_exporter, log_exporter, openai_client, expect_content + span_exporter, + log_exporter, + openai_client, + expect_content, + latest_experimental_enabled, ): llm_model_value = "gpt-4o-mini" messages_value = [ @@ -335,328 +500,627 @@ def chat_completion_tool_call( # validate both calls spans = span_exporter.get_finished_spans() assert len(spans) == 2 - assert_completion_attributes(spans[0], llm_model_value, response_0) - assert_completion_attributes(spans[1], llm_model_value, response_1) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 9 # 3 logs for first completion, 6 for second - - # call one - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) - - user_message = ( - {"content": messages_value[1]["content"]} if expect_content else None + assert_completion_attributes( + spans[0], llm_model_value, response_0, latest_experimental_enabled ) - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] + assert_completion_attributes( + spans[1], llm_model_value, response_1, latest_experimental_enabled ) - function_call_0 = {"name": "get_current_weather"} - function_call_1 = 
{"name": "get_current_weather"} - if expect_content: - function_call_0["arguments"] = ( - response_0.choices[0] - .message.tool_calls[0] - .function.arguments.replace("\n", "") - ) - function_call_1["arguments"] = ( - response_0.choices[0] - .message.tool_calls[1] - .function.arguments.replace("\n", "") - ) - - choice_event = { - "index": 0, - "finish_reason": "tool_calls", - "message": { - "role": "assistant", - "tool_calls": [ + if latest_experimental_enabled: + if not expect_content: + pass + else: + # first call + first_input = [ { - "id": response_0.choices[0].message.tool_calls[0].id, - "type": "function", - "function": function_call_0, + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], }, { - "id": response_0.choices[0].message.tool_calls[1].id, - "type": "function", - "function": function_call_1, + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[1]["content"], + } + ], }, - ], - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event, spans[0]) + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], first_input + ) - # call two - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[3], "gen_ai.system.message", system_message, spans[1] - ) - - user_message = ( - {"content": messages_value[1]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[4], "gen_ai.user.message", user_message, spans[1] - ) + first_output = [ + { + "role": "assistant", + "parts": [ + { + "type": "tool_call", + "id": response_0.choices[0] + .message.tool_calls[0] + .id, + "name": "get_current_weather", + "arguments": {"location": "Seattle, WA"}, + }, + { + "type": "tool_call", + "id": response_0.choices[0] + .message.tool_calls[1] + .id, + "name": "get_current_weather", + "arguments": {"location": "San Francisco, CA"}, + }, + ], + "finish_reason": "tool_calls", + } + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], first_output + ) + + # second call + del first_output[0]["finish_reason"] + second_input = [] + second_input += first_input + second_input += first_output + second_input += [ + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": response_0.choices[0] + .message.tool_calls[0] + .id, + "response": tool_call_result_0["content"], + } + ], + }, + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": response_0.choices[0] + .message.tool_calls[1] + .id, + "response": tool_call_result_1["content"], + } + ], + }, + ] + + assert_messages_attribute( + spans[1].attributes["gen_ai.input.messages"], second_input + ) + + assert_messages_attribute( + spans[1].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response_1.choices[ + 0 + ].message.content, + }, + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() - assistant_tool_call = {"tool_calls": messages_value[2]["tool_calls"]} - if not expect_content: - assistant_tool_call["tool_calls"][0]["function"]["arguments"] = None - assistant_tool_call["tool_calls"][1]["function"]["arguments"] = None + assert len(logs) == 9 # 3 logs for first completion, 6 for second - assert_message_in_logs( - logs[5], "gen_ai.assistant.message", assistant_tool_call, spans[1] - ) + # call one + system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + 
else None + ) + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) - tool_message_0 = { - "id": tool_call_result_0["tool_call_id"], - "content": tool_call_result_0["content"] if expect_content else None, - } + user_message = ( + {"content": messages_value[1]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) - assert_message_in_logs( - logs[6], "gen_ai.tool.message", tool_message_0, spans[1] - ) + function_call_0 = {"name": "get_current_weather"} + function_call_1 = {"name": "get_current_weather"} + if expect_content: + function_call_0["arguments"] = ( + response_0.choices[0] + .message.tool_calls[0] + .function.arguments.replace("\n", "") + ) + function_call_1["arguments"] = ( + response_0.choices[0] + .message.tool_calls[1] + .function.arguments.replace("\n", "") + ) + + choice_event = { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": response_0.choices[0].message.tool_calls[0].id, + "type": "function", + "function": function_call_0, + }, + { + "id": response_0.choices[0].message.tool_calls[1].id, + "type": "function", + "function": function_call_1, + }, + ], + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event, spans[0] + ) - tool_message_1 = { - "id": tool_call_result_1["tool_call_id"], - "content": tool_call_result_1["content"] if expect_content else None, - } + # call two + system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[3], "gen_ai.system.message", system_message, spans[1] + ) - assert_message_in_logs( - logs[7], "gen_ai.tool.message", tool_message_1, spans[1] - ) + user_message = ( + {"content": messages_value[1]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[4], "gen_ai.user.message", user_message, spans[1] + ) - message = { - "role": "assistant", - "content": response_1.choices[0].message.content - if expect_content - else None, - } - choice = { - "index": 0, - "finish_reason": "stop", - "message": message, - } - assert_message_in_logs(logs[8], "gen_ai.choice", choice, spans[1]) + assistant_tool_call = {"tool_calls": messages_value[2]["tool_calls"]} + if not expect_content: + assistant_tool_call["tool_calls"][0]["function"]["arguments"] = ( + None + ) + assistant_tool_call["tool_calls"][1]["function"]["arguments"] = ( + None + ) + + assert_message_in_logs( + logs[5], "gen_ai.assistant.message", assistant_tool_call, spans[1] + ) + tool_message_0 = { + "id": tool_call_result_0["tool_call_id"], + "content": tool_call_result_0["content"] + if expect_content + else None, + } -@pytest.mark.vcr() -def test_chat_completion_streaming( - span_exporter, log_exporter, openai_client, instrument_with_content -): - llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - kwargs = { - "model": llm_model_value, - "messages": messages_value, - "stream": True, - "stream_options": {"include_usage": True}, - } + assert_message_in_logs( + logs[6], "gen_ai.tool.message", tool_message_0, spans[1] + ) - response_stream_usage = None - response_stream_model = None - response_stream_id = None - response_stream_result = "" - response = openai_client.chat.completions.create(**kwargs) - for chunk in response: - if chunk.choices: - response_stream_result += chunk.choices[0].delta.content or "" + tool_message_1 = { + "id": 
tool_call_result_1["tool_call_id"], + "content": tool_call_result_1["content"] + if expect_content + else None, + } - # get the last chunk - if getattr(chunk, "usage", None): - response_stream_usage = chunk.usage - response_stream_model = chunk.model - response_stream_id = chunk.id + assert_message_in_logs( + logs[7], "gen_ai.tool.message", tool_message_1, spans[1] + ) - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], - llm_model_value, - response_stream_id, - response_stream_model, - response_stream_usage.prompt_tokens, - response_stream_usage.completion_tokens, - ) + message = { + "role": "assistant", + "content": response_1.choices[0].message.content + if expect_content + else None, + } + choice = { + "index": 0, + "finish_reason": "stop", + "message": message, + } + assert_message_in_logs(logs[8], "gen_ai.choice", choice, spans[1]) - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 - user_message = {"content": "Say this is a test"} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) +@pytest.mark.vcr() +def test_chat_completion_streaming( + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, +): + with vcr.use_cassette("test_chat_completion_streaming.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + kwargs = { + "model": llm_model_value, + "messages": messages_value, + "stream": True, + "stream_options": {"include_usage": True}, + } - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": {"role": "assistant", "content": response_stream_result}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + response_stream_usage = None + response_stream_model = None + response_stream_id = None + response_stream_result = "" + response = openai_client.chat.completions.create(**kwargs) + for chunk in response: + if chunk.choices: + response_stream_result += chunk.choices[0].delta.content or "" + + # get the last chunk + if getattr(chunk, "usage", None): + response_stream_usage = chunk.usage + response_stream_model = chunk.model + response_stream_id = chunk.id + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + response_stream_usage.prompt_tokens, + response_stream_usage.completion_tokens, + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + {"type": "text", "content": response_stream_result} + ], + "finish_reason": "stop", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": "Say this is a test"} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response_stream_result, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() def test_chat_completion_streaming_not_complete( - span_exporter, log_exporter, openai_client, 
instrument_with_content + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - kwargs = { - "model": llm_model_value, - "messages": messages_value, - "stream": True, - } - - response_stream_model = None - response_stream_id = None - response_stream_result = "" - response = openai_client.chat.completions.create(**kwargs) - for idx, chunk in enumerate(response): - if chunk.choices: - response_stream_result += chunk.choices[0].delta.content or "" - if idx == 1: - # fake a stop - break - - if chunk.model: - response_stream_model = chunk.model - if chunk.id: - response_stream_id = chunk.id - - response.close() - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], llm_model_value, response_stream_id, response_stream_model - ) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 - - user_message = {"content": "Say this is a test"} - assert_message_in_logs( - logs[0], "gen_ai.user.message", user_message, spans[0] - ) + with vcr.use_cassette("test_chat_completion_streaming_not_complete.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + kwargs = { + "model": llm_model_value, + "messages": messages_value, + "stream": True, + } - choice_event = { - "index": 0, - "finish_reason": "error", - "message": {"role": "assistant", "content": response_stream_result}, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0]) + response_stream_model = None + response_stream_id = None + response_stream_result = "" + response = openai_client.chat.completions.create(**kwargs) + for idx, chunk in enumerate(response): + if chunk.choices: + response_stream_result += chunk.choices[0].delta.content or "" + if idx == 1: + # fake a stop + break + + if chunk.model: + response_stream_model = chunk.model + if chunk.id: + response_stream_id = chunk.id + + response.close() + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + } + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + {"type": "text", "content": response_stream_result} + ], + "finish_reason": "error", + } + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 2 + + user_message = {"content": "Say this is a test"} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "error", + "message": { + "role": "assistant", + "content": response_stream_result, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, spans[0] + ) @pytest.mark.vcr() def test_chat_completion_multiple_choices_streaming( - span_exporter, log_exporter, openai_client, instrument_with_content + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - 
"content": "What's the weather in Seattle and San Francisco today?", - }, - ] - - response_0 = openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, - n=2, - stream=True, - stream_options={"include_usage": True}, - ) - - # two strings for each choice - response_stream_result = ["", ""] - finish_reasons = ["", ""] - for chunk in response_0: - if chunk.choices: - for choice in chunk.choices: - response_stream_result[choice.index] += ( - choice.delta.content or "" - ) - if choice.finish_reason: - finish_reasons[choice.index] = choice.finish_reason - - # get the last chunk - if getattr(chunk, "usage", None): - response_stream_usage = chunk.usage - response_stream_model = chunk.model - response_stream_id = chunk.id - - # sanity check - assert "stop" == finish_reasons[0] - - spans = span_exporter.get_finished_spans() - assert_all_attributes( - spans[0], - llm_model_value, - response_stream_id, - response_stream_model, - response_stream_usage.prompt_tokens, - response_stream_usage.completion_tokens, - ) - - logs = log_exporter.get_finished_logs() - assert len(logs) == 4 - - system_message = {"content": messages_value[0]["content"]} - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) - - user_message = { - "content": "What's the weather in Seattle and San Francisco today?" - } - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] - ) + with vcr.use_cassette( + "test_chat_completion_multiple_choices_streaming.yaml" + ): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [ + {"role": "system", "content": "You're a helpful assistant."}, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?", + }, + ] - choice_event_0 = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": "".join(response_stream_result[0]), - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event_0, spans[0]) + response_0 = openai_client.chat.completions.create( + messages=messages_value, + model=llm_model_value, + n=2, + stream=True, + stream_options={"include_usage": True}, + ) - choice_event_1 = { - "index": 1, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": "".join(response_stream_result[1]), - }, - } - assert_message_in_logs(logs[3], "gen_ai.choice", choice_event_1, spans[0]) + # two strings for each choice + response_stream_result = ["", ""] + finish_reasons = ["", ""] + for chunk in response_0: + if chunk.choices: + for choice in chunk.choices: + response_stream_result[choice.index] += ( + choice.delta.content or "" + ) + if choice.finish_reason: + finish_reasons[choice.index] = choice.finish_reason + + # get the last chunk + if getattr(chunk, "usage", None): + response_stream_usage = chunk.usage + response_stream_model = chunk.model + response_stream_id = chunk.id + + # sanity check + assert "stop" == finish_reasons[0] + + spans = span_exporter.get_finished_spans() + assert_all_attributes( + spans[0], + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + response_stream_usage.prompt_tokens, + response_stream_usage.completion_tokens, + ) + if latest_experimental_enabled: + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], + [ + { + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + }, + { + "role": "user", + 
"parts": [ + { + "type": "text", + "content": messages_value[1]["content"], + } + ], + }, + ], + ) + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[0]), + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[1]), + } + ], + "finish_reason": "stop", + }, + ], + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 4 + + system_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) + + user_message = { + "content": "What's the weather in Seattle and San Francisco today?" + } + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event_0 = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "".join(response_stream_result[0]), + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event_0, spans[0] + ) + + choice_event_1 = { + "index": 1, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "".join(response_stream_result[1]), + }, + } + assert_message_in_logs( + logs[3], "gen_ai.choice", choice_event_1, spans[0] + ) @pytest.mark.vcr() def test_chat_completion_multiple_tools_streaming_with_content( - span_exporter, log_exporter, openai_client, instrument_with_content + span_exporter, + log_exporter, + openai_client, + instrument_with_content, + vcr, ): - chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, openai_client, True - ) + with vcr.use_cassette( + "test_chat_completion_multiple_tools_streaming_with_content.yaml" + ): + chat_completion_multiple_tools_streaming( + span_exporter, + log_exporter, + openai_client, + True, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() def test_chat_completion_multiple_tools_streaming_no_content( - span_exporter, log_exporter, openai_client, instrument_no_content + span_exporter, + log_exporter, + openai_client, + instrument_no_content, + vcr, ): - chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, openai_client, False - ) + with vcr.use_cassette( + "test_chat_completion_multiple_tools_streaming_no_content.yaml" + ): + chat_completion_multiple_tools_streaming( + span_exporter, + log_exporter, + openai_client, + False, + is_latest_experimental_enabled(), + ) @pytest.mark.vcr() @@ -665,44 +1129,64 @@ def test_chat_completion_with_content_span_unsampled( log_exporter, openai_client, instrument_with_content_unsampled, + vcr, ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] - - response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) - - spans = span_exporter.get_finished_spans() - assert len(spans) == 0 - - logs = log_exporter.get_finished_logs() - assert len(logs) == 2 + with vcr.use_cassette( + "test_chat_completion_with_content_span_unsampled.yaml" + ): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] + + response = openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - user_message = {"content": messages_value[0]["content"]} - assert_message_in_logs(logs[0], 
"gen_ai.user.message", user_message, None) + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 - choice_event = { - "index": 0, - "finish_reason": "stop", - "message": { - "role": "assistant", - "content": response.choices[0].message.content, - }, - } - assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, None) + logs = log_exporter.get_finished_logs() + if latest_experimental_enabled: + assert len(logs) == 0 + # TODO: check event + else: + assert len(logs) == 2 + + user_message = {"content": messages_value[0]["content"]} + assert_message_in_logs( + logs[0], "gen_ai.user.message", user_message, None + ) + + choice_event = { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": response.choices[0].message.content, + }, + } + assert_message_in_logs( + logs[1], "gen_ai.choice", choice_event, None + ) - assert logs[0].log_record.trace_id is not None - assert logs[0].log_record.span_id is not None - assert logs[0].log_record.trace_flags == 0 + assert logs[0].log_record.trace_id is not None + assert logs[0].log_record.span_id is not None + assert logs[0].log_record.trace_flags == 0 - assert logs[0].log_record.trace_id == logs[1].log_record.trace_id - assert logs[0].log_record.span_id == logs[1].log_record.span_id - assert logs[0].log_record.trace_flags == logs[1].log_record.trace_flags + assert logs[0].log_record.trace_id == logs[1].log_record.trace_id + assert logs[0].log_record.span_id == logs[1].log_record.span_id + assert ( + logs[0].log_record.trace_flags + == logs[1].log_record.trace_flags + ) def chat_completion_multiple_tools_streaming( - span_exporter, log_exporter, openai_client, expect_content + span_exporter, + log_exporter, + openai_client, + expect_content, + latest_experimental_enabled, ): llm_model_value = "gpt-4o-mini" messages_value = [ @@ -754,61 +1238,119 @@ def chat_completion_multiple_tools_streaming( assert_all_attributes( spans[0], llm_model_value, + latest_experimental_enabled, response_stream_id, response_stream_model, response_stream_usage.prompt_tokens, response_stream_usage.completion_tokens, ) - logs = log_exporter.get_finished_logs() - assert len(logs) == 3 + if latest_experimental_enabled: + if expect_content: + # first call + first_input = [ + { + "role": "system", + "parts": [ + { + "type": "text", + "content": messages_value[0]["content"], + } + ], + }, + { + "role": "user", + "parts": [ + { + "type": "text", + "content": messages_value[1]["content"], + } + ], + }, + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.input.messages"], first_input + ) - system_message = ( - {"content": messages_value[0]["content"]} if expect_content else None - ) - assert_message_in_logs( - logs[0], "gen_ai.system.message", system_message, spans[0] - ) + first_output = [ + { + "role": "assistant", + "parts": [ + { + "type": "tool_call", + "id": tool_call_ids[0], + "name": "get_current_weather", + "arguments": {"location": "Seattle, WA"}, + }, + { + "type": "tool_call", + "id": tool_call_ids[1], + "name": "get_current_weather", + "arguments": {"location": "San Francisco, CA"}, + }, + ], + "finish_reason": "tool_calls", + } + ] + assert_messages_attribute( + spans[0].attributes["gen_ai.output.messages"], first_output + ) + else: + logs = log_exporter.get_finished_logs() + assert len(logs) == 3 - user_message = ( - {"content": "What's the weather in Seattle and San Francisco today?"} - if expect_content - else None - ) - assert_message_in_logs( - logs[1], "gen_ai.user.message", user_message, spans[0] - ) + 
system_message = ( + {"content": messages_value[0]["content"]} + if expect_content + else None + ) + assert_message_in_logs( + logs[0], "gen_ai.system.message", system_message, spans[0] + ) - choice_event = { - "index": 0, - "finish_reason": "tool_calls", - "message": { - "role": "assistant", - "tool_calls": [ - { - "id": tool_call_ids[0], - "type": "function", - "function": { - "name": tool_names[0], - "arguments": tool_args[0].replace("\n", "") - if expect_content - else None, + user_message = ( + { + "content": "What's the weather in Seattle and San Francisco today?" + } + if expect_content + else None + ) + assert_message_in_logs( + logs[1], "gen_ai.user.message", user_message, spans[0] + ) + + choice_event = { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": tool_call_ids[0], + "type": "function", + "function": { + "name": tool_names[0], + "arguments": tool_args[0].replace("\n", "") + if expect_content + else None, + }, }, - }, - { - "id": tool_call_ids[1], - "type": "function", - "function": { - "name": tool_names[1], - "arguments": tool_args[1].replace("\n", "") - if expect_content - else None, + { + "id": tool_call_ids[1], + "type": "function", + "function": { + "name": tool_names[1], + "arguments": tool_args[1].replace("\n", "") + if expect_content + else None, + }, }, - }, - ], - }, - } - assert_message_in_logs(logs[2], "gen_ai.choice", choice_event, spans[0]) + ], + }, + } + assert_message_in_logs( + logs[2], "gen_ai.choice", choice_event, spans[0] + ) def assert_message_in_logs(log, event_name, expected_content, parent_span): @@ -840,110 +1382,3 @@ def remove_none_values(body): else: result[key] = value return result - - -def assert_completion_attributes( - span: ReadableSpan, - request_model: str, - response: ChatCompletion, - operation_name: str = "chat", - server_address: str = "api.openai.com", -): - return assert_all_attributes( - span, - request_model, - response.id, - response.model, - response.usage.prompt_tokens, - response.usage.completion_tokens, - operation_name, - server_address, - ) - - -def assert_all_attributes( - span: ReadableSpan, - request_model: str, - response_id: str = None, - response_model: str = None, - input_tokens: Optional[int] = None, - output_tokens: Optional[int] = None, - operation_name: str = "chat", - server_address: str = "api.openai.com", -): - assert span.name == f"{operation_name} {request_model}" - assert ( - operation_name - == span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] - ) - assert ( - GenAIAttributes.GenAiSystemValues.OPENAI.value - == span.attributes[GenAIAttributes.GEN_AI_SYSTEM] - ) - assert ( - request_model == span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] - ) - if response_model: - assert ( - response_model - == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] - ) - else: - assert GenAIAttributes.GEN_AI_RESPONSE_MODEL not in span.attributes - - if response_id: - assert ( - response_id == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID] - ) - else: - assert GenAIAttributes.GEN_AI_RESPONSE_ID not in span.attributes - - if input_tokens: - assert ( - input_tokens - == span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] - ) - else: - assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS not in span.attributes - - if output_tokens: - assert ( - output_tokens - == span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] - ) - else: - assert ( - GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS not in span.attributes - ) - - assert server_address 
== span.attributes[ServerAttributes.SERVER_ADDRESS] - - -def assert_log_parent(log, span): - if span: - assert log.log_record.trace_id == span.get_span_context().trace_id - assert log.log_record.span_id == span.get_span_context().span_id - assert ( - log.log_record.trace_flags == span.get_span_context().trace_flags - ) - - -def get_current_weather_tool_definition(): - return { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. Boston, MA", - }, - }, - "required": ["location"], - "additionalProperties": False, - }, - }, - } diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py index ffcd99c5b4..3bf2bded8e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py @@ -1,5 +1,8 @@ import pytest +from opentelemetry.instrumentation.openai_v2.utils import ( + is_latest_experimental_enabled, +) from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) @@ -42,15 +45,21 @@ ) -def assert_all_metric_attributes(data_point): +def assert_all_metric_attributes(data_point, latest_experimental_enabled): assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes assert ( data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == GenAIAttributes.GenAiOperationNameValues.CHAT.value ) - assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes + + provider_name_attr_name = ( + "gen_ai.provider.name" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_SYSTEM + ) + assert provider_name_attr_name in data_point.attributes assert ( - data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM] + data_point.attributes[provider_name_attr_name] == GenAIAttributes.GenAiSystemValues.OPENAI.value ) assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes @@ -63,21 +72,25 @@ def assert_all_metric_attributes(data_point): data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] == "gpt-4o-mini-2024-07-18" ) - assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes - assert ( - data_point.attributes["gen_ai.openai.response.system_fingerprint"] - == "fp_0ba0d124f1" + + system_fingerprint_attr_key = ( + "openai.response.system_fingerprint" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT ) + assert system_fingerprint_attr_key in data_point.attributes assert ( - GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER - in data_point.attributes + data_point.attributes[system_fingerprint_attr_key] == "fp_0ba0d124f1" ) - assert ( - data_point.attributes[ - GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER - ] - == "default" + + service_tier_attr_key = ( + "openai.response.service_tier" + if latest_experimental_enabled + else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER ) + assert service_tier_attr_key in data_point.attributes + assert service_tier_attr_key in data_point.attributes + assert data_point.attributes[service_tier_attr_key] == "default" assert ( data_point.attributes[ServerAttributes.SERVER_ADDRESS] == "api.openai.com" @@ -86,142 +99,158 @@ def 
assert_all_metric_attributes(data_point): @pytest.mark.vcr() def test_chat_completion_metrics( - metric_reader, openai_client, instrument_with_content + metric_reader, openai_client, instrument_with_content, vcr ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_chat_completion_metrics.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) + openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - metrics = metric_reader.get_metrics_data().resource_metrics - assert len(metrics) == 1 + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 - metric_data = metrics[0].scope_metrics[0].metrics - assert len(metric_data) == 2 + metric_data = metrics[0].scope_metrics[0].metrics + assert len(metric_data) == 2 - duration_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION - ), - None, - ) - assert duration_metric is not None - - duration_point = duration_metric.data.data_points[0] - assert duration_point.sum > 0 - assert_all_metric_attributes(duration_point) - assert duration_point.explicit_bounds == _DURATION_BUCKETS - - token_usage_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE - ), - None, - ) - assert token_usage_metric is not None - - input_token_usage = next( - ( - d - for d in token_usage_metric.data.data_points - if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] - == GenAIAttributes.GenAiTokenTypeValues.INPUT.value - ), - None, - ) - assert input_token_usage is not None - assert input_token_usage.sum == 12 - - assert input_token_usage.explicit_bounds == _TOKEN_USAGE_BUCKETS - assert input_token_usage.bucket_counts[2] == 1 - assert_all_metric_attributes(input_token_usage) - - output_token_usage = next( - ( - d - for d in token_usage_metric.data.data_points - if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] - == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value - ), - None, - ) - assert output_token_usage is not None - assert output_token_usage.sum == 5 - # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864] - assert output_token_usage.bucket_counts[2] == 1 - assert_all_metric_attributes(output_token_usage) + duration_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION + ), + None, + ) + assert duration_metric is not None + + duration_point = duration_metric.data.data_points[0] + assert duration_point.sum > 0 + assert_all_metric_attributes( + duration_point, latest_experimental_enabled + ) + assert duration_point.explicit_bounds == _DURATION_BUCKETS + + token_usage_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE + ), + None, + ) + assert token_usage_metric is not None + + input_token_usage = next( + ( + d + for d in token_usage_metric.data.data_points + if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] + == GenAIAttributes.GenAiTokenTypeValues.INPUT.value + ), + None, + ) + assert input_token_usage is not None + assert input_token_usage.sum == 12 + + assert input_token_usage.explicit_bounds == 
_TOKEN_USAGE_BUCKETS + assert input_token_usage.bucket_counts[2] == 1 + assert_all_metric_attributes( + input_token_usage, latest_experimental_enabled + ) + + output_token_usage = next( + ( + d + for d in token_usage_metric.data.data_points + if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] + == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value + ), + None, + ) + assert output_token_usage is not None + assert output_token_usage.sum == 5 + # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864] + assert output_token_usage.bucket_counts[2] == 1 + assert_all_metric_attributes( + output_token_usage, latest_experimental_enabled + ) @pytest.mark.vcr() @pytest.mark.asyncio() async def test_async_chat_completion_metrics( - metric_reader, async_openai_client, instrument_with_content + metric_reader, async_openai_client, instrument_with_content, vcr ): - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] + with vcr.use_cassette("test_async_chat_completion_metrics.yaml"): + latest_experimental_enabled = is_latest_experimental_enabled() + llm_model_value = "gpt-4o-mini" + messages_value = [{"role": "user", "content": "Say this is a test"}] - await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False - ) + await async_openai_client.chat.completions.create( + messages=messages_value, model=llm_model_value, stream=False + ) - metrics = metric_reader.get_metrics_data().resource_metrics - assert len(metrics) == 1 + metrics = metric_reader.get_metrics_data().resource_metrics + assert len(metrics) == 1 - metric_data = metrics[0].scope_metrics[0].metrics - assert len(metric_data) == 2 + metric_data = metrics[0].scope_metrics[0].metrics + assert len(metric_data) == 2 - duration_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION - ), - None, - ) - assert duration_metric is not None - assert duration_metric.data.data_points[0].sum > 0 - assert_all_metric_attributes(duration_metric.data.data_points[0]) - - token_usage_metric = next( - ( - m - for m in metric_data - if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE - ), - None, - ) - assert token_usage_metric is not None - - input_token_usage = next( - ( - d - for d in token_usage_metric.data.data_points - if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] - == GenAIAttributes.GenAiTokenTypeValues.INPUT.value - ), - None, - ) + duration_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION + ), + None, + ) + assert duration_metric is not None + assert duration_metric.data.data_points[0].sum > 0 + assert_all_metric_attributes( + duration_metric.data.data_points[0], latest_experimental_enabled + ) - assert input_token_usage is not None - assert input_token_usage.sum == 12 - assert_all_metric_attributes(input_token_usage) - - output_token_usage = next( - ( - d - for d in token_usage_metric.data.data_points - if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] - == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value - ), - None, - ) + token_usage_metric = next( + ( + m + for m in metric_data + if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE + ), + None, + ) + assert token_usage_metric is not None + + input_token_usage = next( + ( + d + for d in token_usage_metric.data.data_points + if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] + == 
GenAIAttributes.GenAiTokenTypeValues.INPUT.value + ), + None, + ) + + assert input_token_usage is not None + assert input_token_usage.sum == 12 + assert_all_metric_attributes( + input_token_usage, latest_experimental_enabled + ) + + output_token_usage = next( + ( + d + for d in token_usage_metric.data.data_points + if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE] + == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value + ), + None, + ) - assert output_token_usage is not None - assert output_token_usage.sum == 12 - assert_all_metric_attributes(output_token_usage) + assert output_token_usage is not None + assert output_token_usage.sum == 12 + assert_all_metric_attributes( + output_token_usage, latest_experimental_enabled + ) From 8909a5bbe6d525bc9590a347a1c134cc220727cc Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Wed, 27 Aug 2025 11:36:18 -0700 Subject: [PATCH 2/5] ready except new event and common code --- .../instrumentation/openai_v2/__init__.py | 11 +- .../instrumentation/openai_v2/patch.py | 36 ++--- .../instrumentation/openai_v2/utils.py | 70 +++++---- .../tests/conftest.py | 6 +- .../tests/test_utils.py | 135 ++++++++++++++++++ 5 files changed, 205 insertions(+), 53 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py index 15b6b8b1ef..970615bd7d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py @@ -48,7 +48,7 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.openai_v2.package import _instruments from opentelemetry.instrumentation.openai_v2.utils import ( - is_content_enabled, + get_content_mode, is_latest_experimental_enabled, ) from opentelemetry.instrumentation.utils import unwrap @@ -93,6 +93,7 @@ def _instrument(self, **kwargs): instruments = Instruments(self._meter) + latest_experimental_enabled = is_latest_experimental_enabled() wrap_function_wrapper( module="openai.resources.chat.completions", name="Completions.create", @@ -100,8 +101,8 @@ def _instrument(self, **kwargs): tracer, event_logger, instruments, - is_content_enabled(), - is_latest_experimental_enabled(), + get_content_mode(latest_experimental_enabled), + latest_experimental_enabled, ), ) @@ -112,8 +113,8 @@ def _instrument(self, **kwargs): tracer, event_logger, instruments, - is_content_enabled(), - is_latest_experimental_enabled(), + get_content_mode(latest_experimental_enabled), + latest_experimental_enabled, ), ) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index dee44648be..153de6d350 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -31,6 +31,7 @@ from .instruments import Instruments from .utils import ( DataclassEncoder, + 
ContentCapturingMode, OutputMessage, TextPart, ToolCallRequestPart, @@ -47,7 +48,7 @@ def chat_completions_create( tracer: Tracer, event_logger: EventLogger, instruments: Instruments, - capture_content: bool, + content_mode: ContentCapturingMode, latest_experimental_enabled: bool, ): """Wrap the `create` method of the `ChatCompletion` class to trace it.""" @@ -71,7 +72,7 @@ def traced_method(wrapped, instance, args, kwargs): ) as span: record_input_messages( kwargs.get("messages", []), - capture_content, + content_mode, latest_experimental_enabled, span, event_logger, @@ -87,7 +88,7 @@ def traced_method(wrapped, instance, args, kwargs): result, span, event_logger, - capture_content, + content_mode, latest_experimental_enabled, ) @@ -97,7 +98,7 @@ def traced_method(wrapped, instance, args, kwargs): ) record_output_messages( getattr(result, "choices", []), - capture_content, + content_mode, latest_experimental_enabled, span, event_logger, @@ -128,7 +129,7 @@ def async_chat_completions_create( tracer: Tracer, event_logger: EventLogger, instruments: Instruments, - capture_content: bool, + content_mode: ContentCapturingMode, latest_experimental_enabled: bool, ): """Wrap the `create` method of the `AsyncChatCompletion` class to trace it.""" @@ -152,7 +153,7 @@ async def traced_method(wrapped, instance, args, kwargs): ) as span: record_input_messages( kwargs.get("messages", []), - capture_content, + content_mode, latest_experimental_enabled, span, event_logger, @@ -168,7 +169,7 @@ async def traced_method(wrapped, instance, args, kwargs): result, span, event_logger, - capture_content, + content_mode, latest_experimental_enabled, ) @@ -178,7 +179,7 @@ async def traced_method(wrapped, instance, args, kwargs): ) record_output_messages( getattr(result, "choices", []), - capture_content, + content_mode, latest_experimental_enabled, span, event_logger, @@ -380,14 +381,14 @@ def __init__( stream: Stream, span: Span, event_logger: EventLogger, - capture_content: bool, + content_mode: ContentCapturingMode, latest_experimental_enabled: bool, ): self.stream = stream self.span = span self.choice_buffers = [] self._span_started = False - self.capture_content = capture_content + self.content_mode = content_mode self.latest_experimental_enabled = latest_experimental_enabled self.event_logger = event_logger @@ -443,7 +444,8 @@ def cleanup(self): ) if self.latest_experimental_enabled: - if not self.capture_content: + if (self.content_mode == ContentCapturingMode.NONE or + (self.content_mode == ContentCapturingMode.SPAN and not self.span.is_recording())): pass else: output_messages = [] @@ -454,7 +456,7 @@ def cleanup(self): ) output_messages.append(message) - if self.capture_content and choice.text_content: + if choice.text_content: message.parts.append( TextPart(content="".join(choice.text_content)) ) @@ -472,9 +474,8 @@ def cleanup(self): part.arguments = arguments message.parts.append(part) - # TODO: config between spans and events - # also if spans and span is not recording, let's not do it all - if self.span.is_recording(): + + if self.span.is_recording() and self.content_mode == ContentCapturingMode.SPAN: self.span.set_attribute( "gen_ai.output.messages", json.dumps( @@ -483,16 +484,17 @@ def cleanup(self): cls=DataclassEncoder, ), ) + # TODO: event else: for idx, choice in enumerate(self.choice_buffers): message = {"role": "assistant"} - if self.capture_content and choice.text_content: + if self.content_mode == ContentCapturingMode.EVENT and choice.text_content: message["content"] = 
"".join(choice.text_content) if choice.tool_calls_buffers: tool_calls = [] for tool_call in choice.tool_calls_buffers: function = {"name": tool_call.function_name} - if self.capture_content: + if self.content_mode == ContentCapturingMode.EVENT: function["arguments"] = "".join( tool_call.arguments ) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py index 322966ca5f..3c8cef81bc 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py @@ -17,6 +17,7 @@ from collections.abc import Iterable from dataclasses import dataclass, field from enum import Enum +import logging from os import environ from typing import Any, List, Mapping, Optional, Union from urllib.parse import urlparse @@ -43,13 +44,28 @@ # TODO: reuse common code OTEL_SEMCONV_STABILITY_OPT_IN = "OTEL_SEMCONV_STABILITY_OPT_IN" +logger = logging.getLogger(__name__) -def is_content_enabled() -> bool: +class ContentCapturingMode(str, Enum): + SPAN = "span" + EVENT = "event" + NONE = "none" + +def get_content_mode(latest_experimental_enabled: bool) -> ContentCapturingMode: capture_content = environ.get( - OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false" - ) + OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "none" + ).lower() - return capture_content.lower() == "true" + if latest_experimental_enabled: + try: + return ContentCapturingMode(capture_content) + except ValueError as ex: + logger.warning("Error when parsing `%s` environment variable: {%s}", OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, str(ex)) + return ContentCapturingMode.NONE + + else: + # back-compat + return ContentCapturingMode.EVENT if capture_content == "true" else ContentCapturingMode.NONE def is_latest_experimental_enabled() -> bool: @@ -61,7 +77,7 @@ def is_latest_experimental_enabled() -> bool: ) -def extract_tool_calls_old(item, capture_content): +def extract_tool_calls_old(item, content_mode: ContentCapturingMode): tool_calls = get_property_value(item, "tool_calls") if tool_calls is None: return None @@ -86,7 +102,7 @@ def extract_tool_calls_old(item, capture_content): tool_call_dict["function"]["name"] = name arguments = get_property_value(func, "arguments") - if capture_content and arguments: + if content_mode == ContentCapturingMode.EVENT and arguments: if isinstance(arguments, str): arguments = arguments.replace("\n", "") tool_call_dict["function"]["arguments"] = arguments @@ -150,13 +166,14 @@ def get_property_value(obj, property_name): def record_input_messages( messages, - capture_content: bool, + content_mode: ContentCapturingMode, latest_experimental_enabled: bool, span: Span, event_logger: EventLogger, ): if latest_experimental_enabled: - if not capture_content: + if (content_mode == ContentCapturingMode.NONE or + (content_mode == ContentCapturingMode.SPAN and not span.is_recording())): return chat_messages = [] @@ -186,19 +203,17 @@ def record_input_messages( chat_message.parts.append(TextPart(content=content)) # continue? 
- # TODO: config between spans and events - # also if spans and span is not recording, let's not do it all - if span.is_recording(): + if span.is_recording() and content_mode == ContentCapturingMode.SPAN: span.set_attribute( "gen_ai.input.messages", json.dumps( chat_messages, ensure_ascii=False, cls=DataclassEncoder ), ) - + # TODO: events else: for message in messages: - event_logger.emit(_message_to_event(message, capture_content)) + event_logger.emit(_message_to_event(message, content_mode)) def _is_text_part(content: Any) -> bool: @@ -210,13 +225,14 @@ def _is_text_part(content: Any) -> bool: def record_output_messages( choices, - capture_content: bool, + content_mode: ContentCapturingMode, latest_experimental_enabled: bool, span: Span, event_logger: EventLogger, ): if latest_experimental_enabled: - if not capture_content: + if (content_mode == ContentCapturingMode.NONE or + (content_mode == ContentCapturingMode.SPAN and not span.is_recording())): return output_messages = [] @@ -239,22 +255,21 @@ def record_output_messages( if _is_text_part(content): message.parts.append(TextPart(content=content)) - # TODO: config between spans and events - # also if spans and span is not recording, let's not do it all - if span.is_recording(): + + if span.is_recording() and content_mode == ContentCapturingMode.SPAN: span.set_attribute( "gen_ai.output.messages", json.dumps( output_messages, ensure_ascii=False, cls=DataclassEncoder ), ) - + # TODO: events else: for choice in choices: - event_logger.emit(_choice_to_event(choice, capture_content)) + event_logger.emit(_choice_to_event(choice, content_mode)) -def _message_to_event(message, capture_content): +def _message_to_event(message, content_mode: ContentCapturingMode): attributes = { GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value } @@ -262,10 +277,10 @@ def _message_to_event(message, capture_content): content = get_property_value(message, "content") body = {} - if capture_content and content: + if content_mode == ContentCapturingMode.EVENT and content: body["content"] = content if role == "assistant": - tool_calls = extract_tool_calls_old(message, capture_content) + tool_calls = extract_tool_calls_old(message, content_mode) if tool_calls: body = {"tool_calls": tool_calls} elif role == "tool": @@ -280,7 +295,7 @@ def _message_to_event(message, capture_content): ) -def _choice_to_event(choice, capture_content): +def _choice_to_event(choice, content_mode: ContentCapturingMode): attributes = { GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value } @@ -298,11 +313,11 @@ def _choice_to_event(choice, capture_content): else None ) } - tool_calls = extract_tool_calls_old(choice.message, capture_content) + tool_calls = extract_tool_calls_old(choice.message, content_mode) if tool_calls: message["tool_calls"] = tool_calls content = get_property_value(choice.message, "content") - if capture_content and content: + if content_mode == ContentCapturingMode.EVENT and content: message["content"] = content body["message"] = message @@ -376,7 +391,7 @@ def get_llm_request_attributes( ) is not None: if response_format_type == "text": attributes[output_type_attr_key] = ( - "text" # TODO there should be an enum + "text" # TODO there should be an enum in semconv package ) elif ( response_format_type == "json_schema" @@ -385,7 +400,6 @@ def get_llm_request_attributes( attributes[output_type_attr_key] = "json" else: # should never happen with chat completion API - # TODO: internal log pass else: # should never happen with 
chat completion API diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py index c1242929fe..b3420796bf 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py @@ -118,7 +118,7 @@ def instrument_no_content( latest_experimental_enabled, ): os.environ.update( - {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"} + {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "none"} ) os.environ.update( @@ -150,7 +150,7 @@ def instrument_with_content( latest_experimental_enabled, ): os.environ.update( - {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"} + {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "span" if latest_experimental_enabled else "True"} ) os.environ.update( @@ -181,7 +181,7 @@ def instrument_with_content_unsampled( latest_experimental_enabled, ): os.environ.update( - {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"} + {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "span" if latest_experimental_enabled else "True"} ) os.environ.update( diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py new file mode 100644 index 0000000000..02656f2783 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py @@ -0,0 +1,135 @@ +import json +from typing import Optional +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, + server_attributes as ServerAttributes, +) + +from openai.resources.chat.completions import ChatCompletion + +def assert_all_attributes( + span: ReadableSpan, + request_model: str, + latest_experimental_enabled: bool, + response_id: str = None, + response_model: str = None, + input_tokens: Optional[int] = None, + output_tokens: Optional[int] = None, + operation_name: str = "chat", + server_address: str = "api.openai.com", +): + assert span.name == f"{operation_name} {request_model}" + assert ( + operation_name + == span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] + ) + provider_name_attr_name = "gen_ai.provider.name" if latest_experimental_enabled else GenAIAttributes.GEN_AI_SYSTEM + assert ( + GenAIAttributes.GenAiSystemValues.OPENAI.value + == span.attributes[provider_name_attr_name] + ) + assert ( + request_model == span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] + ) + if response_model: + assert ( + response_model + == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] + ) + else: + assert GenAIAttributes.GEN_AI_RESPONSE_MODEL not in span.attributes + + if response_id: + assert ( + response_id == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID] + ) + else: + assert GenAIAttributes.GEN_AI_RESPONSE_ID not in span.attributes + + if input_tokens: + assert ( + input_tokens + == span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] + ) + else: + assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS not in span.attributes + + if output_tokens: + assert ( + output_tokens + == span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] + ) + else: + assert ( + GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS not in span.attributes + ) + + assert server_address == span.attributes[ServerAttributes.SERVER_ADDRESS] + + +def 
assert_log_parent(log, span): + if span: + assert log.log_record.trace_id == span.get_span_context().trace_id + assert log.log_record.span_id == span.get_span_context().span_id + assert ( + log.log_record.trace_flags == span.get_span_context().trace_flags + ) + + +def get_current_weather_tool_definition(): + return { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Boston, MA", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + }, + } + +def remove_none_values(body): + result = {} + for key, value in body.items(): + if value is None: + continue + if isinstance(value, dict): + result[key] = remove_none_values(value) + elif isinstance(value, list): + result[key] = [remove_none_values(i) for i in value] + else: + result[key] = value + return result + + +def assert_completion_attributes( + span: ReadableSpan, + request_model: str, + response: ChatCompletion, + latest_experimental_enabled: bool, + operation_name: str = "chat", + server_address: str = "api.openai.com", +): + return assert_all_attributes( + span, + request_model, + latest_experimental_enabled, + response.id, + response.model, + response.usage.prompt_tokens, + response.usage.completion_tokens, + operation_name, + server_address, + ) + +def assert_messages_attribute(actual, expected): + assert json.loads(actual) == expected \ No newline at end of file From 1b725b49d683b754034813354e6581a95bb223e9 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Wed, 27 Aug 2025 11:41:23 -0700 Subject: [PATCH 3/5] remove unintentional changes --- .../examples/manual/main.py | 47 +++++-------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py index 1f642ca264..4b0c121b7a 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/main.py @@ -4,13 +4,10 @@ from openai import OpenAI # NOTE: OpenTelemetry Python Logs and Events APIs are in beta -from opentelemetry import _events, _logs, metrics, trace +from opentelemetry import _events, _logs, trace from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( OTLPLogExporter, ) -from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( - OTLPMetricExporter, -) from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( OTLPSpanExporter, ) @@ -18,8 +15,6 @@ from opentelemetry.sdk._events import EventLoggerProvider from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import BatchLogRecordProcessor -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor @@ -36,42 +31,22 @@ ) _events.set_event_logger_provider(EventLoggerProvider()) -# configure metrics -metrics.set_meter_provider( - MeterProvider( - metric_readers=[ - PeriodicExportingMetricReader( - OTLPMetricExporter(), - ), - ] - ) -) - -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor - -HTTPXClientInstrumentor().instrument() - # 
instrument OpenAI OpenAIInstrumentor().instrument() -tracer = trace.get_tracer(__name__) - def main(): client = OpenAI() - - for u in range(10): - with tracer.start_as_current_span("main"): - chat_completion = client.chat.completions.create( - model=os.getenv("CHAT_MODEL", "gpt-4o-mini"), - messages=[ - { - "role": "user", - "content": "Write a haiku on OpenTelemetry.", - }, - ], - ) - print(chat_completion.choices[0].message.content) + chat_completion = client.chat.completions.create( + model=os.getenv("CHAT_MODEL", "gpt-4o-mini"), + messages=[ + { + "role": "user", + "content": "Write a short poem on OpenTelemetry.", + }, + ], + ) + print(chat_completion.choices[0].message.content) if __name__ == "__main__": From 996cda8d05c80e99e790c29536f8bcd7de33bd59 Mon Sep 17 00:00:00 2001 From: Liudmila Molkova Date: Wed, 27 Aug 2025 16:18:43 -0700 Subject: [PATCH 4/5] implement details event and add tests --- .../README.rst | 2 + .../examples/manual/.env | 18 +- .../examples/zero-code/.env | 18 +- .../examples/zero-code/README.rst | 1 + .../instrumentation/openai_v2/patch.py | 265 ++++++--- .../instrumentation/openai_v2/utils.py | 241 +++++--- .../tests/conftest.py | 11 +- .../tests/test_async_chat_completions.py | 511 +++++++++-------- .../tests/test_chat_completions.py | 518 ++++++++---------- .../tests/test_chat_metrics.py | 13 +- .../tests/test_utils.py | 234 ++++++-- 11 files changed, 1081 insertions(+), 751 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst index 32de3ed255..0775c59afd 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst @@ -82,6 +82,8 @@ Message content such as the contents of the prompt, completion, function argumen are not captured by default. To capture message content as log events, set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `true`. +TODO! + Uninstrument ************ diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/.env b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/.env index 1e77ee78c0..16e686f44a 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/.env +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/.env @@ -12,5 +12,19 @@ OPENAI_API_KEY=sk-YOUR_API_KEY OTEL_SERVICE_NAME=opentelemetry-python-openai -# Change to 'false' to hide prompt and completion content -OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +# Remove or change to 'none' to hide prompt and completion content +# Possible values (case insensitive): +# - `span` - record content on span attibutes +# - `event` - record content on event attributes +# - `true` - only used for backward compatibility when +# `gen_ai_latest_experimental` is not set in the +# `OTEL_SEMCONV_STABILITY_OPT_IN` environemnt variable. +# - everything else - don't record content on any signal +OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=span + +# Enables latest and greatest features available in GenAI semantic conventions. +# Note: since conventions are still in development, using this flag would +# likely result in having breaking changes. +# +# Comment out if you want to use semantic conventions of version 1.36.0. 
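(The comment block above enumerates the accepted values. For reference, a minimal sketch of how the instrumentation resolves them, mirroring the `get_content_mode` helper added earlier in this series; simplified, not the exact implementation:)

    import os
    from enum import Enum

    class ContentCapturingMode(str, Enum):
        SPAN = "span"
        EVENT = "event"
        NONE = "none"

    def resolve_capture_mode(latest_experimental_enabled: bool) -> ContentCapturingMode:
        # Simplified restatement of get_content_mode from this patch series.
        raw = os.environ.get(
            "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "none"
        ).lower()
        if latest_experimental_enabled:
            try:
                return ContentCapturingMode(raw)  # "span" / "event" / "none"
            except ValueError:
                # unrecognized values (including "true") are warned about and ignored
                return ContentCapturingMode.NONE
        # legacy behaviour: only "true" enables capture, and only as events
        return (
            ContentCapturingMode.EVENT
            if raw == "true"
            else ContentCapturingMode.NONE
        )

(So `true` keeps its old meaning only while `gen_ai_latest_experimental` is not opted in; under the new conventions it is treated as an unrecognized value and content capture stays off.)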
+OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/.env b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/.env index 8f2dd62b91..489353a1b1 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/.env +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/.env @@ -18,5 +18,19 @@ OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true # Uncomment if your OTLP endpoint doesn't support logs # OTEL_LOGS_EXPORTER=console -# Change to 'false' to hide prompt and completion content -OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true +# Remove or change to 'none' to hide prompt and completion content +# Possible values (case insensitive): +# - `span` - record content on span attibutes +# - `event` - record content on event attributes +# - `true` - only used for backward compatibility when +# `gen_ai_latest_experimental` is not set in the +# `OTEL_SEMCONV_STABILITY_OPT_IN` environemnt variable. +# - everything else - don't record content on any signal +OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=span + +# Enables latest and greatest features available in GenAI semantic conventions. +# Note: since conventions are still in development, using this flag would +# likely result in having breaking changes. +# +# Comment out if you want to use semantic conventions of version 1.36.0. +OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst index 4332c0b7c0..51d2f50a7f 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst @@ -16,6 +16,7 @@ Note: `.env <.env>`_ file configures additional environment variables: - ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true`` configures OpenAI instrumentation to capture prompt and completion contents on events. - ``OTEL_LOGS_EXPORTER=otlp`` to specify exporter type. +TODO! 
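(With `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=span`, the tests in this series expect the span to carry JSON-encoded message lists in the new `gen_ai.input.messages` / `gen_ai.output.messages` attributes. A minimal illustration of that shape; the text content and tool-call id below are made up:)

    import json

    # Shape of the new message attributes when content capture is set to "span".
    example_input_messages = json.dumps([
        {"role": "system", "parts": [{"type": "text", "content": "You're a helpful assistant."}]},
        {"role": "user", "parts": [{"type": "text", "content": "Say this is a test"}]},
    ])
    example_output_messages = json.dumps([
        {
            "role": "assistant",
            "parts": [{"type": "text", "content": "This is a test."}],
            "finish_reason": "stop",
        },
    ])
    # A requested tool call shows up as a part of type "tool_call", e.g.:
    # {"type": "tool_call", "id": "call_abc123", "name": "get_current_weather",
    #  "arguments": {"location": "Seattle, WA"}}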
Setup ----- diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index 153de6d350..434450d9a0 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -15,7 +15,7 @@ import json from timeit import default_timer -from typing import Optional +from typing import List, Optional from openai import Stream @@ -30,17 +30,18 @@ from .instruments import Instruments from .utils import ( - DataclassEncoder, ContentCapturingMode, + DataclassEncoder, OutputMessage, TextPart, ToolCallRequestPart, + create_details_event_attributes, get_llm_request_attributes, - handle_span_exception, is_streaming, + record_exception, record_input_messages, record_output_messages, - set_span_attribute, + set_attribute, ) @@ -62,6 +63,9 @@ def traced_method(wrapped, instance, args, kwargs): latest_experimental_enabled, ) } + details_event_attributes = create_details_event_attributes( + span_attributes, latest_experimental_enabled, content_mode + ) span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}" with tracer.start_as_current_span( @@ -75,6 +79,7 @@ def traced_method(wrapped, instance, args, kwargs): content_mode, latest_experimental_enabled, span, + details_event_attributes, event_logger, ) @@ -87,29 +92,46 @@ def traced_method(wrapped, instance, args, kwargs): return StreamWrapper( result, span, + details_event_attributes, event_logger, content_mode, latest_experimental_enabled, ) - if span.is_recording(): - _set_response_attributes( - span, result, latest_experimental_enabled - ) + _set_response_attributes( + span, + details_event_attributes, + result, + latest_experimental_enabled, + ) record_output_messages( getattr(result, "choices", []), content_mode, latest_experimental_enabled, span, + details_event_attributes, event_logger, ) + if details_event_attributes: + event_logger.emit( + Event( + name="gen_ai.client.inference.operation.details", + attributes=details_event_attributes, + trace_id=span.get_span_context().trace_id, + span_id=span.get_span_context().span_id, + trace_flags=span.get_span_context().trace_flags, + ) + ) + span.end() return result except Exception as error: error_type = type(error).__qualname__ - handle_span_exception(span, error) + record_exception( + span, details_event_attributes, error, event_logger + ) raise finally: duration = max((default_timer() - start), 0) @@ -144,6 +166,10 @@ async def traced_method(wrapped, instance, args, kwargs): ) } + details_event_attributes = create_details_event_attributes( + span_attributes, latest_experimental_enabled, content_mode + ) + span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}" with tracer.start_as_current_span( name=span_name, @@ -156,6 +182,7 @@ async def traced_method(wrapped, instance, args, kwargs): content_mode, latest_experimental_enabled, span, + details_event_attributes, event_logger, ) @@ -168,29 +195,45 @@ async def traced_method(wrapped, instance, args, kwargs): return StreamWrapper( result, span, + details_event_attributes, event_logger, content_mode, latest_experimental_enabled, ) - if span.is_recording(): - 
_set_response_attributes( - span, result, latest_experimental_enabled - ) + _set_response_attributes( + span, + details_event_attributes, + result, + latest_experimental_enabled, + ) record_output_messages( getattr(result, "choices", []), content_mode, latest_experimental_enabled, span, + details_event_attributes, event_logger, ) + if details_event_attributes: + event_logger.emit( + Event( + name="gen_ai.client.inference.operation.details", + attributes=details_event_attributes, + trace_id=span.get_span_context().trace_id, + span_id=span.get_span_context().span_id, + trace_flags=span.get_span_context().trace_flags, + ) + ) span.end() return result except Exception as error: error_type = type(error).__qualname__ - handle_span_exception(span, error) + record_exception( + span, details_event_attributes, error, event_logger + ) raise finally: duration = max((default_timer() - start), 0) @@ -286,24 +329,39 @@ def _record_metrics( ) -def _set_response_attributes(span, result, latest_experimental_enabled: bool): - set_span_attribute( - span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, result.model +def _set_response_attributes( + span, details_event_attributes, result, latest_experimental_enabled: bool +): + if not span.is_recording() and details_event_attributes is None: + return + + set_attribute( + span, + details_event_attributes, + GenAIAttributes.GEN_AI_RESPONSE_MODEL, + result.model, ) + # finish reasons if getattr(result, "choices", None): finish_reasons = [] for choice in result.choices: finish_reasons.append(choice.finish_reason or "error") - set_span_attribute( + set_attribute( span, + details_event_attributes, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons, ) if getattr(result, "id", None): - set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, result.id) + set_attribute( + span, + details_event_attributes, + GenAIAttributes.GEN_AI_RESPONSE_ID, + result.id, + ) service_tier_attr_key = ( "openai.response.service_tier" @@ -311,21 +369,24 @@ def _set_response_attributes(span, result, latest_experimental_enabled: bool): else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER ) if getattr(result, "service_tier", None): - set_span_attribute( + set_attribute( span, + details_event_attributes, service_tier_attr_key, result.service_tier, ) # Get the usage if getattr(result, "usage", None): - set_span_attribute( + set_attribute( span, + details_event_attributes, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, result.usage.prompt_tokens, ) - set_span_attribute( + set_attribute( span, + details_event_attributes, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, result.usage.completion_tokens, ) @@ -369,6 +430,7 @@ def append_tool_call(self, tool_call): class StreamWrapper: span: Span + details_event_attributes: Optional[dict] response_id: Optional[str] = None response_model: Optional[str] = None service_tier: Optional[str] = None @@ -380,12 +442,14 @@ def __init__( self, stream: Stream, span: Span, + details_event_attributes: dict, event_logger: EventLogger, content_mode: ContentCapturingMode, latest_experimental_enabled: bool, ): self.stream = stream self.span = span + self.details_event_attributes = details_event_attributes self.choice_buffers = [] self._span_started = False self.content_mode = content_mode @@ -400,28 +464,32 @@ def setup(self): def cleanup(self): if self._span_started: - if self.span.is_recording(): + if self.span.is_recording() or self.details_event_attributes: if self.response_model: - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, 
GenAIAttributes.GEN_AI_RESPONSE_MODEL, self.response_model, ) if self.response_id: - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, GenAIAttributes.GEN_AI_RESPONSE_ID, self.response_id, ) - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, self.prompt_tokens, ) - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, self.completion_tokens, ) @@ -431,64 +499,66 @@ def cleanup(self): if self.latest_experimental_enabled else GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER ) - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, service_tier_attr_key, self.service_tier, ) - set_span_attribute( + set_attribute( self.span, + self.details_event_attributes, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, self.finish_reasons, ) if self.latest_experimental_enabled: - if (self.content_mode == ContentCapturingMode.NONE or - (self.content_mode == ContentCapturingMode.SPAN and not self.span.is_recording())): - pass - else: - output_messages = [] - for choice in self.choice_buffers: - message = OutputMessage( - finish_reason=choice.finish_reason or "error", - role="assistant", + if ( + self.content_mode == ContentCapturingMode.SPAN + and self.span.is_recording() + ): + output_messages = self._prepare_output_messages() + + self.span.set_attribute( + "gen_ai.output.messages", + json.dumps( + output_messages, + ensure_ascii=False, + cls=DataclassEncoder, + ), + ) + # TODO: once logger.enabled is supported, we should use it to optimize + # and, when enabled, can record event even when content is disabled + if ( + self.content_mode == ContentCapturingMode.EVENT + and self.details_event_attributes is not None + ): + output_messages = self._prepare_output_messages() + self.details_event_attributes["gen_ai.output.messages"] = ( + json.dumps( + output_messages, + ensure_ascii=False, + cls=DataclassEncoder, ) - output_messages.append(message) - - if choice.text_content: - message.parts.append( - TextPart(content="".join(choice.text_content)) - ) - if choice.tool_calls_buffers: - for tool_call in choice.tool_calls_buffers: - part = ToolCallRequestPart( - name=tool_call.function_name, - id=tool_call.tool_call_id, - ) - arguments = "".join(tool_call.arguments) - if arguments: - try: - part.arguments = json.loads(arguments) - except json.JSONDecodeError: - part.arguments = arguments - - message.parts.append(part) - - if self.span.is_recording() and self.content_mode == ContentCapturingMode.SPAN: - self.span.set_attribute( - "gen_ai.output.messages", - json.dumps( - output_messages, - ensure_ascii=False, - cls=DataclassEncoder, - ), + ) + + self.event_logger.emit( + Event( + name="gen_ai.client.inference.operation.details", + attributes=self.details_event_attributes, + trace_id=self.span.get_span_context().trace_id, + span_id=self.span.get_span_context().span_id, + trace_flags=self.span.get_span_context().trace_flags, ) - # TODO: event + ) else: for idx, choice in enumerate(self.choice_buffers): message = {"role": "assistant"} - if self.content_mode == ContentCapturingMode.EVENT and choice.text_content: + if ( + self.content_mode == ContentCapturingMode.EVENT + and choice.text_content + ): message["content"] = "".join(choice.text_content) if choice.tool_calls_buffers: tool_calls = [] @@ -532,6 +602,35 @@ def cleanup(self): self.span.end() self._span_started = False + def _prepare_output_messages(self) -> 
List[OutputMessage]: + output_messages = [] + for choice in self.choice_buffers: + message = OutputMessage( + finish_reason=choice.finish_reason or "error", + role="assistant", + ) + output_messages.append(message) + + if choice.text_content: + message.parts.append( + TextPart(content="".join(choice.text_content)) + ) + if choice.tool_calls_buffers: + for tool_call in choice.tool_calls_buffers: + part = ToolCallRequestPart( + name=tool_call.function_name, + id=tool_call.tool_call_id, + ) + arguments = "".join(tool_call.arguments) + if arguments: + try: + part.arguments = json.loads(arguments) + except json.JSONDecodeError: + part.arguments = arguments + + message.parts.append(part) + return output_messages + def __enter__(self): self.setup() return self @@ -539,7 +638,12 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): try: if exc_type is not None: - handle_span_exception(self.span, exc_val) + record_exception( + self.span, + self.details_event_attributes, + exc_val, + self.event_logger, + ) finally: self.cleanup() return False # Propagate the exception @@ -551,7 +655,12 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_val, exc_tb): try: if exc_type is not None: - handle_span_exception(self.span, exc_val) + record_exception( + self.span, + self.details_event_attributes, + exc_val, + self.event_logger, + ) finally: self.cleanup() return False # Propagate the exception @@ -575,7 +684,12 @@ def __next__(self): self.cleanup() raise except Exception as error: - handle_span_exception(self.span, error) + record_exception( + self.span, + self.details_event_attributes, + error, + self.event_logger, + ) self.cleanup() raise @@ -588,7 +702,12 @@ async def __anext__(self): self.cleanup() raise except Exception as error: - handle_span_exception(self.span, error) + record_exception( + self.span, + self.details_event_attributes, + error, + self.event_logger, + ) self.cleanup() raise diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py index 3c8cef81bc..79549b8f51 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py @@ -14,10 +14,10 @@ import dataclasses import json +import logging from collections.abc import Iterable from dataclasses import dataclass, field from enum import Enum -import logging from os import environ from typing import Any, List, Mapping, Optional, Union from urllib.parse import urlparse @@ -46,12 +46,16 @@ logger = logging.getLogger(__name__) + class ContentCapturingMode(str, Enum): SPAN = "span" EVENT = "event" NONE = "none" - -def get_content_mode(latest_experimental_enabled: bool) -> ContentCapturingMode: + + +def get_content_mode( + latest_experimental_enabled: bool, +) -> ContentCapturingMode: capture_content = environ.get( OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "none" ).lower() @@ -60,12 +64,20 @@ def get_content_mode(latest_experimental_enabled: bool) -> ContentCapturingMode: try: return ContentCapturingMode(capture_content) except ValueError as ex: - logger.warning("Error when parsing `%s` environment variable: {%s}", OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, str(ex)) + logger.warning( + "Error when parsing `%s` environment 
variable: {%s}", + OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, + str(ex), + ) return ContentCapturingMode.NONE - + else: # back-compat - return ContentCapturingMode.EVENT if capture_content == "true" else ContentCapturingMode.NONE + return ( + ContentCapturingMode.EVENT + if capture_content == "true" + else ContentCapturingMode.NONE + ) def is_latest_experimental_enabled() -> bool: @@ -77,6 +89,29 @@ def is_latest_experimental_enabled() -> bool: ) +def create_details_event_attributes( + request_attributes: dict, + latest_experimental_enabled: bool, + content_mode: ContentCapturingMode, +): + # TODO: once logger.enabled is supported, we should use it to optimize + # and, when enabled, can record event even when content is disabled + # for now, let's only enable event when user enabled content on events. + details_event_attributes = ( + request_attributes.copy() + if latest_experimental_enabled + and content_mode == ContentCapturingMode.EVENT + else None + ) + # TODO: switch to proper event name once possible + if details_event_attributes: + details_event_attributes["event.name"] = ( + "gen_ai.client.inference.operation.details" + ) + + return details_event_attributes + + def extract_tool_calls_old(item, content_mode: ContentCapturingMode): tool_calls = get_property_value(item, "tool_calls") if tool_calls is None: @@ -169,53 +204,70 @@ def record_input_messages( content_mode: ContentCapturingMode, latest_experimental_enabled: bool, span: Span, + details_event_attributes: dict, event_logger: EventLogger, ): if latest_experimental_enabled: - if (content_mode == ContentCapturingMode.NONE or - (content_mode == ContentCapturingMode.SPAN and not span.is_recording())): + if ( + content_mode == ContentCapturingMode.NONE + or ( + content_mode == ContentCapturingMode.SPAN + and not span.is_recording() + ) + or ( + content_mode == ContentCapturingMode.EVENT + and details_event_attributes is None + ) + ): return - chat_messages = [] - for message in messages: - role = get_property_value(message, "role") - chat_message = ChatMessage(role=role, parts=[]) - chat_messages.append(chat_message) - - content = get_property_value(message, "content") - - if role == "assistant": - tool_calls = get_property_value(message, "tool_calls") - if tool_calls: - chat_message.parts += extract_tool_calls_new(tool_calls) - if _is_text_part(content): - chat_message.parts.append(TextPart(content=content)) - - elif role == "tool": - tool_call_id = get_property_value(message, "tool_call_id") - chat_message.parts.append( - ToolCallResponsePart(id=tool_call_id, response=content) - ) - - else: - # system, developer, user, fallback - if _is_text_part(content): - chat_message.parts.append(TextPart(content=content)) - # continue? 
+ chat_messages = json.dumps( + _prepare_input_messages(messages), + ensure_ascii=False, + cls=DataclassEncoder, + ) if span.is_recording() and content_mode == ContentCapturingMode.SPAN: - span.set_attribute( - "gen_ai.input.messages", - json.dumps( - chat_messages, ensure_ascii=False, cls=DataclassEncoder - ), - ) - # TODO: events + span.set_attribute("gen_ai.input.messages", chat_messages) + elif ( + details_event_attributes is not None + and content_mode == ContentCapturingMode.EVENT + ): + details_event_attributes["gen_ai.input.messages"] = chat_messages else: for message in messages: event_logger.emit(_message_to_event(message, content_mode)) +def _prepare_input_messages(messages) -> List["ChatMessage"]: + chat_messages = [] + for message in messages: + role = get_property_value(message, "role") + chat_message = ChatMessage(role=role, parts=[]) + chat_messages.append(chat_message) + + content = get_property_value(message, "content") + + if role == "assistant": + tool_calls = get_property_value(message, "tool_calls") + if tool_calls: + chat_message.parts += extract_tool_calls_new(tool_calls) + if _is_text_part(content): + chat_message.parts.append(TextPart(content=content)) + + elif role == "tool": + tool_call_id = get_property_value(message, "tool_call_id") + chat_message.parts.append( + ToolCallResponsePart(id=tool_call_id, response=content) + ) + + else: + # system, developer, user, fallback + if _is_text_part(content): + chat_message.parts.append(TextPart(content=content)) + return chat_messages + + def _is_text_part(content: Any) -> bool: return isinstance(content, str) or ( isinstance(content, Iterable) @@ -228,47 +280,57 @@ def record_output_messages( content_mode: ContentCapturingMode, latest_experimental_enabled: bool, span: Span, + event_attributes: dict, event_logger: EventLogger, ): if latest_experimental_enabled: - if (content_mode == ContentCapturingMode.NONE or - (content_mode == ContentCapturingMode.SPAN and not span.is_recording())): + if content_mode == ContentCapturingMode.NONE or ( + content_mode == ContentCapturingMode.SPAN + and not span.is_recording() + ): return - output_messages = [] - for choice in choices: - message = OutputMessage( - finish_reason=choice.finish_reason or "error", - role=( - choice.message.role - if choice.message and choice.message.role - else None - ), - ) - output_messages.append(message) - - if choice.message: - tool_calls = get_property_value(choice.message, "tool_calls") - if tool_calls: - message.parts += extract_tool_calls_new(tool_calls) - content = get_property_value(choice.message, "content") - if _is_text_part(content): - message.parts.append(TextPart(content=content)) - + output_messages = json.dumps( + _prepare_output_messages(choices), + ensure_ascii=False, + cls=DataclassEncoder, + ) - if span.is_recording() and content_mode == ContentCapturingMode.SPAN: - span.set_attribute( - "gen_ai.output.messages", - json.dumps( - output_messages, ensure_ascii=False, cls=DataclassEncoder - ), - ) - # TODO: events + if content_mode == ContentCapturingMode.SPAN: + span.set_attribute("gen_ai.output.messages", output_messages) + elif ( + content_mode == ContentCapturingMode.EVENT + and event_attributes is not None + ): + event_attributes["gen_ai.output.messages"] = output_messages else: for choice in choices: event_logger.emit(_choice_to_event(choice, content_mode)) +def _prepare_output_messages(choices) -> List["OutputMessage"]: + output_messages = [] + for choice in choices: + message = OutputMessage( + finish_reason=choice.finish_reason or 
"error", + role=( + choice.message.role + if choice.message and choice.message.role + else None + ), + ) + output_messages.append(message) + + if choice.message: + tool_calls = get_property_value(choice.message, "tool_calls") + if tool_calls: + message.parts += extract_tool_calls_new(tool_calls) + content = get_property_value(choice.message, "content") + if _is_text_part(content): + message.parts.append(TextPart(content=content)) + return output_messages + + def _message_to_event(message, content_mode: ContentCapturingMode): attributes = { GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value @@ -328,16 +390,16 @@ def _choice_to_event(choice, content_mode: ContentCapturingMode): ) -def set_span_attributes(span, attributes: dict): - for field, value in attributes.model_dump(by_alias=True).items(): - set_span_attribute(span, field, value) - +def set_attribute(span, event_attributes, name, value): + if not span.is_recording() and event_attributes is None: + return -def set_span_attribute(span, name, value): if non_numerical_value_is_set(value) is False: return span.set_attribute(name, value) + if event_attributes is not None: + event_attributes[name] = value def is_streaming(kwargs): @@ -427,12 +489,25 @@ def get_llm_request_attributes( return {k: v for k, v in attributes.items() if v is not None} -def handle_span_exception(span, error): - span.set_status(Status(StatusCode.ERROR, str(error))) +def record_exception(span, details_event_attributes, error, event_logger): if span.is_recording(): + span.set_status(Status(StatusCode.ERROR, str(error))) span.set_attribute( ErrorAttributes.ERROR_TYPE, type(error).__qualname__ ) + if details_event_attributes: + details_event_attributes[ErrorAttributes.ERROR_TYPE] = type( + error + ).__qualname__ + event_logger.emit( + Event( + name="gen_ai.client.inference.operation.details", + attributes=details_event_attributes, + trace_id=span.get_span_context().trace_id, + span_id=span.get_span_context().span_id, + trace_flags=span.get_span_context().trace_flags, + ) + ) span.end() @@ -483,11 +558,6 @@ class ChatMessage: parts: List[MessagePart] = field(default_factory=list) -@dataclass -class InputMessages: - messages: List[ChatMessage] = field(default_factory=list) - - class FinishReason(str, Enum): STOP = "stop" LENGTH = "length" @@ -501,11 +571,6 @@ class OutputMessage(ChatMessage): finish_reason: Union[FinishReason, str] = "" -@dataclass -class OutputMessages: - messages: List[OutputMessage] = field(default_factory=list) - - class DataclassEncoder(json.JSONEncoder): def default(self, obj): if dataclasses.is_dataclass(obj): diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py index b3420796bf..ee67ca3b34 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/conftest.py @@ -110,6 +110,11 @@ def latest_experimental_enabled(request): return request.param +@pytest.fixture(scope="function", params=["span", "event"]) +def content_mode(request, latest_experimental_enabled): + return request.param if latest_experimental_enabled else "True" + + @pytest.fixture(scope="function") def instrument_no_content( tracer_provider, @@ -148,9 +153,10 @@ def instrument_with_content( event_logger_provider, meter_provider, latest_experimental_enabled, + content_mode, ): os.environ.update( - 
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "span" if latest_experimental_enabled else "True"} + {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: content_mode} ) os.environ.update( @@ -179,9 +185,10 @@ def instrument_with_content_unsampled( event_logger_provider, meter_provider, latest_experimental_enabled, + content_mode, ): os.environ.update( - {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "span" if latest_experimental_enabled else "True"} + {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: content_mode} ) os.environ.update( diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py index 617beaa442..ed1f9d7d58 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_async_chat_completions.py @@ -33,10 +33,16 @@ server_attributes as ServerAttributes, ) from tests.test_utils import ( + DEFAULT_MODEL, + USER_ONLY_EXPECTED_INPUT_MESSAGES, + USER_ONLY_PROMPT, + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, + WEATHER_TOOL_PROMPT, assert_all_attributes, assert_completion_attributes, assert_log_parent, assert_messages_attribute, + format_simple_expected_output_message, get_current_weather_tool_definition, remove_none_values, ) @@ -49,57 +55,46 @@ async def test_async_chat_completion_with_content( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_async_chat_completion_with_content.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[0].message.content, - } - ], - "finish_reason": "stop", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response.choices[0].message.content + ), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 - user_message = {"content": messages_value[0]["content"]} + user_message = {"content": USER_ONLY_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.user.message", user_message, spans[0] ) @@ -128,16 +123,18 @@ async def test_async_chat_completion_no_content( ): with 
vcr.use_cassette("test_async_chat_completion_no_content.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) spans = span_exporter.get_finished_spans() assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) logs = log_exporter.get_finished_logs() @@ -170,22 +167,21 @@ async def test_async_chat_completion_bad_endpoint( ): with vcr.use_cassette("test_async_chat_completion_bad_endpoint.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] client = AsyncOpenAI(base_url="http://localhost:4242") with pytest.raises(APIConnectionError): await client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, timeout=0.1, ) spans = span_exporter.get_finished_spans() assert_all_attributes( spans[0], - llm_model_value, + None, + DEFAULT_MODEL, latest_experimental_enabled, server_address="localhost", ) @@ -206,18 +202,17 @@ async def test_async_chat_completion_404( ): with vcr.use_cassette("test_async_chat_completion_404.yaml"): llm_model_value = "this-model-does-not-exist" - messages_value = [{"role": "user", "content": "Say this is a test"}] with pytest.raises(NotFoundError): await async_openai_client.chat.completions.create( - messages=messages_value, + messages=USER_ONLY_PROMPT, model=llm_model_value, ) spans = span_exporter.get_finished_spans() assert_all_attributes( - spans[0], llm_model_value, is_latest_experimental_enabled() + spans[0], None, llm_model_value, is_latest_experimental_enabled() ) assert ( "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] @@ -228,18 +223,18 @@ async def test_async_chat_completion_404( @pytest.mark.asyncio() async def test_async_chat_completion_extra_params( span_exporter, + log_exporter, async_openai_client, - instrument_no_content, + instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_async_chat_completion_extra_params.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = await async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, seed=42, temperature=0.5, max_tokens=50, @@ -249,8 +244,16 @@ async def test_async_chat_completion_extra_params( ) spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) request_seed_attr_key = ( @@ -290,53 +293,63 @@ async def test_async_chat_completion_multiple_choices( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_async_chat_completion_multiple_choices.yaml"): 
latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, n=2, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, n=2, stream=False ) spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) if latest_experimental_enabled: + expected_output_messages = [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[1].message.content, + } + ], + "finish_reason": "stop", + }, + ] + + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[0].message.content, - } - ], - "finish_reason": "stop", - }, - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[1].message.content, - } - ], - "finish_reason": "stop", - }, - ], + signal.attributes["gen_ai.output.messages"], + expected_output_messages, ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 3 # 1 user message + 2 choice messages - user_message = {"content": messages_value[0]["content"]} + user_message = {"content": USER_ONLY_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.user.message", user_message, spans[0] ) @@ -373,6 +386,7 @@ async def test_async_chat_completion_tool_calls_with_content( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( @@ -383,6 +397,7 @@ async def test_async_chat_completion_tool_calls_with_content( log_exporter, async_openai_client, True, + content_mode, is_latest_experimental_enabled(), ) @@ -404,6 +419,7 @@ async def test_async_chat_completion_tool_calls_no_content( log_exporter, async_openai_client, False, + None, is_latest_experimental_enabled(), ) @@ -413,20 +429,14 @@ async def chat_completion_tool_call( log_exporter, async_openai_client, expect_content, + content_mode, latest_experimental_enabled, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] + messages_value = WEATHER_TOOL_PROMPT.copy() response_0 = await async_openai_client.chat.completions.create( messages=messages_value, - model=llm_model_value, + model=DEFAULT_MODEL, tool_choice="auto", tools=[get_current_weather_tool_definition()], ) @@ -459,7 +469,7 @@ async def chat_completion_tool_call( messages_value.append(tool_call_result_1) response_1 = await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value + messages=messages_value, model=DEFAULT_MODEL ) # sanity check @@ -468,11 +478,24 @@ async def chat_completion_tool_call( # validate both calls spans = span_exporter.get_finished_spans() assert len(spans) == 2 + + 
logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 2 + assert_completion_attributes( - spans[0], llm_model_value, response_0, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response_0, + latest_experimental_enabled, ) assert_completion_attributes( - spans[1], llm_model_value, response_1, latest_experimental_enabled + spans[1], + logs[1].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response_1, + latest_experimental_enabled, ) if latest_experimental_enabled: @@ -480,28 +503,12 @@ async def chat_completion_tool_call( pass else: # first call - first_input = [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ] + signal_0 = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], first_input + signal_0.attributes["gen_ai.input.messages"], + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, ) first_output = [ @@ -529,13 +536,13 @@ async def chat_completion_tool_call( } ] assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], first_output + signal_0.attributes["gen_ai.output.messages"], first_output ) # second call del first_output[0]["finish_reason"] second_input = [] - second_input += first_input + second_input += WEATHER_TOOL_EXPECTED_INPUT_MESSAGES.copy() second_input += first_output second_input += [ { @@ -564,12 +571,15 @@ async def chat_completion_tool_call( }, ] + signal_1 = ( + logs[1].log_record if content_mode == "event" else spans[1] + ) assert_messages_attribute( - spans[1].attributes["gen_ai.input.messages"], second_input + signal_1.attributes["gen_ai.input.messages"], second_input ) assert_messages_attribute( - spans[1].attributes["gen_ai.output.messages"], + signal_1.attributes["gen_ai.output.messages"], [ { "role": "assistant", @@ -586,7 +596,6 @@ async def chat_completion_tool_call( ], ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 9 # 3 logs for first completion, 6 for second # call one @@ -720,16 +729,16 @@ async def test_async_chat_completion_streaming( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_async_chat_completion_streaming.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] kwargs = { "model": llm_model_value, - "messages": messages_value, + "messages": USER_ONLY_PROMPT, "stream": True, "stream_options": {"include_usage": True}, } @@ -750,8 +759,13 @@ async def test_async_chat_completion_streaming( response_stream_id = chunk.id spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "events": + assert len(logs) == 1 + assert_all_attributes( spans[0], + logs[0].log_record if content_mode == "event" else None, llm_model_value, latest_experimental_enabled, response_stream_id, @@ -760,34 +774,18 @@ async def test_async_chat_completion_streaming( response_stream_usage.completion_tokens, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - 
"type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - {"type": "text", "content": response_stream_result} - ], - "finish_reason": "stop", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message(response_stream_result), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 user_message = {"content": "Say this is a test"} @@ -815,6 +813,7 @@ async def test_async_chat_completion_streaming_not_complete( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( @@ -822,11 +821,10 @@ async def test_async_chat_completion_streaming_not_complete( ): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] kwargs = { "model": llm_model_value, - "messages": messages_value, + "messages": USER_ONLY_PROMPT, "stream": True, } @@ -850,42 +848,32 @@ async def test_async_chat_completion_streaming_not_complete( response.close() spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "events": + assert len(logs) == 1 assert_all_attributes( spans[0], + logs[0].log_record if content_mode == "event" else None, llm_model_value, latest_experimental_enabled, response_stream_id, response_stream_model, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - {"type": "text", "content": response_stream_result} - ], - "finish_reason": "error", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response_stream_result, finish_reason="error" + ), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 user_message = {"content": "Say this is a test"} @@ -913,24 +901,17 @@ async def test_async_chat_completion_multiple_choices_streaming( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( "test_async_chat_completion_multiple_choices_streaming.yaml" ): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] response_0 = await async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=WEATHER_TOOL_PROMPT, + model=DEFAULT_MODEL, n=2, stream=True, stream_options={"include_usage": True}, @@ -958,9 +939,15 @@ async def test_async_chat_completion_multiple_choices_streaming( assert "stop" == finish_reasons[0] spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + + if content_mode == "event": + assert 1 == len(logs) + assert_all_attributes( spans[0], - llm_model_value, + 
logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, latest_experimental_enabled, response_stream_id, response_stream_model, @@ -969,59 +956,43 @@ async def test_async_chat_completion_multiple_choices_streaming( ) if latest_experimental_enabled: + expected_output_messages = [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[0]), + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[1]), + } + ], + "finish_reason": "stop", + }, + ] + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ], + signal.attributes["gen_ai.input.messages"], + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": "".join(response_stream_result[0]), - } - ], - "finish_reason": "stop", - }, - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": "".join(response_stream_result[1]), - } - ], - "finish_reason": "stop", - }, - ], + signal.attributes["gen_ai.output.messages"], + expected_output_messages, ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 4 - system_message = {"content": messages_value[0]["content"]} + system_message = {"content": WEATHER_TOOL_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.system.message", system_message, spans[0] ) @@ -1065,6 +1036,7 @@ async def test_async_chat_completion_multiple_tools_streaming_with_content( log_exporter, async_openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( @@ -1075,6 +1047,7 @@ async def test_async_chat_completion_multiple_tools_streaming_with_content( log_exporter, async_openai_client, True, + content_mode, is_latest_experimental_enabled(), ) @@ -1096,6 +1069,7 @@ async def test_async_chat_completion_multiple_tools_streaming_no_content( log_exporter, async_openai_client, False, + None, is_latest_experimental_enabled(), ) @@ -1107,6 +1081,7 @@ async def test_async_chat_completion_streaming_unsampled( log_exporter, async_openai_client, instrument_with_content_unsampled, + content_mode, vcr, ): with vcr.use_cassette( @@ -1114,28 +1089,57 @@ async def test_async_chat_completion_streaming_unsampled( ): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] kwargs = { "model": llm_model_value, - "messages": messages_value, + "messages": USER_ONLY_PROMPT, "stream": True, "stream_options": {"include_usage": True}, } response_stream_result = "" + response_stream_id = None + response_stream_usage = None response = await async_openai_client.chat.completions.create(**kwargs) async for chunk in response: if chunk.choices: response_stream_result += chunk.choices[0].delta.content or "" + # get the last chunk + if getattr(chunk, "usage", None): + response_stream_usage = chunk.usage + response_stream_model = chunk.model + response_stream_id = chunk.id spans = span_exporter.get_finished_spans() assert len(spans) == 0 logs = 
log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + + assert_all_attributes( + None, + logs[0].log_record if content_mode == "event" else None, + llm_model_value, + latest_experimental_enabled, + response_stream_id, + response_stream_model, + response_stream_usage.prompt_tokens, + response_stream_usage.completion_tokens, + ) + if latest_experimental_enabled: - assert len(logs) == 0 - # TODO: new event + if content_mode == "event": + assert_messages_attribute( + logs[0].log_record.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, + ) + assert_messages_attribute( + logs[0].log_record.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + "".join(response_stream_result) + ), + ) else: assert len(logs) == 2 @@ -1173,20 +1177,12 @@ async def async_chat_completion_multiple_tools_streaming( log_exporter, async_openai_client, expect_content, + content_mode, latest_experimental_enabled, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] - response = await async_openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=WEATHER_TOOL_PROMPT, + model=DEFAULT_MODEL, tool_choice="auto", tools=[get_current_weather_tool_definition()], stream=True, @@ -1222,9 +1218,14 @@ async def async_chat_completion_multiple_tools_streaming( assert "tool_calls" == finish_reason spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + assert_all_attributes( spans[0], - llm_model_value, + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, latest_experimental_enabled, response_stream_id, response_stream_model, @@ -1235,28 +1236,12 @@ async def async_chat_completion_multiple_tools_streaming( if latest_experimental_enabled: if expect_content: # first call - first_input = [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ] + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], first_input + signal.attributes["gen_ai.input.messages"], + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, ) first_output = [ @@ -1280,14 +1265,13 @@ async def async_chat_completion_multiple_tools_streaming( } ] assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], first_output + signal.attributes["gen_ai.output.messages"], first_output ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 3 system_message = ( - {"content": messages_value[0]["content"]} + {"content": WEATHER_TOOL_PROMPT[0]["content"]} if expect_content else None ) @@ -1345,7 +1329,6 @@ async def async_chat_completion_multiple_tools_streaming( def assert_message_in_logs(log, event_name, expected_content, parent_span): - # TODO: switch to top-level eventName under latest-experimental flag assert log.log_record.attributes[EventAttributes.EVENT_NAME] == event_name assert ( log.log_record.attributes[GenAIAttributes.GEN_AI_SYSTEM] diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py 
b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py index b676f37c68..f0dbcc4bc9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_completions.py @@ -34,10 +34,16 @@ ) from opentelemetry.semconv._incubating.metrics import gen_ai_metrics from tests.test_utils import ( + DEFAULT_MODEL, + USER_ONLY_EXPECTED_INPUT_MESSAGES, + USER_ONLY_PROMPT, + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, + WEATHER_TOOL_PROMPT, assert_all_attributes, assert_completion_attributes, assert_log_parent, assert_messages_attribute, + format_simple_expected_output_message, get_current_weather_tool_definition, ) @@ -48,57 +54,47 @@ def test_chat_completion_with_content( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_chat_completion_with_content.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[0].message.content, - } - ], - "finish_reason": "stop", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response.choices[0].message.content + ), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 - user_message = {"content": messages_value[0]["content"]} + user_message = {"content": USER_ONLY_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.user.message", user_message, spans[0] ) @@ -126,16 +122,18 @@ def test_chat_completion_no_content( ): with vcr.use_cassette("test_chat_completion_no_content.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) spans = span_exporter.get_finished_spans() assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) logs = log_exporter.get_finished_logs() @@ -168,22 +166,21 @@ def test_chat_completion_bad_endpoint( ): with 
vcr.use_cassette("test_chat_completion_bad_endpoint.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] client = OpenAI(base_url="http://localhost:4242") with pytest.raises(APIConnectionError): client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, timeout=0.1, ) spans = span_exporter.get_finished_spans() assert_all_attributes( spans[0], - llm_model_value, + None, + DEFAULT_MODEL, latest_experimental_enabled, server_address="localhost", ) @@ -226,18 +223,17 @@ def test_chat_completion_404( with vcr.use_cassette("test_chat_completion_404.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "this-model-does-not-exist" - messages_value = [{"role": "user", "content": "Say this is a test"}] with pytest.raises(NotFoundError): openai_client.chat.completions.create( - messages=messages_value, + messages=USER_ONLY_PROMPT, model=llm_model_value, ) spans = span_exporter.get_finished_spans() assert_all_attributes( - spans[0], llm_model_value, latest_experimental_enabled + spans[0], None, llm_model_value, latest_experimental_enabled ) assert ( "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE] @@ -274,12 +270,10 @@ def test_chat_completion_extra_params( ): with vcr.use_cassette("test_chat_completion_extra_params.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=USER_ONLY_PROMPT, + model=DEFAULT_MODEL, seed=42, temperature=0.5, max_tokens=50, @@ -290,7 +284,11 @@ def test_chat_completion_extra_params( spans = span_exporter.get_finished_spans() assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) request_seed_attr_key = ( @@ -329,52 +327,66 @@ def test_chat_completion_multiple_choices( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_chat_completion_multiple_choices.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, n=2, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, n=2, stream=False ) spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 assert_completion_attributes( - spans[0], llm_model_value, response, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, ) if latest_experimental_enabled: + expected_output_messages = [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[0].message.content, + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": response.choices[1].message.content, + } + ], + "finish_reason": "stop", + }, + ] + + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) 
assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[0].message.content, - } - ], - "finish_reason": "stop", - }, - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response.choices[1].message.content, - } - ], - "finish_reason": "stop", - }, - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, + ) + assert_messages_attribute( + signal.attributes["gen_ai.output.messages"], + expected_output_messages, ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 3 # 1 user message + 2 choice messages - user_message = {"content": messages_value[0]["content"]} + user_message = {"content": USER_ONLY_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.user.message", user_message, spans[0] ) @@ -410,6 +422,7 @@ def test_chat_completion_tool_calls_with_content( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_chat_completion_tool_calls_with_content.yaml"): @@ -418,6 +431,7 @@ def test_chat_completion_tool_calls_with_content( log_exporter, openai_client, True, + content_mode, is_latest_experimental_enabled(), ) @@ -436,6 +450,7 @@ def test_chat_completion_tool_calls_no_content( log_exporter, openai_client, False, + None, is_latest_experimental_enabled(), ) @@ -445,20 +460,13 @@ def chat_completion_tool_call( log_exporter, openai_client, expect_content, + content_mode, latest_experimental_enabled, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] - + messages_value = WEATHER_TOOL_PROMPT.copy() response_0 = openai_client.chat.completions.create( messages=messages_value, - model=llm_model_value, + model=DEFAULT_MODEL, tool_choice="auto", tools=[get_current_weather_tool_definition()], ) @@ -491,7 +499,7 @@ def chat_completion_tool_call( messages_value.append(tool_call_result_1) response_1 = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value + messages=messages_value, model=DEFAULT_MODEL ) # sanity check @@ -500,11 +508,24 @@ def chat_completion_tool_call( # validate both calls spans = span_exporter.get_finished_spans() assert len(spans) == 2 + + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 2 + assert_completion_attributes( - spans[0], llm_model_value, response_0, latest_experimental_enabled + spans[0], + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response_0, + latest_experimental_enabled, ) assert_completion_attributes( - spans[1], llm_model_value, response_1, latest_experimental_enabled + spans[1], + logs[1].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response_1, + latest_experimental_enabled, ) if latest_experimental_enabled: @@ -512,28 +533,12 @@ def chat_completion_tool_call( pass else: # first call - first_input = [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ] + signal_0 = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], first_input + 
signal_0.attributes["gen_ai.input.messages"], + WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, ) first_output = [ @@ -560,14 +565,15 @@ def chat_completion_tool_call( "finish_reason": "tool_calls", } ] + assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], first_output + signal_0.attributes["gen_ai.output.messages"], first_output ) # second call del first_output[0]["finish_reason"] second_input = [] - second_input += first_input + second_input += WEATHER_TOOL_EXPECTED_INPUT_MESSAGES.copy() second_input += first_output second_input += [ { @@ -596,30 +602,20 @@ def chat_completion_tool_call( }, ] + signal_1 = ( + logs[1].log_record if content_mode == "event" else spans[1] + ) assert_messages_attribute( - spans[1].attributes["gen_ai.input.messages"], second_input + signal_1.attributes["gen_ai.input.messages"], second_input ) assert_messages_attribute( - spans[1].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": response_1.choices[ - 0 - ].message.content, - }, - ], - "finish_reason": "stop", - } - ], + signal_1.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response_1.choices[0].message.content + ), ) else: - logs = log_exporter.get_finished_logs() - assert len(logs) == 9 # 3 logs for first completion, 6 for second # call one @@ -752,16 +748,16 @@ def test_chat_completion_streaming( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_chat_completion_streaming.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "gpt-4" - messages_value = [{"role": "user", "content": "Say this is a test"}] kwargs = { "model": llm_model_value, - "messages": messages_value, + "messages": USER_ONLY_PROMPT, "stream": True, "stream_options": {"include_usage": True}, } @@ -782,8 +778,13 @@ def test_chat_completion_streaming( response_stream_id = chunk.id spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "events": + assert len(logs) == 1 + assert_all_attributes( spans[0], + logs[0].log_record if content_mode == "event" else None, llm_model_value, latest_experimental_enabled, response_stream_id, @@ -792,34 +793,18 @@ def test_chat_completion_streaming( response_stream_usage.completion_tokens, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - {"type": "text", "content": response_stream_result} - ], - "finish_reason": "stop", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message(response_stream_result), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 user_message = {"content": "Say this is a test"} @@ -846,16 +831,16 @@ def test_chat_completion_streaming_not_complete( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette("test_chat_completion_streaming_not_complete.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() llm_model_value = "gpt-4" - messages_value = [{"role": "user", 
"content": "Say this is a test"}] kwargs = { "model": llm_model_value, - "messages": messages_value, + "messages": USER_ONLY_PROMPT, "stream": True, } @@ -877,42 +862,32 @@ def test_chat_completion_streaming_not_complete( response.close() spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "events": + assert len(logs) == 1 assert_all_attributes( spans[0], + logs[0].log_record if content_mode == "event" else None, llm_model_value, latest_experimental_enabled, response_stream_id, response_stream_model, ) if latest_experimental_enabled: + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - } - ], + signal.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - {"type": "text", "content": response_stream_result} - ], - "finish_reason": "error", - } - ], + signal.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response_stream_result, finish_reason="error" + ), ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 2 user_message = {"content": "Say this is a test"} @@ -939,24 +914,16 @@ def test_chat_completion_multiple_choices_streaming( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( "test_chat_completion_multiple_choices_streaming.yaml" ): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] - response_0 = openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=WEATHER_TOOL_PROMPT, + model=DEFAULT_MODEL, n=2, stream=True, stream_options={"include_usage": True}, @@ -984,9 +951,13 @@ def test_chat_completion_multiple_choices_streaming( assert "stop" == finish_reasons[0] spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "events": + assert len(logs) == 1 assert_all_attributes( spans[0], - llm_model_value, + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, latest_experimental_enabled, response_stream_id, response_stream_model, @@ -994,59 +965,43 @@ def test_chat_completion_multiple_choices_streaming( response_stream_usage.completion_tokens, ) if latest_experimental_enabled: + expected_output_messages = [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[0]), + } + ], + "finish_reason": "stop", + }, + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "".join(response_stream_result[1]), + } + ], + "finish_reason": "stop", + }, + ] + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], - [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ], + signal.attributes["gen_ai.input.messages"], + 
WEATHER_TOOL_EXPECTED_INPUT_MESSAGES, ) assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], - [ - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": "".join(response_stream_result[0]), - } - ], - "finish_reason": "stop", - }, - { - "role": "assistant", - "parts": [ - { - "type": "text", - "content": "".join(response_stream_result[1]), - } - ], - "finish_reason": "stop", - }, - ], + signal.attributes["gen_ai.output.messages"], + expected_output_messages, ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 4 - system_message = {"content": messages_value[0]["content"]} + system_message = {"content": WEATHER_TOOL_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.system.message", system_message, spans[0] ) @@ -1089,6 +1044,7 @@ def test_chat_completion_multiple_tools_streaming_with_content( log_exporter, openai_client, instrument_with_content, + content_mode, vcr, ): with vcr.use_cassette( @@ -1099,6 +1055,7 @@ def test_chat_completion_multiple_tools_streaming_with_content( log_exporter, openai_client, True, + content_mode, is_latest_experimental_enabled(), ) @@ -1119,6 +1076,7 @@ def test_chat_completion_multiple_tools_streaming_no_content( log_exporter, openai_client, False, + None, is_latest_experimental_enabled(), ) @@ -1129,30 +1087,49 @@ def test_chat_completion_with_content_span_unsampled( log_exporter, openai_client, instrument_with_content_unsampled, + content_mode, vcr, ): with vcr.use_cassette( "test_chat_completion_with_content_span_unsampled.yaml" ): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] response = openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) spans = span_exporter.get_finished_spans() assert len(spans) == 0 logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + + assert_completion_attributes( + None, + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, + response, + latest_experimental_enabled, + ) + if latest_experimental_enabled: - assert len(logs) == 0 - # TODO: check event + if content_mode == "event": + assert_messages_attribute( + logs[0].log_record.attributes["gen_ai.input.messages"], + USER_ONLY_EXPECTED_INPUT_MESSAGES, + ) + assert_messages_attribute( + logs[0].log_record.attributes["gen_ai.output.messages"], + format_simple_expected_output_message( + response.choices[0].message.content + ), + ) else: assert len(logs) == 2 - user_message = {"content": messages_value[0]["content"]} + user_message = {"content": USER_ONLY_PROMPT[0]["content"]} assert_message_in_logs( logs[0], "gen_ai.user.message", user_message, None ) @@ -1186,20 +1163,12 @@ def chat_completion_multiple_tools_streaming( log_exporter, openai_client, expect_content, + content_mode, latest_experimental_enabled, ): - llm_model_value = "gpt-4o-mini" - messages_value = [ - {"role": "system", "content": "You're a helpful assistant."}, - { - "role": "user", - "content": "What's the weather in Seattle and San Francisco today?", - }, - ] - response = openai_client.chat.completions.create( - messages=messages_value, - model=llm_model_value, + messages=WEATHER_TOOL_PROMPT, + model=DEFAULT_MODEL, tool_choice="auto", tools=[get_current_weather_tool_definition()], stream=True, @@ -1235,9 +1204,14 @@ def 
chat_completion_multiple_tools_streaming( assert "tool_calls" == finish_reason spans = span_exporter.get_finished_spans() + logs = log_exporter.get_finished_logs() + if content_mode == "event": + assert len(logs) == 1 + assert_all_attributes( spans[0], - llm_model_value, + logs[0].log_record if content_mode == "event" else None, + DEFAULT_MODEL, latest_experimental_enabled, response_stream_id, response_stream_model, @@ -1248,28 +1222,11 @@ def chat_completion_multiple_tools_streaming( if latest_experimental_enabled: if expect_content: # first call - first_input = [ - { - "role": "system", - "parts": [ - { - "type": "text", - "content": messages_value[0]["content"], - } - ], - }, - { - "role": "user", - "parts": [ - { - "type": "text", - "content": messages_value[1]["content"], - } - ], - }, - ] + signal = ( + logs[0].log_record if content_mode == "event" else spans[0] + ) assert_messages_attribute( - spans[0].attributes["gen_ai.input.messages"], first_input + signal.attributes["gen_ai.input.messages"], WEATHER_TOOL_EXPECTED_INPUT_MESSAGES ) first_output = [ @@ -1293,14 +1250,13 @@ def chat_completion_multiple_tools_streaming( } ] assert_messages_attribute( - spans[0].attributes["gen_ai.output.messages"], first_output + signal.attributes["gen_ai.output.messages"], first_output ) else: - logs = log_exporter.get_finished_logs() assert len(logs) == 3 system_message = ( - {"content": messages_value[0]["content"]} + {"content": WEATHER_TOOL_PROMPT[0]["content"]} if expect_content else None ) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py index 3bf2bded8e..76a89a90c8 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_chat_metrics.py @@ -10,6 +10,11 @@ server_attributes as ServerAttributes, ) from opentelemetry.semconv._incubating.metrics import gen_ai_metrics +from tests.test_utils import ( + DEFAULT_MODEL, + USER_ONLY_EXPECTED_INPUT_MESSAGES, + USER_ONLY_PROMPT, +) _DURATION_BUCKETS = ( 0.01, @@ -103,11 +108,9 @@ def test_chat_completion_metrics( ): with vcr.use_cassette("test_chat_completion_metrics.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) metrics = metric_reader.get_metrics_data().resource_metrics @@ -186,11 +189,9 @@ async def test_async_chat_completion_metrics( ): with vcr.use_cassette("test_async_chat_completion_metrics.yaml"): latest_experimental_enabled = is_latest_experimental_enabled() - llm_model_value = "gpt-4o-mini" - messages_value = [{"role": "user", "content": "Say this is a test"}] await async_openai_client.chat.completions.create( - messages=messages_value, model=llm_model_value, stream=False + messages=USER_ONLY_PROMPT, model=DEFAULT_MODEL, stream=False ) metrics = metric_reader.get_metrics_data().resource_metrics diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py index 02656f2783..7a6d28e2f5 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py 
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_utils.py
@@ -1,15 +1,62 @@
 import json
 from typing import Optional
+
+from openai.resources.chat.completions import ChatCompletion
+
+from opentelemetry.sdk._logs import LogRecord
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv._incubating.attributes import (
     server_attributes as ServerAttributes,
 )
 
-from openai.resources.chat.completions import ChatCompletion
+DEFAULT_MODEL = "gpt-4o-mini"
+USER_ONLY_PROMPT = [{"role": "user", "content": "Say this is a test"}]
+USER_ONLY_EXPECTED_INPUT_MESSAGES = [
+    {
+        "role": "user",
+        "parts": [
+            {
+                "type": "text",
+                "content": USER_ONLY_PROMPT[0]["content"],
+            }
+        ],
+    }
+]
+WEATHER_TOOL_PROMPT = [
+    {"role": "system", "content": "You're a helpful assistant."},
+    {
+        "role": "user",
+        "content": "What's the weather in Seattle and San Francisco today?",
+    },
+]
+WEATHER_TOOL_EXPECTED_INPUT_MESSAGES = [
+    {
+        "role": "system",
+        "parts": [
+            {
+                "type": "text",
+                "content": WEATHER_TOOL_PROMPT[0]["content"],
+            }
+        ],
+    },
+    {
+        "role": "user",
+        "parts": [
+            {
+                "type": "text",
+                "content": WEATHER_TOOL_PROMPT[1]["content"],
+            }
+        ],
+    },
+]
+
 
 def assert_all_attributes(
     span: ReadableSpan,
+    details_event: LogRecord,
     request_model: str,
     latest_experimental_enabled: bool,
     response_id: str = None,
@@ -19,54 +66,154 @@ def assert_all_attributes(
     operation_name: str = "chat",
     server_address: str = "api.openai.com",
 ):
-    assert span.name == f"{operation_name} {request_model}"
-    assert (
-        operation_name
-        == span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
-    )
-    provider_name_attr_name = "gen_ai.provider.name" if latest_experimental_enabled else GenAIAttributes.GEN_AI_SYSTEM
-    assert (
-        GenAIAttributes.GenAiSystemValues.OPENAI.value
-        == span.attributes[provider_name_attr_name]
-    )
-    assert (
-        request_model == span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+    if span:
+        assert span.name == f"{operation_name} {request_model}"
+    if details_event:
+        assert (
+            "gen_ai.client.inference.operation.details"
+            == details_event.attributes["event.name"]
+        )
+
+    if span:
+        assert (
+            operation_name
+            == span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+        )
+    if details_event:
+        assert (
+            operation_name
+            == details_event.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+        )
+
+    provider_name_attr_name = (
+        "gen_ai.provider.name"
+        if latest_experimental_enabled
+        else GenAIAttributes.GEN_AI_SYSTEM
     )
-    if response_model:
+    if span:
         assert (
-            response_model
-            == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
+            GenAIAttributes.GenAiSystemValues.OPENAI.value
+            == span.attributes[provider_name_attr_name]
+        )
+    if details_event:
+        assert (
+            GenAIAttributes.GenAiSystemValues.OPENAI.value
+            == details_event.attributes[provider_name_attr_name]
         )
-    else:
-        assert GenAIAttributes.GEN_AI_RESPONSE_MODEL not in span.attributes
 
-    if response_id:
+    if span:
+        assert (
+            request_model
+            == span.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        )
+    if details_event:
         assert (
-            response_id == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID]
+            request_model
+            == details_event.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
         )
+
+    if response_model:
+        if span:
+            assert (
+                response_model
+                == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
+            )
+        if details_event:
+            assert (
+                response_model
+                == details_event.attributes[
+                    GenAIAttributes.GEN_AI_RESPONSE_MODEL
+                ]
+            )
+    else:
+        if span:
+            assert GenAIAttributes.GEN_AI_RESPONSE_MODEL not in span.attributes
+        if details_event:
+            assert (
+                GenAIAttributes.GEN_AI_RESPONSE_MODEL
+                not in details_event.attributes
+            )
+
+    if response_id:
+        if span:
+            assert (
+                response_id
+                == span.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID]
+            )
+        if details_event:
+            assert (
+                response_id
+                == details_event.attributes[GenAIAttributes.GEN_AI_RESPONSE_ID]
+            )
     else:
-        assert GenAIAttributes.GEN_AI_RESPONSE_ID not in span.attributes
+        if span:
+            assert GenAIAttributes.GEN_AI_RESPONSE_ID not in span.attributes
+        if details_event:
+            assert (
+                GenAIAttributes.GEN_AI_RESPONSE_ID
+                not in details_event.attributes
+            )
 
     if input_tokens:
-        assert (
-            input_tokens
-            == span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS]
-        )
+        if span:
+            assert (
+                input_tokens
+                == span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS]
+            )
+        if details_event:
+            assert (
+                input_tokens
+                == details_event.attributes[
+                    GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS
+                ]
+            )
     else:
-        assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS not in span.attributes
+        if span:
+            assert (
+                GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS
+                not in span.attributes
+            )
+        if details_event:
+            assert (
+                GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS
+                not in details_event.attributes
+            )
 
     if output_tokens:
+        if span:
+            assert (
+                output_tokens
+                == span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS]
+            )
+        if details_event:
+            assert (
+                output_tokens
+                == details_event.attributes[
+                    GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS
+                ]
+            )
+    else:
+        if span:
+            assert (
+                GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS
+                not in span.attributes
+            )
+        if details_event:
+            assert (
+                GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS
+                not in details_event.attributes
+            )
+
+    if span:
         assert (
-            output_tokens
-            == span.attributes[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS]
+            server_address == span.attributes[ServerAttributes.SERVER_ADDRESS]
         )
-    else:
+    if details_event:
         assert (
-            GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS not in span.attributes
+            server_address
+            == details_event.attributes[ServerAttributes.SERVER_ADDRESS]
         )
-    assert server_address == span.attributes[ServerAttributes.SERVER_ADDRESS]
-
 
 def assert_log_parent(log, span):
     if span:
@@ -97,6 +244,7 @@ def get_current_weather_tool_definition():
         },
     }
 
+
 def remove_none_values(body):
     result = {}
     for key, value in body.items():
@@ -113,6 +261,7 @@ def remove_none_values(body):
 
 def assert_completion_attributes(
     span: ReadableSpan,
+    details_event: LogRecord,
     request_model: str,
     response: ChatCompletion,
     latest_experimental_enabled: bool,
@@ -121,6 +270,7 @@
 ):
     return assert_all_attributes(
         span,
+        details_event,
         request_model,
         latest_experimental_enabled,
        response.id,
@@ -131,5 +281,23 @@
         server_address,
     )
 
+
 def assert_messages_attribute(actual, expected):
-    assert json.loads(actual) == expected
\ No newline at end of file
+    assert json.loads(actual) == expected
+
+
+def format_simple_expected_output_message(
+    content: str, finish_reason: str = "stop"
+):
+    return [
+        {
+            "role": "assistant",
+            "parts": [
+                {
+                    "type": "text",
+                    "content": content,
+                }
+            ],
+            "finish_reason": finish_reason,
+        }
+    ]

From 4d9ba77587ed7b6e749770142c051ed5e37f549e Mon Sep 17 00:00:00 2001
From: Liudmila Molkova
Date: Wed, 27 Aug 2025 18:07:21 -0700
Subject: [PATCH 5/5] update schema URL and docs

---
 .../README.rst                                  | 23 ++++++++++++++++---
 .../examples/manual/README.rst                  |  3 ++-
 .../examples/zero-code/README.rst               |  4 ++--
 .../instrumentation/openai_v2/__init__.py       |  6 ++---
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
index 0775c59afd..25e8ec86d8 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/README.rst
@@ -19,7 +19,7 @@ Many LLM platforms support the OpenAI SDK. This means systems such as the follow
    * - Name
      - gen_ai.system
    * - `Azure OpenAI `_
-     - ``az.ai.openai``
+     - ``azure.ai.openai``
    * - `Gemini `_
      - ``gemini``
    * - `Perplexity `_
@@ -80,9 +80,26 @@ Enabling message content
 
 Message content such as the contents of the prompt, completion, function arguments and return values
 are not captured by default. To capture message content as log events, set the environment variable
-`OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `true`.
+``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` to one of the following values:
 
-TODO!
+- ``true`` - Legacy. Enables content capture on ``gen_ai.{role}.message`` and ``gen_ai.choice`` events when
+  `latest experimental features <#enabling-the-latest-experimental-features>`_ are *not* enabled.
+- ``span`` - Enables content capture on *span* attributes when
+  `latest experimental features <#enabling-the-latest-experimental-features>`_ are enabled.
+- ``event`` - Enables content capture on *event* attributes when
+  `latest experimental features <#enabling-the-latest-experimental-features>`_ are enabled.
+
+Enabling the latest experimental features
+*****************************************
+
+To enable the latest experimental features, set the environment variable
+``OTEL_SEMCONV_STABILITY_OPT_IN`` to ``gen_ai_latest_experimental``. If you already use
+``OTEL_SEMCONV_STABILITY_OPT_IN`` to enable other features, append ``,gen_ai_latest_experimental`` to its value.
+
+Without this setting, OpenAI instrumentation aligns with `Semantic Conventions v1.28.0 `_
+and does not capture additional details introduced in later versions.
+
+.. note:: Generative AI semantic conventions are still evolving. The latest experimental features will introduce breaking changes in future releases.
 
 Uninstrument
 ************
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/README.rst
index 61e4c4ae8e..cd380f0fbe 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/README.rst
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/manual/README.rst
@@ -11,7 +11,8 @@ your OpenAI requests.
 
 Note: `.env <.env>`_ file configures additional environment variables:
 
-- ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true`` configures OpenAI instrumentation to capture prompt and completion contents on events.
+- ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=span`` configures OpenAI instrumentation to capture prompt and completion contents on *span* attributes.
+- ``OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`` enables the latest experimental features.
 
 Setup
 -----
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst
index 51d2f50a7f..c9e2cdfd7a 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/zero-code/README.rst
@@ -13,10 +13,10 @@ your OpenAI requests.
 
 Note: `.env <.env>`_ file configures additional environment variables:
 
 - ``OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true`` configures OpenTelemetry SDK to export logs and events.
-- ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true`` configures OpenAI instrumentation to capture prompt and completion contents on events.
+- ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=span`` configures OpenAI instrumentation to capture prompt and completion contents on *span* attributes.
 - ``OTEL_LOGS_EXPORTER=otlp`` to specify exporter type.
+- ``OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`` enables the latest experimental features.
 
-TODO!
 
 Setup
 -----
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
index 970615bd7d..2396949a12 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/__init__.py
@@ -74,13 +74,13 @@ def _instrument(self, **kwargs):
             __name__,
             "",
             tracer_provider,
-            schema_url=Schemas.V1_28_0.value,
+            schema_url="https://opentelemetry.io/schemas/1.37.0",  # TODO: Schemas.V1_37_0.value,
         )
         event_logger_provider = kwargs.get("event_logger_provider")
         event_logger = get_event_logger(
             __name__,
             "",
-            schema_url=Schemas.V1_28_0.value,
+            schema_url="https://opentelemetry.io/schemas/1.37.0",  # TODO: Schemas.V1_37_0.value,
             event_logger_provider=event_logger_provider,
         )
         meter_provider = kwargs.get("meter_provider")
@@ -88,7 +88,7 @@ def _instrument(self, **kwargs):
             __name__,
             "",
             meter_provider,
-            schema_url=Schemas.V1_28_0.value,
+            schema_url="https://opentelemetry.io/schemas/1.37.0",  # TODO: Schemas.V1_37_0.value,
         )
         instruments = Instruments(self._meter)
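
Taken together, the two variables documented in the READMEs above select the semantic-convention mode and where prompt/completion content lands. A minimal sketch of a local setup, assuming the variables are set in-process before ``OpenAIInstrumentor().instrument()`` runs (the bundled examples set them through their ``.env`` files instead)::

    import os

    # Assumed in-process setup; the examples above use a .env file instead.
    os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"
    os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "span"

    from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor

    # Set both variables before calling instrument() so the instrumentation
    # picks them up when it wraps the chat completion APIs.
    OpenAIInstrumentor().instrument()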
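
The reworked test helpers above describe the message shape recorded under ``gen_ai.input.messages`` and ``gen_ai.output.messages``: a list of messages, each with a ``role``, ``parts``, and, for outputs, a ``finish_reason``. A small self-contained sketch of how those helpers compose; the JSON strings are illustrative stand-ins for attribute values, not output captured from a real call::

    import json

    from tests.test_utils import (
        USER_ONLY_EXPECTED_INPUT_MESSAGES,
        assert_messages_attribute,
        format_simple_expected_output_message,
    )

    # Illustrative attribute value; the instrumentation stores messages as
    # JSON strings, which assert_messages_attribute parses before comparing.
    input_attr = json.dumps(
        [
            {
                "role": "user",
                "parts": [{"type": "text", "content": "Say this is a test"}],
            }
        ]
    )
    assert_messages_attribute(input_attr, USER_ONLY_EXPECTED_INPUT_MESSAGES)

    # Output messages additionally carry the finish reason.
    output_attr = json.dumps(
        [
            {
                "role": "assistant",
                "parts": [{"type": "text", "content": "This is a test."}],
                "finish_reason": "stop",
            }
        ]
    )
    assert_messages_attribute(
        output_attr, format_simple_expected_output_message("This is a test.")
    )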