From 7149fed6a6f36abe1e3967f60407908dcdff37e0 Mon Sep 17 00:00:00 2001 From: shuningc Date: Mon, 17 Nov 2025 18:36:42 -0800 Subject: [PATCH 1/4] Adding Llamaindex llm instrumentation spike --- .../pyproject.toml | 58 +++++ .../instrumentation/llamaindex/__init__.py | 67 ++++++ .../llamaindex/callback_handler.py | 222 ++++++++++++++++++ .../instrumentation/llamaindex/config.py | 3 + .../instrumentation/llamaindex/version.py | 1 + .../tests/test_llm_instrumentation.py | 190 +++++++++++++++ 6 files changed, 541 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml new file mode 100644 index 0000000..55a6708 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "splunk-otel-instrumentation-llamaindex" +dynamic = ["version"] +description = "OpenTelemetry LlamaIndex instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.9" +authors = [ + { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "opentelemetry-api ~= 1.38.0.dev0", + "opentelemetry-instrumentation ~= 0.59b0.dev0", + "opentelemetry-semantic-conventions ~= 0.59b0.dev0", + "splunk-otel-util-genai>=0.1.4", +] + +[project.optional-dependencies] +instruments = ["llama-index-core >= 0.14.0"] +test = [ + "llama-index-core >= 0.14.0", + "llama-index-llms-openai >= 0.6.0", + "pytest >= 7.0.0", +] + +[project.entry-points.opentelemetry_instrumentor] +llamaindex = "opentelemetry.instrumentation.llamaindex:LlamaindexInstrumentor" + +[project.urls] +Homepage = "https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation-genai/opentelemetry-instrumentation-llamaindex" +Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/llamaindex/version.py" + +[tool.hatch.build.targets.sdist] +include = ["/src", "/tests"] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] + +[tool.ruff] +exclude = ["./"] diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py
new file mode 100644
index 0000000..c3bea98
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py
@@ -0,0 +1,67 @@
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.util.genai.handler import get_telemetry_handler
+from opentelemetry.instrumentation.llamaindex.config import Config
+from opentelemetry.instrumentation.llamaindex.callback_handler import (
+    LlamaindexCallbackHandler,
+)
+from wrapt import wrap_function_wrapper
+
+_instruments = ("llama-index-core >= 0.14.0",)
+
+
+class LlamaindexInstrumentor(BaseInstrumentor):
+    def __init__(
+        self,
+        exception_logger=None,
+        disable_trace_context_propagation=False,
+        use_legacy_attributes: bool = True,
+    ):
+        super().__init__()
+        Config.exception_logger = exception_logger
+        Config.use_legacy_attributes = use_legacy_attributes
+        self._disable_trace_context_propagation = (
+            disable_trace_context_propagation
+        )
+        self._telemetry_handler = None
+
+    def instrumentation_dependencies(self):
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        tracer_provider = kwargs.get("tracer_provider")
+        meter_provider = kwargs.get("meter_provider")
+        logger_provider = kwargs.get("logger_provider")
+
+        self._telemetry_handler = get_telemetry_handler(
+            tracer_provider=tracer_provider,
+            meter_provider=meter_provider,
+            logger_provider=logger_provider,
+        )
+
+        llamaindexCallBackHandler = LlamaindexCallbackHandler(
+            telemetry_handler=self._telemetry_handler
+        )
+
+        wrap_function_wrapper(
+            module="llama_index.core.callbacks.base",
+            name="CallbackManager.__init__",
+            wrapper=_BaseCallbackManagerInitWrapper(llamaindexCallBackHandler),
+        )
+
+    def _uninstrument(self, **kwargs):
+        pass
+
+
+class _BaseCallbackManagerInitWrapper:
+    def __init__(self, callback_handler: "LlamaindexCallbackHandler"):
+        self._callback_handler = callback_handler
+
+    def __call__(self, wrapped, instance, args, kwargs) -> None:
+        wrapped(*args, **kwargs)
+        # LlamaIndex uses 'handlers' instead of 'inheritable_handlers'
+        for handler in instance.handlers:
+            if isinstance(handler, type(self._callback_handler)):
+                break
+        else:
+            self._callback_handler._callback_manager = instance
+            instance.add_handler(self._callback_handler)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py
new file mode 100644
index 0000000..7846cfb
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py
@@ -0,0 +1,222 @@
+from typing import Any, Dict, Optional
+
+from llama_index.core.callbacks.base_handler import BaseCallbackHandler
+from llama_index.core.callbacks.schema import CBEventType
+
+from opentelemetry.util.genai.handler import TelemetryHandler
+from opentelemetry.util.genai.types import (
+    InputMessage,
+    LLMInvocation,
+    OutputMessage,
+    Text,
+)
+
+
+def _safe_str(value: Any) -> str:
+    """Safely convert value to string."""
+    try:
+        return str(value)
+    except (TypeError, ValueError):
+        return ""
+
+
+class
LlamaindexCallbackHandler(BaseCallbackHandler): + """Simplified LlamaIndex callback handler - LLM invocation only.""" + + def __init__( + self, + telemetry_handler: Optional[TelemetryHandler] = None, + ) -> None: + super().__init__( + event_starts_to_ignore=[], + event_ends_to_ignore=[], + ) + self._handler = telemetry_handler + + def start_trace(self, trace_id: Optional[str] = None) -> None: + """Start a trace - required by BaseCallbackHandler.""" + pass + + def end_trace( + self, + trace_id: Optional[str] = None, + trace_map: Optional[Dict[str, Any]] = None, + ) -> None: + """End a trace - required by BaseCallbackHandler.""" + pass + + def on_event_start( + self, + event_type: CBEventType, + payload: Optional[Dict[str, Any]] = None, + event_id: str = "", + parent_id: str = "", + **kwargs: Any, + ) -> str: + """Handle event start - only processing LLM events.""" + if event_type == CBEventType.LLM: + self._handle_llm_start(event_id, parent_id, payload, **kwargs) + return event_id + + def on_event_end( + self, + event_type: CBEventType, + payload: Optional[Dict[str, Any]] = None, + event_id: str = "", + **kwargs: Any, + ) -> None: + """Handle event end - only processing LLM events.""" + if event_type == CBEventType.LLM: + self._handle_llm_end(event_id, payload, **kwargs) + + def _handle_llm_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle LLM invocation start.""" + if not self._handler or not payload: + return + + # Extract model information from payload + serialized = payload.get("serialized", {}) + model_name = ( + serialized.get("model") + or serialized.get("model_name") + or "unknown" + ) + + # Extract messages from payload + # LlamaIndex messages are ChatMessage objects with .content and .role properties + messages = payload.get("messages", []) + input_messages = [] + + for msg in messages: + # Handle ChatMessage objects (has .content property and .role attribute) + if hasattr(msg, "content") and hasattr(msg, "role"): + # Extract role - could be MessageRole enum + role_value = ( + str(msg.role.value) + if hasattr(msg.role, "value") + else str(msg.role) + ) + # Extract content - this is a property that pulls from blocks[0].text + content = _safe_str(msg.content) + input_messages.append( + InputMessage( + role=role_value, parts=[Text(content=content)] + ) + ) + elif isinstance(msg, dict): + # Handle serialized messages (dict format) + role = msg.get("role", "user") + # Try to extract from blocks first (LlamaIndex format) + blocks = msg.get("blocks", []) + if blocks and isinstance(blocks[0], dict): + content = blocks[0].get("text", "") + else: + # Fallback to direct content field + content = msg.get("content", "") + + role_value = ( + str(role.value) if hasattr(role, "value") else str(role) + ) + input_messages.append( + InputMessage( + role=role_value, + parts=[Text(content=_safe_str(content))], + ) + ) + + # Create LLM invocation with event_id as run_id + llm_inv = LLMInvocation( + request_model=_safe_str(model_name), + input_messages=input_messages, + attributes={}, + run_id=event_id, # Use event_id as run_id for registry lookup + ) + llm_inv.framework = "llamaindex" + + # Start the LLM invocation (handler stores it in _entity_registry) + self._handler.start_llm(llm_inv) + + def _handle_llm_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle LLM invocation end.""" + if not self._handler: + return + + # Get the LLM invocation from handler's 
registry using event_id + llm_inv = self._handler.get_entity(event_id) + if not llm_inv or not isinstance(llm_inv, LLMInvocation): + return + + if payload: + # Extract response from payload + response = payload.get("response") + + # Handle both dict and object types for response + if response: + # Get message - could be dict or object + if isinstance(response, dict): + message = response.get("message", {}) + raw_response = response.get("raw") + else: + # response is a ChatResponse object + message = getattr(response, "message", None) + raw_response = getattr(response, "raw", None) + + # Extract content from message + if message: + if isinstance(message, dict): + # Message is dict + blocks = message.get("blocks", []) + if blocks and isinstance(blocks[0], dict): + content = blocks[0].get("text", "") + else: + content = message.get("content", "") + else: + # Message is ChatMessage object + blocks = getattr(message, "blocks", []) + if blocks and len(blocks) > 0: + content = getattr(blocks[0], "text", "") + else: + content = getattr(message, "content", "") + + # Create output message + llm_inv.output_messages = [ + OutputMessage( + role="assistant", + parts=[Text(content=_safe_str(content))], + finish_reason="stop", + ) + ] + + # Extract token usage from response.raw (OpenAI format) + # LlamaIndex stores the raw API response (e.g., OpenAI response) in response.raw + # raw_response could be a dict or an object (e.g., ChatCompletion from OpenAI) + if raw_response: + # Try to get usage from dict or object + if isinstance(raw_response, dict): + usage = raw_response.get("usage", {}) + else: + # It's an object, try to get usage attribute + usage = getattr(raw_response, "usage", None) + + if usage: + # usage could also be dict or object + if isinstance(usage, dict): + llm_inv.input_tokens = usage.get("prompt_tokens") + llm_inv.output_tokens = usage.get("completion_tokens") + else: + llm_inv.input_tokens = getattr(usage, "prompt_tokens", None) + llm_inv.output_tokens = getattr(usage, "completion_tokens", None) + + # Stop the LLM invocation + self._handler.stop_llm(llm_inv) diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py new file mode 100644 index 0000000..44199c0 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py @@ -0,0 +1,3 @@ +class Config: + exception_logger = None + use_legacy_attributes = True diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py new file mode 100644 index 0000000..3dc1f76 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py new file mode 100644 index 0000000..50324c3 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py @@ -0,0 +1,190 @@ +"""Tests for LlamaIndex LLM instrumentation with OpenTelemetry.""" + 
+import os + +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.core.llms.mock import MockLLM +from opentelemetry import metrics, trace +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + ConsoleSpanExporter, + SimpleSpanProcessor, +) +from opentelemetry.semconv._incubating.metrics import gen_ai_metrics + + +def setup_telemetry(): + """Setup OpenTelemetry with both trace and metrics exporters.""" + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + tracer_provider = trace.get_tracer_provider() + tracer_provider.add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics with InMemoryMetricReader + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + return tracer_provider, meter_provider, metric_reader + + +def test_with_openai(): + """Test with real OpenAI API - requires OPENAI_API_KEY environment variable.""" + from llama_index.llms.openai import OpenAI + + print("=" * 80) + print("Testing with OpenAI API") + print("=" * 80) + + llm = OpenAI(model="gpt-3.5-turbo") + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." + ), + ChatMessage( + role=MessageRole.USER, content="Say hello in exactly 5 words" + ), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content}") + + if hasattr(response, "raw") and response.raw: + if isinstance(response.raw, dict): + usage = response.raw.get("usage", {}) + else: + usage = getattr(response.raw, "usage", None) + + if usage: + if isinstance(usage, dict): + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + total_tokens = usage.get("total_tokens") + else: + prompt_tokens = getattr(usage, "prompt_tokens", None) + completion_tokens = getattr(usage, "completion_tokens", None) + total_tokens = getattr(usage, "total_tokens", None) + + print(f"\nToken Usage: input={prompt_tokens}, output={completion_tokens}, total={total_tokens}") + + print("=" * 80) + + +class MockLLMWithUsage(MockLLM): + """MockLLM that includes fake usage data for testing.""" + + def _complete(self, prompt, **kwargs): + """Override internal complete to inject usage data.""" + response = super()._complete(prompt, **kwargs) + # Note: MockLLM uses _complete internally, but we can't easily inject + # usage here because the ChatResponse is created later + return response + + +def test_with_mock(): + """Test with MockLLM - no API key needed.""" + print("=" * 80) + print("Testing with MockLLM") + print("=" * 80) + + llm = MockLLM(max_tokens=50) + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." 
+ ), + ChatMessage(role=MessageRole.USER, content="Say hello in 5 words"), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content[:100]}...") + print("=" * 80) + + +def test_message_extraction(): + """Test message extraction.""" + print("\n" + "=" * 80) + print("Testing message extraction") + print("=" * 80) + + llm = MockLLM(max_tokens=20) + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content="You are helpful."), + ChatMessage(role=MessageRole.USER, content="Test message"), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content[:50]}...") + print("=" * 80) + + +if __name__ == "__main__": + # Enable metrics emission + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup telemetry + tracer_provider, meter_provider, metric_reader = setup_telemetry() + + # Instrument LlamaIndex + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument( + tracer_provider=tracer_provider, + meter_provider=meter_provider + ) + print("LlamaIndex instrumentation enabled\n") + + # Run tests + if os.environ.get("OPENAI_API_KEY"): + print("Testing with real OpenAI API\n") + test_with_openai() + else: + print("Testing with MockLLM (set OPENAI_API_KEY to test real API)\n") + test_with_mock() + + # Test message extraction + test_message_extraction() + + # Check metrics + print("\n" + "=" * 80) + print("Metrics Summary") + print("=" * 80) + + metrics_data = metric_reader.get_metrics_data() + found_duration = False + found_token_usage = False + + if metrics_data: + for rm in getattr(metrics_data, "resource_metrics", []) or []: + for scope in getattr(rm, "scope_metrics", []) or []: + for metric in getattr(scope, "metrics", []) or []: + print(f"\nMetric: {metric.name}") + + if metric.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION: + found_duration = True + dps = getattr(metric.data, "data_points", []) + if dps: + print(f" Duration: {dps[0].sum:.4f} seconds") + print(f" Count: {dps[0].count}") + + if metric.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE: + found_token_usage = True + dps = getattr(metric.data, "data_points", []) + for dp in dps: + token_type = dp.attributes.get("gen_ai.token.type", "unknown") + print(f" Token type: {token_type}, Sum: {dp.sum}, Count: {dp.count}") + + print("\n" + "=" * 80) + status = [] + if found_duration: + status.append("Duration: OK") + if found_token_usage: + status.append("Token Usage: OK") + if not found_duration and not found_token_usage: + status.append("No metrics (use real API for metrics)") + + print("Status: " + " | ".join(status)) + print("=" * 80) From 0bde623c06ab03d8719fe34302ae466e56b05231 Mon Sep 17 00:00:00 2001 From: shuningc Date: Mon, 17 Nov 2025 18:43:00 -0800 Subject: [PATCH 2/4] Updating readme --- .../README.rst | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst new file mode 100644 index 0000000..5371d3c --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst @@ -0,0 +1,155 @@ +OpenTelemetry LlamaIndex Instrumentation +========================================= + +This library provides automatic instrumentation for LlamaIndex applications using OpenTelemetry. 
+ +Installation +------------ + +Development installation:: + + # Install the package in editable mode + cd instrumentation-genai/opentelemetry-instrumentation-llamaindex + pip install -e . + + # Install test dependencies + pip install -e ".[test]" + + # Install util-genai (required for telemetry) + cd ../../util/opentelemetry-util-genai + pip install -e . + + +Quick Start +----------- + +.. code-block:: python + + import os + from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor + from opentelemetry import trace, metrics + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import InMemoryMetricReader + + # Enable metrics (default is spans only) + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + trace.get_tracer_provider().add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + # Enable instrumentation with providers + LlamaindexInstrumentor().instrument( + tracer_provider=trace.get_tracer_provider(), + meter_provider=meter_provider + ) + + # Use LlamaIndex as normal + from llama_index.llms.openai import OpenAI + from llama_index.core.llms import ChatMessage, MessageRole + + llm = OpenAI(model="gpt-3.5-turbo") + messages = [ChatMessage(role=MessageRole.USER, content="Hello")] + response = llm.chat(messages) + + +Running Tests +------------- + +.. code-block:: bash + + # Set environment variables + export OPENAI_API_KEY=your-api-key + export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric + + # Run the test + cd tests + python test_llm_instrumentation.py + + +Expected Output +--------------- + +**Span Attributes**:: + + { + "gen_ai.framework": "llamaindex", + "gen_ai.request.model": "gpt-3.5-turbo", + "gen_ai.operation.name": "chat", + "gen_ai.usage.input_tokens": 24, + "gen_ai.usage.output_tokens": 7 + } + +**Metrics**:: + + Metric: gen_ai.client.operation.duration + Duration: 0.6900 seconds + Count: 1 + + Metric: gen_ai.client.token.usage + Token type: input, Sum: 24, Count: 1 + Token type: output, Sum: 7, Count: 1 + + +Key Implementation Differences from LangChain +---------------------------------------------- + +**1. Event-Based Callbacks** + +LlamaIndex uses ``on_event_start(event_type, ...)`` and ``on_event_end(event_type, ...)`` +instead of LangChain's method-based callbacks (``on_llm_start``, ``on_llm_end``). + +Event types are dispatched via ``CBEventType`` enum:: + + CBEventType.LLM # LLM invocations + CBEventType.AGENT # Agent steps + CBEventType.EMBEDDING # Embedding operations + +**2. Handler Registration** + +LlamaIndex uses ``handlers`` list:: + + callback_manager.handlers.append(handler) + +LangChain uses ``inheritable_handlers``:: + + callback_manager.inheritable_handlers.append(handler) + +**3. Response Structure** + +LlamaIndex ``ChatMessage`` uses ``blocks`` (list of TextBlock objects):: + + message.content # Computed property from blocks[0].text + +LangChain uses simple strings:: + + message.content # Direct string property + +**4. 
Token Usage** + +LlamaIndex returns objects (not dicts):: + + response.raw.usage.prompt_tokens # Object attribute + response.raw.usage.completion_tokens # Object attribute + +LangChain returns dicts:: + + response["usage"]["prompt_tokens"] # Dict key + response["usage"]["completion_tokens"] # Dict key + + +References +---------- + +* `OpenTelemetry Project `_ +* `LlamaIndex `_ +* `LlamaIndex Callbacks `_ From 633d0af33df0d2670a95f1c06b3ab4ca81cb4a99 Mon Sep 17 00:00:00 2001 From: shuningc Date: Tue, 18 Nov 2025 14:39:39 -0800 Subject: [PATCH 3/4] feat: Add embedding instrumentation for LlamaIndex - Add embedding event handlers (_handle_embedding_start, _handle_embedding_end) - Extract model name, input texts, and dimension count from embedding events - Create vendor_detection.py module with VendorRule-based provider detection - Support 13+ embedding providers (OpenAI, Azure, AWS, Google, Cohere, etc.) - Add test_embedding_instrumentation.py with single and batch embedding tests - Update README with embedding documentation and provider list - Tested successfully with OpenAI embeddings API --- .../README.rst | 70 +++++++- .../llamaindex/callback_handler.py | 89 ++++++++++- .../llamaindex/vendor_detection.py | 119 ++++++++++++++ .../tests/test_embedding_instrumentation.py | 151 ++++++++++++++++++ 4 files changed, 422 insertions(+), 7 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst index 5371d3c..bf9ea59 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst @@ -65,6 +65,8 @@ Quick Start Running Tests ------------- +**LLM Tests**: + .. code-block:: bash # Set environment variables @@ -75,11 +77,23 @@ Running Tests cd tests python test_llm_instrumentation.py +**Embedding Tests**: + +.. code-block:: bash + + # Set environment variables + export OPENAI_API_KEY=your-api-key + export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric + + # Run the test + cd tests + python test_embedding_instrumentation.py + Expected Output --------------- -**Span Attributes**:: +**LLM Span Attributes**:: { "gen_ai.framework": "llamaindex", @@ -89,6 +103,15 @@ Expected Output "gen_ai.usage.output_tokens": 7 } +**Embedding Span Attributes**:: + + { + "gen_ai.operation.name": "embeddings", + "gen_ai.request.model": "text-embedding-3-small", + "gen_ai.provider.name": "openai", + "gen_ai.embeddings.dimension.count": 1536 + } + **Metrics**:: Metric: gen_ai.client.operation.duration @@ -110,9 +133,9 @@ instead of LangChain's method-based callbacks (``on_llm_start``, ``on_llm_end``) Event types are dispatched via ``CBEventType`` enum:: - CBEventType.LLM # LLM invocations - CBEventType.AGENT # Agent steps - CBEventType.EMBEDDING # Embedding operations + CBEventType.LLM # LLM invocations (chat, complete) + CBEventType.AGENT # Agent steps (not yet instrumented) + CBEventType.EMBEDDING # Embedding operations (get_text_embedding, get_text_embedding_batch) **2. 
Handler Registration** @@ -147,6 +170,45 @@ LangChain returns dicts:: response["usage"]["completion_tokens"] # Dict key +Supported Features +------------------ + +**LLM Operations** + +* ✅ Chat completion (``llm.chat()``, ``llm.stream_chat()``) +* ✅ Text completion (``llm.complete()``, ``llm.stream_complete()``) +* ✅ Token usage tracking +* ✅ Model name detection +* ✅ Framework attribution + +**Embedding Operations** + +* ✅ Single text embedding (``embed_model.get_text_embedding()``) +* ✅ Batch embedding (``embed_model.get_text_embedding_batch()``) +* ✅ Query embedding (``embed_model.get_query_embedding()``) +* ✅ Provider detection (OpenAI, Azure, AWS Bedrock, Google, Cohere, HuggingFace, Ollama, and more) +* ✅ Dimension count tracking +* ✅ Input text capture + +**Provider Detection** + +Embedding instrumentation automatically detects the provider from class names: + +* **OpenAI**: ``OpenAIEmbedding`` +* **Azure**: ``AzureOpenAIEmbedding`` +* **AWS**: ``BedrockEmbedding`` +* **Google**: ``GeminiEmbedding``, ``VertexTextEmbedding``, ``GooglePaLMEmbedding`` +* **Cohere**: ``CohereEmbedding`` +* **HuggingFace**: ``HuggingFaceEmbedding``, ``HuggingFaceInferenceAPIEmbedding`` +* **Ollama**: ``OllamaEmbedding`` +* **Anthropic**: ``AnthropicEmbedding`` +* **MistralAI**: ``MistralAIEmbedding`` +* **Together**: ``TogetherEmbedding`` +* **Fireworks**: ``FireworksEmbedding`` +* **Voyage**: ``VoyageEmbedding`` +* **Jina**: ``JinaEmbedding`` + + References ---------- diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py index 7846cfb..fe9a1a1 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py @@ -5,12 +5,15 @@ from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.types import ( + EmbeddingInvocation, InputMessage, LLMInvocation, OutputMessage, Text, ) +from .vendor_detection import detect_vendor_from_class + def _safe_str(value: Any) -> str: """Safely convert value to string.""" @@ -21,7 +24,7 @@ def _safe_str(value: Any) -> str: class LlamaindexCallbackHandler(BaseCallbackHandler): - """Simplified LlamaIndex callback handler - LLM invocation only.""" + """LlamaIndex callback handler supporting LLM and Embedding instrumentation.""" def __init__( self, @@ -53,9 +56,11 @@ def on_event_start( parent_id: str = "", **kwargs: Any, ) -> str: - """Handle event start - only processing LLM events.""" + """Handle event start - processing LLM and EMBEDDING events.""" if event_type == CBEventType.LLM: self._handle_llm_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.EMBEDDING: + self._handle_embedding_start(event_id, parent_id, payload, **kwargs) return event_id def on_event_end( @@ -65,9 +70,11 @@ def on_event_end( event_id: str = "", **kwargs: Any, ) -> None: - """Handle event end - only processing LLM events.""" + """Handle event end - processing LLM and EMBEDDING events.""" if event_type == CBEventType.LLM: self._handle_llm_end(event_id, payload, **kwargs) + elif event_type == CBEventType.EMBEDDING: + self._handle_embedding_end(event_id, payload, **kwargs) def _handle_llm_start( self, @@ -220,3 +227,79 @@ def 
_handle_llm_end( # Stop the LLM invocation self._handler.stop_llm(llm_inv) + + def _handle_embedding_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle embedding invocation start.""" + if not self._handler or not payload: + return + + # Extract model information from payload + serialized = payload.get("serialized", {}) + model_name = ( + serialized.get("model_name") + or serialized.get("model") + or "unknown" + ) + + # Detect provider from class name + class_name = serialized.get("class_name", "") + provider = detect_vendor_from_class(class_name) + + # Note: input texts are not available at start time in LlamaIndex + # They will be available in the end event payload + + # Create embedding invocation with event_id as run_id + emb_inv = EmbeddingInvocation( + request_model=_safe_str(model_name), + input_texts=[], # Will be populated on end event + provider=provider, + attributes={}, + run_id=event_id, + ) + emb_inv.framework = "llamaindex" + + # Start the embedding invocation + self._handler.start_embedding(emb_inv) + + def _handle_embedding_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle embedding invocation end.""" + if not self._handler: + return + + # Get the embedding invocation from handler's registry using event_id + emb_inv = self._handler.get_entity(event_id) + if not emb_inv or not isinstance(emb_inv, EmbeddingInvocation): + return + + if payload: + # Extract input chunks (texts) from response + # chunks is the list of input texts that were embedded + chunks = payload.get("chunks", []) + if chunks: + emb_inv.input_texts = [_safe_str(chunk) for chunk in chunks] + + # Extract embedding vectors from response + # embeddings is the list of output vectors + embeddings = payload.get("embeddings", []) + + # Determine dimension from first embedding vector + if embeddings and len(embeddings) > 0: + first_embedding = embeddings[0] + if isinstance(first_embedding, list): + emb_inv.dimension_count = len(first_embedding) + elif hasattr(first_embedding, "__len__"): + emb_inv.dimension_count = len(first_embedding) + + # Stop the embedding invocation + self._handler.stop_embedding(emb_inv) diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py new file mode 100644 index 0000000..6f9c9f0 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py @@ -0,0 +1,119 @@ +"""Vendor detection for LlamaIndex embedding providers.""" + +from dataclasses import dataclass +from typing import List, Set + + +@dataclass(frozen=True) +class VendorRule: + """Rule for detecting vendor from LlamaIndex class names.""" + + exact_matches: Set[str] + patterns: List[str] + vendor_name: str + + def matches(self, class_name: str) -> bool: + """Check if class name matches this vendor rule.""" + if class_name in self.exact_matches: + return True + class_lower = class_name.lower() + return any(pattern in class_lower for pattern in self.patterns) + + +def _get_vendor_rules() -> List[VendorRule]: + """ + Get vendor detection rules ordered by specificity (most specific first). 
+
+    Returns:
+        List of VendorRule objects for detecting embedding vendors from class names
+    """
+    return [
+        VendorRule(
+            exact_matches={"AzureOpenAIEmbedding"},
+            patterns=["azure"],
+            vendor_name="azure",
+        ),
+        VendorRule(
+            exact_matches={"OpenAIEmbedding"},
+            patterns=["openai"],
+            vendor_name="openai",
+        ),
+        VendorRule(
+            exact_matches={"BedrockEmbedding"},
+            patterns=["bedrock", "aws"],
+            vendor_name="aws",
+        ),
+        VendorRule(
+            exact_matches={"VertexTextEmbedding", "GeminiEmbedding", "GooglePaLMEmbedding"},
+            patterns=["vertex", "google", "palm", "gemini"],
+            vendor_name="google",
+        ),
+        VendorRule(
+            exact_matches={"CohereEmbedding"},
+            patterns=["cohere"],
+            vendor_name="cohere",
+        ),
+        VendorRule(
+            exact_matches={"HuggingFaceEmbedding", "HuggingFaceInferenceAPIEmbedding"},
+            patterns=["huggingface"],
+            vendor_name="huggingface",
+        ),
+        VendorRule(
+            exact_matches={"OllamaEmbedding"},
+            patterns=["ollama"],
+            vendor_name="ollama",
+        ),
+        VendorRule(
+            exact_matches={"AnthropicEmbedding"},
+            patterns=["anthropic"],
+            vendor_name="anthropic",
+        ),
+        VendorRule(
+            exact_matches={"MistralAIEmbedding"},
+            patterns=["mistral"],
+            vendor_name="mistralai",
+        ),
+        VendorRule(
+            exact_matches={"TogetherEmbedding"},
+            patterns=["together"],
+            vendor_name="together",
+        ),
+        VendorRule(
+            exact_matches={"FireworksEmbedding"},
+            patterns=["fireworks"],
+            vendor_name="fireworks",
+        ),
+        VendorRule(
+            exact_matches={"VoyageEmbedding"},
+            patterns=["voyage"],
+            vendor_name="voyage",
+        ),
+        VendorRule(
+            exact_matches={"JinaEmbedding"},
+            patterns=["jina"],
+            vendor_name="jina",
+        ),
+    ]
+
+
+def detect_vendor_from_class(class_name: str) -> str:
+    """
+    Detect vendor from LlamaIndex embedding class name.
+    Uses unified detection rules combining exact matches and patterns.
+ + Args: + class_name: The class name from serialized embedding information + + Returns: + Vendor string (lowercase), defaults to None if no match found + """ + if not class_name: + return None + + vendor_rules = _get_vendor_rules() + + for rule in vendor_rules: + if rule.matches(class_name): + return rule.vendor_name + + return None diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py new file mode 100644 index 0000000..355a057 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py @@ -0,0 +1,151 @@ +"""Test embedding instrumentation for LlamaIndex.""" + +import os + +from llama_index.core import Settings +from llama_index.core.callbacks import CallbackManager +from llama_index.embeddings.openai import OpenAIEmbedding +from opentelemetry import metrics, trace +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor + + +# Global setup - shared across tests +metric_reader = None +instrumentor = None + + +def setup_telemetry(): + """Setup OpenTelemetry with span and metric exporters (once).""" + global metric_reader, instrumentor + + if metric_reader is not None: + return metric_reader + + # Enable metrics + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + trace.get_tracer_provider().add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics with InMemoryMetricReader + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + # Enable instrumentation once + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument( + tracer_provider=trace.get_tracer_provider(), + meter_provider=metrics.get_meter_provider(), + ) + + return metric_reader + + +def test_embedding_single_text(): + """Test single text embedding instrumentation.""" + print("\nTest: Single Text Embedding") + print("=" * 60) + + metric_reader = setup_telemetry() + + # Configure embedding model + embed_model = OpenAIEmbedding( + model="text-embedding-3-small", + api_key=os.environ.get("OPENAI_API_KEY"), + ) + Settings.embed_model = embed_model + + # Make sure callback manager is initialized + if Settings.callback_manager is None: + Settings.callback_manager = CallbackManager() + + # Generate single embedding + text = "LlamaIndex is a data framework for LLM applications" + embedding = embed_model.get_text_embedding(text) + + print(f"\nText: {text}") + print(f"Embedding dimension: {len(embedding)}") + print(f"First 5 values: {embedding[:5]}") + + # Validate metrics + print("\nMetrics:") + metrics_data = metric_reader.get_metrics_data() + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + print(f"\nMetric: {metric.name}") + for data_point in metric.data.data_points: + if hasattr(data_point, "bucket_counts"): + # Histogram + print(f" Count: {sum(data_point.bucket_counts)}") + else: + # Counter + print(f" Value: 
{data_point.value}") + + print("\nTest completed successfully") + + +def test_embedding_batch(): + """Test batch embedding instrumentation.""" + print("\nTest: Batch Embeddings") + print("=" * 60) + + metric_reader = setup_telemetry() + + # Configure embedding model + embed_model = OpenAIEmbedding( + model="text-embedding-3-small", + api_key=os.environ.get("OPENAI_API_KEY"), + ) + Settings.embed_model = embed_model + + # Make sure callback manager is initialized + if Settings.callback_manager is None: + Settings.callback_manager = CallbackManager() + + # Generate batch embeddings + texts = [ + "Paris is the capital of France", + "Berlin is the capital of Germany", + "Rome is the capital of Italy", + ] + embeddings = embed_model.get_text_embedding_batch(texts) + + print(f"\nEmbedded {len(embeddings)} texts") + print(f"Dimension: {len(embeddings[0])}") + + # Validate metrics + print("\nMetrics:") + metrics_data = metric_reader.get_metrics_data() + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + print(f"\nMetric: {metric.name}") + for data_point in metric.data.data_points: + if hasattr(data_point, "bucket_counts"): + # Histogram + print(f" Count: {sum(data_point.bucket_counts)}") + else: + # Counter + print(f" Value: {data_point.value}") + + print("\nTest completed successfully") + + +if __name__ == "__main__": + test_embedding_single_text() + print("\n" + "=" * 60 + "\n") + test_embedding_batch() + + # Cleanup + if instrumentor: + instrumentor.uninstrument() From 5ed2d858ebf8fe33d0e897ed35537b889de79810 Mon Sep 17 00:00:00 2001 From: shuningc Date: Tue, 25 Nov 2025 15:31:30 -0800 Subject: [PATCH 4/4] Adding temporary RAG instrumentation solution for llamaindex --- .../llamaindex/callback_handler.py | 280 +++++++++++++++++- .../tests/test_rag.py | 97 ++++++ 2 files changed, 373 insertions(+), 4 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py index fe9a1a1..a070e06 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py @@ -2,14 +2,15 @@ from llama_index.core.callbacks.base_handler import BaseCallbackHandler from llama_index.core.callbacks.schema import CBEventType - from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.types import ( EmbeddingInvocation, InputMessage, LLMInvocation, OutputMessage, + Step, Text, + Workflow, ) from .vendor_detection import detect_vendor_from_class @@ -35,6 +36,7 @@ def __init__( event_ends_to_ignore=[], ) self._handler = telemetry_handler + self._auto_workflow_id: Optional[str] = None # Track auto-created workflow def start_trace(self, trace_id: Optional[str] = None) -> None: """Start a trace - required by BaseCallbackHandler.""" @@ -48,6 +50,17 @@ def end_trace( """End a trace - required by BaseCallbackHandler.""" pass + def _get_parent_span(self, parent_id: str) -> Optional[Any]: + """Get parent span from handler's registry using parent_id.""" + if not self._handler or not 
parent_id: + return None + # Get the parent entity from handler's registry + parent_entity = self._handler.get_entity(parent_id) + if parent_entity: + # Return the span attribute if it exists + return getattr(parent_entity, "span", None) + return None + def on_event_start( self, event_type: CBEventType, @@ -56,11 +69,17 @@ def on_event_start( parent_id: str = "", **kwargs: Any, ) -> str: - """Handle event start - processing LLM and EMBEDDING events.""" + """Handle event start - processing LLM, EMBEDDING, QUERY, RETRIEVE, and SYNTHESIZE events.""" if event_type == CBEventType.LLM: self._handle_llm_start(event_id, parent_id, payload, **kwargs) elif event_type == CBEventType.EMBEDDING: self._handle_embedding_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.QUERY: + self._handle_query_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.RETRIEVE: + self._handle_retrieve_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.SYNTHESIZE: + self._handle_synthesize_start(event_id, parent_id, payload, **kwargs) return event_id def on_event_end( @@ -70,11 +89,17 @@ def on_event_end( event_id: str = "", **kwargs: Any, ) -> None: - """Handle event end - processing LLM and EMBEDDING events.""" + """Handle event end - processing LLM, EMBEDDING, QUERY, RETRIEVE, and SYNTHESIZE events.""" if event_type == CBEventType.LLM: self._handle_llm_end(event_id, payload, **kwargs) elif event_type == CBEventType.EMBEDDING: self._handle_embedding_end(event_id, payload, **kwargs) + elif event_type == CBEventType.QUERY: + self._handle_query_end(event_id, payload, **kwargs) + elif event_type == CBEventType.RETRIEVE: + self._handle_retrieve_end(event_id, payload, **kwargs) + elif event_type == CBEventType.SYNTHESIZE: + self._handle_synthesize_end(event_id, payload, **kwargs) def _handle_llm_start( self, @@ -143,12 +168,18 @@ def _handle_llm_start( input_messages=input_messages, attributes={}, run_id=event_id, # Use event_id as run_id for registry lookup + parent_run_id=parent_id if parent_id else None, # Set parent for hierarchy ) llm_inv.framework = "llamaindex" + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + llm_inv.parent_span = parent_span # type: ignore[attr-defined] # Start the LLM invocation (handler stores it in _entity_registry) self._handler.start_llm(llm_inv) - + def _handle_llm_end( self, event_id: str, @@ -261,8 +292,14 @@ def _handle_embedding_start( provider=provider, attributes={}, run_id=event_id, + parent_run_id=parent_id if parent_id else None, # Set parent for hierarchy ) emb_inv.framework = "llamaindex" + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + emb_inv.parent_span = parent_span # type: ignore[attr-defined] # Start the embedding invocation self._handler.start_embedding(emb_inv) @@ -303,3 +340,238 @@ def _handle_embedding_end( # Stop the embedding invocation self._handler.stop_embedding(emb_inv) + + def _handle_query_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle query pipeline start - create Workflow if no parent, else Step.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # If no parent, this is the root workflow + if not parent_id: + workflow = Workflow( + name="llama_index_query_pipeline", + 
workflow_type="workflow", + initial_input=_safe_str(query_str), + attributes={}, + run_id=event_id, + ) + workflow.framework = "llamaindex" + self._handler.start_workflow(workflow) + + def _handle_query_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle query pipeline end.""" + if not self._handler: + return + + entity = self._handler.get_entity(event_id) + if not entity: + return + + if isinstance(entity, Workflow): + if payload: + response = payload.get("response") + if response: + # Extract response text + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + entity.final_output = _safe_str(response_text) + self._handler.stop_workflow(entity) + elif isinstance(entity, Step): + if payload: + response = payload.get("response") + if response: + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + entity.output_data = _safe_str(response_text) + self._handler.stop_step(entity) + + def _handle_retrieve_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle retrieval start - create Step for retrieve task.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # If parent_id doesn't exist or doesn't resolve to a tracked entity, + # create a root Workflow to hold the RAG steps + parent_entity = self._handler.get_entity(parent_id) if parent_id else None + + if not parent_entity: + # No valid parent - create auto-workflow + workflow_id = f"{event_id}_workflow" + workflow = Workflow( + name="llama_index_rag", + workflow_type="rag", + initial_input=_safe_str(query_str), + attributes={}, + run_id=workflow_id, + ) + workflow.framework = "llamaindex" + self._handler.start_workflow(workflow) + # Track this auto-created workflow + self._auto_workflow_id = workflow_id + # Get the workflow's span to use as parent + workflow_entity = self._handler.get_entity(workflow_id) + if workflow_entity: + parent_span = getattr(workflow_entity, "span", None) + else: + parent_span = None + else: + # Valid parent exists - resolve to parent_span + parent_span = self._get_parent_span(parent_id) + + # Create a step for the retrieval task + step = Step( + name="retrieve.task", + step_type="retrieve", + objective="Retrieve relevant documents", + input_data=_safe_str(query_str), + run_id=event_id, + parent_run_id=parent_id if parent_id else None, + attributes={}, + ) + + # Set parent_span if we have one + if parent_span: + step.parent_span = parent_span # type: ignore[attr-defined] + + self._handler.start_step(step) + + def _handle_retrieve_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle retrieval end - update step with retrieved nodes.""" + if not self._handler: + return + + step = self._handler.get_entity(event_id) + if not step or not isinstance(step, Step): + return + + if payload: + nodes = payload.get("nodes", []) + if nodes: + # Store document count and scores + step.attributes["retrieve.documents_count"] = len(nodes) + scores = [] + doc_ids = [] + for node in nodes: + if hasattr(node, "score") and node.score is not None: + scores.append(node.score) + if hasattr(node, "node_id"): + 
doc_ids.append(str(node.node_id)) + elif hasattr(node, "id_"): + doc_ids.append(str(node.id_)) + + if scores: + step.attributes["retrieve.scores"] = scores + if doc_ids: + step.attributes["retrieve.document_ids"] = doc_ids + + # Create output summary + step.output_data = f"Retrieved {len(nodes)} documents" + + self._handler.stop_step(step) + + def _handle_synthesize_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle synthesis start - create Step for synthesize task.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # Create a step for the synthesis task + step = Step( + name="synthesize.task", + step_type="synthesize", + objective="Synthesize response from retrieved documents", + input_data=_safe_str(query_str), + run_id=event_id, + parent_run_id=parent_id if parent_id else None, + attributes={}, + ) + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + step.parent_span = parent_span # type: ignore[attr-defined] + + self._handler.start_step(step) + + def _handle_synthesize_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle synthesis end - update step with synthesized response.""" + if not self._handler: + return + + step = self._handler.get_entity(event_id) + if not step or not isinstance(step, Step): + return + + if payload: + response = payload.get("response") + if response: + # Extract response text + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + step.output_data = _safe_str(response_text) + + self._handler.stop_step(step) + + # If we auto-created a workflow, close it after synthesize completes + if self._auto_workflow_id: + workflow = self._handler.get_entity(self._auto_workflow_id) + if workflow and isinstance(workflow, Workflow): + # Set final output from synthesize response + if payload: + response = payload.get("response") + if response: + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + workflow.final_output = _safe_str(response_text) + self._handler.stop_workflow(workflow) + self._auto_workflow_id = None # Reset for next query diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py new file mode 100644 index 0000000..fb8b718 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py @@ -0,0 +1,97 @@ +""" +Test LlamaIndex RAG instrumentation without agents. + +This test validates that: +1. QUERY events create Workflow spans at the root level +2. RETRIEVE events create Step spans with parent_run_id pointing to the Workflow +3. SYNTHESIZE events create Step spans with parent_run_id pointing to the Workflow +4. LLM invocations nest under their Step parent via parent_run_id +5. 
Embedding invocations nest under their Step parent via parent_run_id +""" + +from llama_index.core import Document, Settings, VectorStoreIndex +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI +from opentelemetry import trace +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + + +def setup_telemetry(): + """Setup OpenTelemetry with console exporter to see trace structure.""" + trace.set_tracer_provider(TracerProvider()) + tracer_provider = trace.get_tracer_provider() + tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) + return tracer_provider + + +def test_rag_without_agents(): + """Test RAG instrumentation creates correct hierarchy: Workflow -> Steps -> LLM/Embedding""" + + print("=" * 80) + print("Setting up telemetry...") + print("=" * 80) + setup_telemetry() + + # Setup LlamaIndex + Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.1) + Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small") + + # Instrument + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument() + + # Debug: Check callback handler + from llama_index.core import Settings as LlamaSettings + print(f"\nCallbacks registered: {len(LlamaSettings.callback_manager.handlers)}") + for handler in LlamaSettings.callback_manager.handlers: + print(f" Handler: {type(handler).__name__}") + + # Create sample documents + documents = [ + Document( + text="Paris is the capital of France. It has a population of over 2 million.", + metadata={"source": "geography", "country": "France"}, + ), + Document( + text="The Eiffel Tower is in Paris. It was completed in 1889.", + metadata={"source": "landmarks", "country": "France"}, + ), + ] + + print("\n" + "=" * 80) + print("Creating vector index (should see Embedding spans)...") + print("=" * 80) + index = VectorStoreIndex.from_documents(documents) + + print("\n" + "=" * 80) + print("Creating query engine...") + print("=" * 80) + query_engine = index.as_query_engine(similarity_top_k=2) + + print("\n" + "=" * 80) + print("Executing RAG query (should see Workflow -> retrieve.task/synthesize.task -> LLM/Embedding)...") + print("=" * 80) + response = query_engine.query("What is the capital of France?") + + print("\n" + "=" * 80) + print("RESULTS") + print("=" * 80) + print(f"Response: {response.response}") + print(f"Source nodes: {len(response.source_nodes)}") + + print("\n" + "=" * 80) + print("✓ Test completed!") + print("=" * 80) + print("\nExpected trace structure:") + print(" Workflow (gen_ai.operation.name=query)") + print(" ├─ Step (gen_ai.operation.name=retrieve.task)") + print(" │ └─ EmbeddingInvocation") + print(" └─ Step (gen_ai.operation.name=synthesize.task)") + print(" └─ LLMInvocation") + print("=" * 80) + + +if __name__ == "__main__": + test_rag_without_agents()
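
The RAG test above relies on reading the ConsoleSpanExporter output by eye. The following is a minimal sketch, not part of the patches above, of how the same run could be checked programmatically by capturing spans with the SDK's InMemorySpanExporter and printing each span's parent; the model names and the OPENAI_API_KEY requirement mirror test_rag.py and are assumptions rather than part of this change::

    # Sketch: collect spans in memory instead of printing them to the console.
    # Assumes the instrumentor from this patch series, the OTel SDK's
    # InMemorySpanExporter, and a valid OPENAI_API_KEY (as in test_rag.py).
    from llama_index.core import Document, Settings, VectorStoreIndex
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI
    from opentelemetry import trace
    from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )

    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    Settings.llm = OpenAI(model="gpt-4o-mini")
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
    LlamaindexInstrumentor().instrument(tracer_provider=provider)

    index = VectorStoreIndex.from_documents(
        [Document(text="Paris is the capital of France.")]
    )
    index.as_query_engine().query("What is the capital of France?")

    # Each finished span records its parent's span_id; mapping ids back to
    # names makes the Workflow -> Step -> LLM/Embedding nesting visible.
    spans = exporter.get_finished_spans()
    names_by_id = {s.context.span_id: s.name for s in spans}
    for s in spans:
        parent = names_by_id.get(s.parent.span_id) if s.parent else None
        print(f"{s.name} <- parent: {parent}")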