From 7149fed6a6f36abe1e3967f60407908dcdff37e0 Mon Sep 17 00:00:00 2001 From: shuningc Date: Mon, 17 Nov 2025 18:36:42 -0800 Subject: [PATCH 1/4] Adding Llamaindex llm instrumentation spike --- .../pyproject.toml | 58 +++++ .../instrumentation/llamaindex/__init__.py | 67 ++++++ .../llamaindex/callback_handler.py | 222 ++++++++++++++++++ .../instrumentation/llamaindex/config.py | 3 + .../instrumentation/llamaindex/version.py | 1 + .../tests/test_llm_instrumentation.py | 190 +++++++++++++++ 6 files changed, 541 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml new file mode 100644 index 0000000..55a6708 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "splunk-otel-instrumentation-llamaindex" +dynamic = ["version"] +description = "OpenTelemetry LlamaIndex instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.9" +authors = [ + { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "opentelemetry-api ~= 1.38.0.dev0", + "opentelemetry-instrumentation ~= 0.59b0.dev0", + "opentelemetry-semantic-conventions ~= 0.59b0.dev0", + "splunk-otel-util-genai>=0.1.4", +] + +[project.optional-dependencies] +instruments = ["llama-index-core >= 0.14.0"] +test = [ + "llama-index-core >= 0.14.0", + "llama-index-llms-openai >= 0.6.0", + "pytest >= 7.0.0", +] + +[project.entry-points.opentelemetry_instrumentor] +llamaindex = "opentelemetry.instrumentation.llamaindex:LlamaindexInstrumentor" + +[project.urls] +Homepage = "https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation-genai/opentelemetry-instrumentation-llamaindex" +Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/llamaindex/version.py" + +[tool.hatch.build.targets.sdist] +include = ["/src", "/tests"] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] + +[tool.ruff] +exclude = ["./"] diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py
new file mode 100644
index 0000000..c3bea98
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/__init__.py
@@ -0,0 +1,67 @@
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.util.genai.handler import get_telemetry_handler
+from opentelemetry.instrumentation.llamaindex.config import Config
+from opentelemetry.instrumentation.llamaindex.callback_handler import (
+    LlamaindexCallbackHandler,
+)
+from wrapt import wrap_function_wrapper
+
+_instruments = ("llama-index-core >= 0.14.0",)
+
+
+class LlamaindexInstrumentor(BaseInstrumentor):
+    def __init__(
+        self,
+        exception_logger=None,
+        disable_trace_context_propagation=False,
+        use_legacy_attributes: bool = True,
+    ):
+        super().__init__()
+        Config.exception_logger = exception_logger
+        Config.use_legacy_attributes = use_legacy_attributes
+        self._disable_trace_context_propagation = (
+            disable_trace_context_propagation
+        )
+        self._telemetry_handler = None
+
+    def instrumentation_dependencies(self):
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        tracer_provider = kwargs.get("tracer_provider")
+        meter_provider = kwargs.get("meter_provider")
+        logger_provider = kwargs.get("logger_provider")
+
+        self._telemetry_handler = get_telemetry_handler(
+            tracer_provider=tracer_provider,
+            meter_provider=meter_provider,
+            logger_provider=logger_provider,
+        )
+
+        llamaindexCallBackHandler = LlamaindexCallbackHandler(
+            telemetry_handler=self._telemetry_handler
+        )
+
+        wrap_function_wrapper(
+            module="llama_index.core.callbacks.base",
+            name="CallbackManager.__init__",
+            wrapper=_BaseCallbackManagerInitWrapper(llamaindexCallBackHandler),
+        )
+
+    def _uninstrument(self, **kwargs):
+        pass
+
+
+class _BaseCallbackManagerInitWrapper:
+    def __init__(self, callback_handler: "LlamaindexCallbackHandler"):
+        self._callback_handler = callback_handler
+
+    def __call__(self, wrapped, instance, args, kwargs) -> None:
+        wrapped(*args, **kwargs)
+        # LlamaIndex uses 'handlers' instead of 'inheritable_handlers'
+        for handler in instance.handlers:
+            if isinstance(handler, type(self._callback_handler)):
+                break
+        else:
+            self._callback_handler._callback_manager = instance
+            instance.add_handler(self._callback_handler)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py
new file mode 100644
index 0000000..7846cfb
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py
@@ -0,0 +1,222 @@
+from typing import Any, Dict, Optional
+
+from llama_index.core.callbacks.base_handler import BaseCallbackHandler
+from llama_index.core.callbacks.schema import CBEventType
+
+from opentelemetry.util.genai.handler import TelemetryHandler
+from opentelemetry.util.genai.types import (
+    InputMessage,
+    LLMInvocation,
+    OutputMessage,
+    Text,
+)
+
+
+def _safe_str(value: Any) -> str:
+    """Safely convert value to string."""
+    try:
+        return str(value)
+    except (TypeError, ValueError):
+        return ""
+
+
+class
LlamaindexCallbackHandler(BaseCallbackHandler): + """Simplified LlamaIndex callback handler - LLM invocation only.""" + + def __init__( + self, + telemetry_handler: Optional[TelemetryHandler] = None, + ) -> None: + super().__init__( + event_starts_to_ignore=[], + event_ends_to_ignore=[], + ) + self._handler = telemetry_handler + + def start_trace(self, trace_id: Optional[str] = None) -> None: + """Start a trace - required by BaseCallbackHandler.""" + pass + + def end_trace( + self, + trace_id: Optional[str] = None, + trace_map: Optional[Dict[str, Any]] = None, + ) -> None: + """End a trace - required by BaseCallbackHandler.""" + pass + + def on_event_start( + self, + event_type: CBEventType, + payload: Optional[Dict[str, Any]] = None, + event_id: str = "", + parent_id: str = "", + **kwargs: Any, + ) -> str: + """Handle event start - only processing LLM events.""" + if event_type == CBEventType.LLM: + self._handle_llm_start(event_id, parent_id, payload, **kwargs) + return event_id + + def on_event_end( + self, + event_type: CBEventType, + payload: Optional[Dict[str, Any]] = None, + event_id: str = "", + **kwargs: Any, + ) -> None: + """Handle event end - only processing LLM events.""" + if event_type == CBEventType.LLM: + self._handle_llm_end(event_id, payload, **kwargs) + + def _handle_llm_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle LLM invocation start.""" + if not self._handler or not payload: + return + + # Extract model information from payload + serialized = payload.get("serialized", {}) + model_name = ( + serialized.get("model") + or serialized.get("model_name") + or "unknown" + ) + + # Extract messages from payload + # LlamaIndex messages are ChatMessage objects with .content and .role properties + messages = payload.get("messages", []) + input_messages = [] + + for msg in messages: + # Handle ChatMessage objects (has .content property and .role attribute) + if hasattr(msg, "content") and hasattr(msg, "role"): + # Extract role - could be MessageRole enum + role_value = ( + str(msg.role.value) + if hasattr(msg.role, "value") + else str(msg.role) + ) + # Extract content - this is a property that pulls from blocks[0].text + content = _safe_str(msg.content) + input_messages.append( + InputMessage( + role=role_value, parts=[Text(content=content)] + ) + ) + elif isinstance(msg, dict): + # Handle serialized messages (dict format) + role = msg.get("role", "user") + # Try to extract from blocks first (LlamaIndex format) + blocks = msg.get("blocks", []) + if blocks and isinstance(blocks[0], dict): + content = blocks[0].get("text", "") + else: + # Fallback to direct content field + content = msg.get("content", "") + + role_value = ( + str(role.value) if hasattr(role, "value") else str(role) + ) + input_messages.append( + InputMessage( + role=role_value, + parts=[Text(content=_safe_str(content))], + ) + ) + + # Create LLM invocation with event_id as run_id + llm_inv = LLMInvocation( + request_model=_safe_str(model_name), + input_messages=input_messages, + attributes={}, + run_id=event_id, # Use event_id as run_id for registry lookup + ) + llm_inv.framework = "llamaindex" + + # Start the LLM invocation (handler stores it in _entity_registry) + self._handler.start_llm(llm_inv) + + def _handle_llm_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle LLM invocation end.""" + if not self._handler: + return + + # Get the LLM invocation from handler's 
registry using event_id + llm_inv = self._handler.get_entity(event_id) + if not llm_inv or not isinstance(llm_inv, LLMInvocation): + return + + if payload: + # Extract response from payload + response = payload.get("response") + + # Handle both dict and object types for response + if response: + # Get message - could be dict or object + if isinstance(response, dict): + message = response.get("message", {}) + raw_response = response.get("raw") + else: + # response is a ChatResponse object + message = getattr(response, "message", None) + raw_response = getattr(response, "raw", None) + + # Extract content from message + if message: + if isinstance(message, dict): + # Message is dict + blocks = message.get("blocks", []) + if blocks and isinstance(blocks[0], dict): + content = blocks[0].get("text", "") + else: + content = message.get("content", "") + else: + # Message is ChatMessage object + blocks = getattr(message, "blocks", []) + if blocks and len(blocks) > 0: + content = getattr(blocks[0], "text", "") + else: + content = getattr(message, "content", "") + + # Create output message + llm_inv.output_messages = [ + OutputMessage( + role="assistant", + parts=[Text(content=_safe_str(content))], + finish_reason="stop", + ) + ] + + # Extract token usage from response.raw (OpenAI format) + # LlamaIndex stores the raw API response (e.g., OpenAI response) in response.raw + # raw_response could be a dict or an object (e.g., ChatCompletion from OpenAI) + if raw_response: + # Try to get usage from dict or object + if isinstance(raw_response, dict): + usage = raw_response.get("usage", {}) + else: + # It's an object, try to get usage attribute + usage = getattr(raw_response, "usage", None) + + if usage: + # usage could also be dict or object + if isinstance(usage, dict): + llm_inv.input_tokens = usage.get("prompt_tokens") + llm_inv.output_tokens = usage.get("completion_tokens") + else: + llm_inv.input_tokens = getattr(usage, "prompt_tokens", None) + llm_inv.output_tokens = getattr(usage, "completion_tokens", None) + + # Stop the LLM invocation + self._handler.stop_llm(llm_inv) diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py new file mode 100644 index 0000000..44199c0 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/config.py @@ -0,0 +1,3 @@ +class Config: + exception_logger = None + use_legacy_attributes = True diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py new file mode 100644 index 0000000..3dc1f76 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/version.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py new file mode 100644 index 0000000..50324c3 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_llm_instrumentation.py @@ -0,0 +1,190 @@ +"""Tests for LlamaIndex LLM instrumentation with OpenTelemetry.""" + 
+import os + +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.core.llms.mock import MockLLM +from opentelemetry import metrics, trace +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + ConsoleSpanExporter, + SimpleSpanProcessor, +) +from opentelemetry.semconv._incubating.metrics import gen_ai_metrics + + +def setup_telemetry(): + """Setup OpenTelemetry with both trace and metrics exporters.""" + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + tracer_provider = trace.get_tracer_provider() + tracer_provider.add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics with InMemoryMetricReader + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + return tracer_provider, meter_provider, metric_reader + + +def test_with_openai(): + """Test with real OpenAI API - requires OPENAI_API_KEY environment variable.""" + from llama_index.llms.openai import OpenAI + + print("=" * 80) + print("Testing with OpenAI API") + print("=" * 80) + + llm = OpenAI(model="gpt-3.5-turbo") + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." + ), + ChatMessage( + role=MessageRole.USER, content="Say hello in exactly 5 words" + ), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content}") + + if hasattr(response, "raw") and response.raw: + if isinstance(response.raw, dict): + usage = response.raw.get("usage", {}) + else: + usage = getattr(response.raw, "usage", None) + + if usage: + if isinstance(usage, dict): + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + total_tokens = usage.get("total_tokens") + else: + prompt_tokens = getattr(usage, "prompt_tokens", None) + completion_tokens = getattr(usage, "completion_tokens", None) + total_tokens = getattr(usage, "total_tokens", None) + + print(f"\nToken Usage: input={prompt_tokens}, output={completion_tokens}, total={total_tokens}") + + print("=" * 80) + + +class MockLLMWithUsage(MockLLM): + """MockLLM that includes fake usage data for testing.""" + + def _complete(self, prompt, **kwargs): + """Override internal complete to inject usage data.""" + response = super()._complete(prompt, **kwargs) + # Note: MockLLM uses _complete internally, but we can't easily inject + # usage here because the ChatResponse is created later + return response + + +def test_with_mock(): + """Test with MockLLM - no API key needed.""" + print("=" * 80) + print("Testing with MockLLM") + print("=" * 80) + + llm = MockLLM(max_tokens=50) + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, content="You are a helpful assistant." 
+ ), + ChatMessage(role=MessageRole.USER, content="Say hello in 5 words"), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content[:100]}...") + print("=" * 80) + + +def test_message_extraction(): + """Test message extraction.""" + print("\n" + "=" * 80) + print("Testing message extraction") + print("=" * 80) + + llm = MockLLM(max_tokens=20) + messages = [ + ChatMessage(role=MessageRole.SYSTEM, content="You are helpful."), + ChatMessage(role=MessageRole.USER, content="Test message"), + ] + + response = llm.chat(messages) + print(f"\nResponse: {response.message.content[:50]}...") + print("=" * 80) + + +if __name__ == "__main__": + # Enable metrics emission + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup telemetry + tracer_provider, meter_provider, metric_reader = setup_telemetry() + + # Instrument LlamaIndex + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument( + tracer_provider=tracer_provider, + meter_provider=meter_provider + ) + print("LlamaIndex instrumentation enabled\n") + + # Run tests + if os.environ.get("OPENAI_API_KEY"): + print("Testing with real OpenAI API\n") + test_with_openai() + else: + print("Testing with MockLLM (set OPENAI_API_KEY to test real API)\n") + test_with_mock() + + # Test message extraction + test_message_extraction() + + # Check metrics + print("\n" + "=" * 80) + print("Metrics Summary") + print("=" * 80) + + metrics_data = metric_reader.get_metrics_data() + found_duration = False + found_token_usage = False + + if metrics_data: + for rm in getattr(metrics_data, "resource_metrics", []) or []: + for scope in getattr(rm, "scope_metrics", []) or []: + for metric in getattr(scope, "metrics", []) or []: + print(f"\nMetric: {metric.name}") + + if metric.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION: + found_duration = True + dps = getattr(metric.data, "data_points", []) + if dps: + print(f" Duration: {dps[0].sum:.4f} seconds") + print(f" Count: {dps[0].count}") + + if metric.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE: + found_token_usage = True + dps = getattr(metric.data, "data_points", []) + for dp in dps: + token_type = dp.attributes.get("gen_ai.token.type", "unknown") + print(f" Token type: {token_type}, Sum: {dp.sum}, Count: {dp.count}") + + print("\n" + "=" * 80) + status = [] + if found_duration: + status.append("Duration: OK") + if found_token_usage: + status.append("Token Usage: OK") + if not found_duration and not found_token_usage: + status.append("No metrics (use real API for metrics)") + + print("Status: " + " | ".join(status)) + print("=" * 80) From 0bde623c06ab03d8719fe34302ae466e56b05231 Mon Sep 17 00:00:00 2001 From: shuningc Date: Mon, 17 Nov 2025 18:43:00 -0800 Subject: [PATCH 2/4] Updating readme --- .../README.rst | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst new file mode 100644 index 0000000..5371d3c --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst @@ -0,0 +1,155 @@ +OpenTelemetry LlamaIndex Instrumentation +========================================= + +This library provides automatic instrumentation for LlamaIndex applications using OpenTelemetry. 
+ +Installation +------------ + +Development installation:: + + # Install the package in editable mode + cd instrumentation-genai/opentelemetry-instrumentation-llamaindex + pip install -e . + + # Install test dependencies + pip install -e ".[test]" + + # Install util-genai (required for telemetry) + cd ../../util/opentelemetry-util-genai + pip install -e . + + +Quick Start +----------- + +.. code-block:: python + + import os + from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor + from opentelemetry import trace, metrics + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import InMemoryMetricReader + + # Enable metrics (default is spans only) + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + trace.get_tracer_provider().add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + # Enable instrumentation with providers + LlamaindexInstrumentor().instrument( + tracer_provider=trace.get_tracer_provider(), + meter_provider=meter_provider + ) + + # Use LlamaIndex as normal + from llama_index.llms.openai import OpenAI + from llama_index.core.llms import ChatMessage, MessageRole + + llm = OpenAI(model="gpt-3.5-turbo") + messages = [ChatMessage(role=MessageRole.USER, content="Hello")] + response = llm.chat(messages) + + +Running Tests +------------- + +.. code-block:: bash + + # Set environment variables + export OPENAI_API_KEY=your-api-key + export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric + + # Run the test + cd tests + python test_llm_instrumentation.py + + +Expected Output +--------------- + +**Span Attributes**:: + + { + "gen_ai.framework": "llamaindex", + "gen_ai.request.model": "gpt-3.5-turbo", + "gen_ai.operation.name": "chat", + "gen_ai.usage.input_tokens": 24, + "gen_ai.usage.output_tokens": 7 + } + +**Metrics**:: + + Metric: gen_ai.client.operation.duration + Duration: 0.6900 seconds + Count: 1 + + Metric: gen_ai.client.token.usage + Token type: input, Sum: 24, Count: 1 + Token type: output, Sum: 7, Count: 1 + + +Key Implementation Differences from LangChain +---------------------------------------------- + +**1. Event-Based Callbacks** + +LlamaIndex uses ``on_event_start(event_type, ...)`` and ``on_event_end(event_type, ...)`` +instead of LangChain's method-based callbacks (``on_llm_start``, ``on_llm_end``). + +Event types are dispatched via ``CBEventType`` enum:: + + CBEventType.LLM # LLM invocations + CBEventType.AGENT # Agent steps + CBEventType.EMBEDDING # Embedding operations + +**2. Handler Registration** + +LlamaIndex uses ``handlers`` list:: + + callback_manager.handlers.append(handler) + +LangChain uses ``inheritable_handlers``:: + + callback_manager.inheritable_handlers.append(handler) + +**3. Response Structure** + +LlamaIndex ``ChatMessage`` uses ``blocks`` (list of TextBlock objects):: + + message.content # Computed property from blocks[0].text + +LangChain uses simple strings:: + + message.content # Direct string property + +**4. 
Token Usage** + +LlamaIndex returns objects (not dicts):: + + response.raw.usage.prompt_tokens # Object attribute + response.raw.usage.completion_tokens # Object attribute + +LangChain returns dicts:: + + response["usage"]["prompt_tokens"] # Dict key + response["usage"]["completion_tokens"] # Dict key + + +References +---------- + +* `OpenTelemetry Project `_ +* `LlamaIndex `_ +* `LlamaIndex Callbacks `_ From 633d0af33df0d2670a95f1c06b3ab4ca81cb4a99 Mon Sep 17 00:00:00 2001 From: shuningc Date: Tue, 18 Nov 2025 14:39:39 -0800 Subject: [PATCH 3/4] feat: Add embedding instrumentation for LlamaIndex - Add embedding event handlers (_handle_embedding_start, _handle_embedding_end) - Extract model name, input texts, and dimension count from embedding events - Create vendor_detection.py module with VendorRule-based provider detection - Support 13+ embedding providers (OpenAI, Azure, AWS, Google, Cohere, etc.) - Add test_embedding_instrumentation.py with single and batch embedding tests - Update README with embedding documentation and provider list - Tested successfully with OpenAI embeddings API --- .../README.rst | 70 +++++++- .../llamaindex/callback_handler.py | 89 ++++++++++- .../llamaindex/vendor_detection.py | 119 ++++++++++++++ .../tests/test_embedding_instrumentation.py | 151 ++++++++++++++++++ 4 files changed, 422 insertions(+), 7 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst index 5371d3c..bf9ea59 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/README.rst @@ -65,6 +65,8 @@ Quick Start Running Tests ------------- +**LLM Tests**: + .. code-block:: bash # Set environment variables @@ -75,11 +77,23 @@ Running Tests cd tests python test_llm_instrumentation.py +**Embedding Tests**: + +.. code-block:: bash + + # Set environment variables + export OPENAI_API_KEY=your-api-key + export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric + + # Run the test + cd tests + python test_embedding_instrumentation.py + Expected Output --------------- -**Span Attributes**:: +**LLM Span Attributes**:: { "gen_ai.framework": "llamaindex", @@ -89,6 +103,15 @@ Expected Output "gen_ai.usage.output_tokens": 7 } +**Embedding Span Attributes**:: + + { + "gen_ai.operation.name": "embeddings", + "gen_ai.request.model": "text-embedding-3-small", + "gen_ai.provider.name": "openai", + "gen_ai.embeddings.dimension.count": 1536 + } + **Metrics**:: Metric: gen_ai.client.operation.duration @@ -110,9 +133,9 @@ instead of LangChain's method-based callbacks (``on_llm_start``, ``on_llm_end``) Event types are dispatched via ``CBEventType`` enum:: - CBEventType.LLM # LLM invocations - CBEventType.AGENT # Agent steps - CBEventType.EMBEDDING # Embedding operations + CBEventType.LLM # LLM invocations (chat, complete) + CBEventType.AGENT # Agent steps (not yet instrumented) + CBEventType.EMBEDDING # Embedding operations (get_text_embedding, get_text_embedding_batch) **2. 
Handler Registration** @@ -147,6 +170,45 @@ LangChain returns dicts:: response["usage"]["completion_tokens"] # Dict key +Supported Features +------------------ + +**LLM Operations** + +* ✅ Chat completion (``llm.chat()``, ``llm.stream_chat()``) +* ✅ Text completion (``llm.complete()``, ``llm.stream_complete()``) +* ✅ Token usage tracking +* ✅ Model name detection +* ✅ Framework attribution + +**Embedding Operations** + +* ✅ Single text embedding (``embed_model.get_text_embedding()``) +* ✅ Batch embedding (``embed_model.get_text_embedding_batch()``) +* ✅ Query embedding (``embed_model.get_query_embedding()``) +* ✅ Provider detection (OpenAI, Azure, AWS Bedrock, Google, Cohere, HuggingFace, Ollama, and more) +* ✅ Dimension count tracking +* ✅ Input text capture + +**Provider Detection** + +Embedding instrumentation automatically detects the provider from class names: + +* **OpenAI**: ``OpenAIEmbedding`` +* **Azure**: ``AzureOpenAIEmbedding`` +* **AWS**: ``BedrockEmbedding`` +* **Google**: ``GeminiEmbedding``, ``VertexTextEmbedding``, ``GooglePaLMEmbedding`` +* **Cohere**: ``CohereEmbedding`` +* **HuggingFace**: ``HuggingFaceEmbedding``, ``HuggingFaceInferenceAPIEmbedding`` +* **Ollama**: ``OllamaEmbedding`` +* **Anthropic**: ``AnthropicEmbedding`` +* **MistralAI**: ``MistralAIEmbedding`` +* **Together**: ``TogetherEmbedding`` +* **Fireworks**: ``FireworksEmbedding`` +* **Voyage**: ``VoyageEmbedding`` +* **Jina**: ``JinaEmbedding`` + + References ---------- diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py index 7846cfb..fe9a1a1 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py @@ -5,12 +5,15 @@ from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.types import ( + EmbeddingInvocation, InputMessage, LLMInvocation, OutputMessage, Text, ) +from .vendor_detection import detect_vendor_from_class + def _safe_str(value: Any) -> str: """Safely convert value to string.""" @@ -21,7 +24,7 @@ def _safe_str(value: Any) -> str: class LlamaindexCallbackHandler(BaseCallbackHandler): - """Simplified LlamaIndex callback handler - LLM invocation only.""" + """LlamaIndex callback handler supporting LLM and Embedding instrumentation.""" def __init__( self, @@ -53,9 +56,11 @@ def on_event_start( parent_id: str = "", **kwargs: Any, ) -> str: - """Handle event start - only processing LLM events.""" + """Handle event start - processing LLM and EMBEDDING events.""" if event_type == CBEventType.LLM: self._handle_llm_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.EMBEDDING: + self._handle_embedding_start(event_id, parent_id, payload, **kwargs) return event_id def on_event_end( @@ -65,9 +70,11 @@ def on_event_end( event_id: str = "", **kwargs: Any, ) -> None: - """Handle event end - only processing LLM events.""" + """Handle event end - processing LLM and EMBEDDING events.""" if event_type == CBEventType.LLM: self._handle_llm_end(event_id, payload, **kwargs) + elif event_type == CBEventType.EMBEDDING: + self._handle_embedding_end(event_id, payload, **kwargs) def _handle_llm_start( self, @@ -220,3 +227,79 @@ def 
_handle_llm_end( # Stop the LLM invocation self._handler.stop_llm(llm_inv) + + def _handle_embedding_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle embedding invocation start.""" + if not self._handler or not payload: + return + + # Extract model information from payload + serialized = payload.get("serialized", {}) + model_name = ( + serialized.get("model_name") + or serialized.get("model") + or "unknown" + ) + + # Detect provider from class name + class_name = serialized.get("class_name", "") + provider = detect_vendor_from_class(class_name) + + # Note: input texts are not available at start time in LlamaIndex + # They will be available in the end event payload + + # Create embedding invocation with event_id as run_id + emb_inv = EmbeddingInvocation( + request_model=_safe_str(model_name), + input_texts=[], # Will be populated on end event + provider=provider, + attributes={}, + run_id=event_id, + ) + emb_inv.framework = "llamaindex" + + # Start the embedding invocation + self._handler.start_embedding(emb_inv) + + def _handle_embedding_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle embedding invocation end.""" + if not self._handler: + return + + # Get the embedding invocation from handler's registry using event_id + emb_inv = self._handler.get_entity(event_id) + if not emb_inv or not isinstance(emb_inv, EmbeddingInvocation): + return + + if payload: + # Extract input chunks (texts) from response + # chunks is the list of input texts that were embedded + chunks = payload.get("chunks", []) + if chunks: + emb_inv.input_texts = [_safe_str(chunk) for chunk in chunks] + + # Extract embedding vectors from response + # embeddings is the list of output vectors + embeddings = payload.get("embeddings", []) + + # Determine dimension from first embedding vector + if embeddings and len(embeddings) > 0: + first_embedding = embeddings[0] + if isinstance(first_embedding, list): + emb_inv.dimension_count = len(first_embedding) + elif hasattr(first_embedding, "__len__"): + emb_inv.dimension_count = len(first_embedding) + + # Stop the embedding invocation + self._handler.stop_embedding(emb_inv) diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py new file mode 100644 index 0000000..6f9c9f0 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/vendor_detection.py @@ -0,0 +1,119 @@ +"""Vendor detection for LlamaIndex embedding providers.""" + +from dataclasses import dataclass +from typing import List, Set + + +@dataclass(frozen=True) +class VendorRule: + """Rule for detecting vendor from LlamaIndex class names.""" + + exact_matches: Set[str] + patterns: List[str] + vendor_name: str + + def matches(self, class_name: str) -> bool: + """Check if class name matches this vendor rule.""" + if class_name in self.exact_matches: + return True + class_lower = class_name.lower() + return any(pattern in class_lower for pattern in self.patterns) + + +def _get_vendor_rules() -> List[VendorRule]: + """ + Get vendor detection rules ordered by specificity (most specific first). 
+
+    Returns:
+        List of VendorRule objects for detecting embedding vendors from class names
+    """
+    return [
+        VendorRule(
+            exact_matches={"AzureOpenAIEmbedding"},
+            patterns=["azure"],
+            vendor_name="azure",
+        ),
+        VendorRule(
+            exact_matches={"OpenAIEmbedding"},
+            patterns=["openai"],
+            vendor_name="openai",
+        ),
+        VendorRule(
+            exact_matches={"BedrockEmbedding"},
+            patterns=["bedrock", "aws"],
+            vendor_name="aws",
+        ),
+        VendorRule(
+            exact_matches={"VertexTextEmbedding", "GeminiEmbedding", "GooglePaLMEmbedding"},
+            patterns=["vertex", "google", "palm", "gemini"],
+            vendor_name="google",
+        ),
+        VendorRule(
+            exact_matches={"CohereEmbedding"},
+            patterns=["cohere"],
+            vendor_name="cohere",
+        ),
+        VendorRule(
+            exact_matches={"HuggingFaceEmbedding", "HuggingFaceInferenceAPIEmbedding"},
+            patterns=["huggingface"],
+            vendor_name="huggingface",
+        ),
+        VendorRule(
+            exact_matches={"OllamaEmbedding"},
+            patterns=["ollama"],
+            vendor_name="ollama",
+        ),
+        VendorRule(
+            exact_matches={"AnthropicEmbedding"},
+            patterns=["anthropic"],
+            vendor_name="anthropic",
+        ),
+        VendorRule(
+            exact_matches={"MistralAIEmbedding"},
+            patterns=["mistral"],
+            vendor_name="mistralai",
+        ),
+        VendorRule(
+            exact_matches={"TogetherEmbedding"},
+            patterns=["together"],
+            vendor_name="together",
+        ),
+        VendorRule(
+            exact_matches={"FireworksEmbedding"},
+            patterns=["fireworks"],
+            vendor_name="fireworks",
+        ),
+        VendorRule(
+            exact_matches={"VoyageEmbedding"},
+            patterns=["voyage"],
+            vendor_name="voyage",
+        ),
+        VendorRule(
+            exact_matches={"JinaEmbedding"},
+            patterns=["jina"],
+            vendor_name="jina",
+        ),
+    ]
+
+
+def detect_vendor_from_class(class_name: str) -> str:
+    """
+    Detect vendor from LlamaIndex embedding class name.
+    Uses unified detection rules combining exact matches and patterns.
+ + Args: + class_name: The class name from serialized embedding information + + Returns: + Vendor string (lowercase), defaults to None if no match found + """ + if not class_name: + return None + + vendor_rules = _get_vendor_rules() + + for rule in vendor_rules: + if rule.matches(class_name): + return rule.vendor_name + + return None diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py new file mode 100644 index 0000000..355a057 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_embedding_instrumentation.py @@ -0,0 +1,151 @@ +"""Test embedding instrumentation for LlamaIndex.""" + +import os + +from llama_index.core import Settings +from llama_index.core.callbacks import CallbackManager +from llama_index.embeddings.openai import OpenAIEmbedding +from opentelemetry import metrics, trace +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor + + +# Global setup - shared across tests +metric_reader = None +instrumentor = None + + +def setup_telemetry(): + """Setup OpenTelemetry with span and metric exporters (once).""" + global metric_reader, instrumentor + + if metric_reader is not None: + return metric_reader + + # Enable metrics + os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric" + + # Setup tracing + trace.set_tracer_provider(TracerProvider()) + trace.get_tracer_provider().add_span_processor( + SimpleSpanProcessor(ConsoleSpanExporter()) + ) + + # Setup metrics with InMemoryMetricReader + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + # Enable instrumentation once + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument( + tracer_provider=trace.get_tracer_provider(), + meter_provider=metrics.get_meter_provider(), + ) + + return metric_reader + + +def test_embedding_single_text(): + """Test single text embedding instrumentation.""" + print("\nTest: Single Text Embedding") + print("=" * 60) + + metric_reader = setup_telemetry() + + # Configure embedding model + embed_model = OpenAIEmbedding( + model="text-embedding-3-small", + api_key=os.environ.get("OPENAI_API_KEY"), + ) + Settings.embed_model = embed_model + + # Make sure callback manager is initialized + if Settings.callback_manager is None: + Settings.callback_manager = CallbackManager() + + # Generate single embedding + text = "LlamaIndex is a data framework for LLM applications" + embedding = embed_model.get_text_embedding(text) + + print(f"\nText: {text}") + print(f"Embedding dimension: {len(embedding)}") + print(f"First 5 values: {embedding[:5]}") + + # Validate metrics + print("\nMetrics:") + metrics_data = metric_reader.get_metrics_data() + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + print(f"\nMetric: {metric.name}") + for data_point in metric.data.data_points: + if hasattr(data_point, "bucket_counts"): + # Histogram + print(f" Count: {sum(data_point.bucket_counts)}") + else: + # Counter + print(f" Value: 
{data_point.value}") + + print("\nTest completed successfully") + + +def test_embedding_batch(): + """Test batch embedding instrumentation.""" + print("\nTest: Batch Embeddings") + print("=" * 60) + + metric_reader = setup_telemetry() + + # Configure embedding model + embed_model = OpenAIEmbedding( + model="text-embedding-3-small", + api_key=os.environ.get("OPENAI_API_KEY"), + ) + Settings.embed_model = embed_model + + # Make sure callback manager is initialized + if Settings.callback_manager is None: + Settings.callback_manager = CallbackManager() + + # Generate batch embeddings + texts = [ + "Paris is the capital of France", + "Berlin is the capital of Germany", + "Rome is the capital of Italy", + ] + embeddings = embed_model.get_text_embedding_batch(texts) + + print(f"\nEmbedded {len(embeddings)} texts") + print(f"Dimension: {len(embeddings[0])}") + + # Validate metrics + print("\nMetrics:") + metrics_data = metric_reader.get_metrics_data() + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + print(f"\nMetric: {metric.name}") + for data_point in metric.data.data_points: + if hasattr(data_point, "bucket_counts"): + # Histogram + print(f" Count: {sum(data_point.bucket_counts)}") + else: + # Counter + print(f" Value: {data_point.value}") + + print("\nTest completed successfully") + + +if __name__ == "__main__": + test_embedding_single_text() + print("\n" + "=" * 60 + "\n") + test_embedding_batch() + + # Cleanup + if instrumentor: + instrumentor.uninstrument() From 5ed2d858ebf8fe33d0e897ed35537b889de79810 Mon Sep 17 00:00:00 2001 From: shuningc Date: Tue, 25 Nov 2025 15:31:30 -0800 Subject: [PATCH 4/4] Adding temporary RAG instrumentation solution for llamaindex --- .../llamaindex/callback_handler.py | 280 +++++++++++++++++- .../tests/test_rag.py | 97 ++++++ 2 files changed, 373 insertions(+), 4 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py index fe9a1a1..a070e06 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/src/opentelemetry/instrumentation/llamaindex/callback_handler.py @@ -2,14 +2,15 @@ from llama_index.core.callbacks.base_handler import BaseCallbackHandler from llama_index.core.callbacks.schema import CBEventType - from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.types import ( EmbeddingInvocation, InputMessage, LLMInvocation, OutputMessage, + Step, Text, + Workflow, ) from .vendor_detection import detect_vendor_from_class @@ -35,6 +36,7 @@ def __init__( event_ends_to_ignore=[], ) self._handler = telemetry_handler + self._auto_workflow_id: Optional[str] = None # Track auto-created workflow def start_trace(self, trace_id: Optional[str] = None) -> None: """Start a trace - required by BaseCallbackHandler.""" @@ -48,6 +50,17 @@ def end_trace( """End a trace - required by BaseCallbackHandler.""" pass + def _get_parent_span(self, parent_id: str) -> Optional[Any]: + """Get parent span from handler's registry using parent_id.""" + if not self._handler or not 
parent_id: + return None + # Get the parent entity from handler's registry + parent_entity = self._handler.get_entity(parent_id) + if parent_entity: + # Return the span attribute if it exists + return getattr(parent_entity, "span", None) + return None + def on_event_start( self, event_type: CBEventType, @@ -56,11 +69,17 @@ def on_event_start( parent_id: str = "", **kwargs: Any, ) -> str: - """Handle event start - processing LLM and EMBEDDING events.""" + """Handle event start - processing LLM, EMBEDDING, QUERY, RETRIEVE, and SYNTHESIZE events.""" if event_type == CBEventType.LLM: self._handle_llm_start(event_id, parent_id, payload, **kwargs) elif event_type == CBEventType.EMBEDDING: self._handle_embedding_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.QUERY: + self._handle_query_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.RETRIEVE: + self._handle_retrieve_start(event_id, parent_id, payload, **kwargs) + elif event_type == CBEventType.SYNTHESIZE: + self._handle_synthesize_start(event_id, parent_id, payload, **kwargs) return event_id def on_event_end( @@ -70,11 +89,17 @@ def on_event_end( event_id: str = "", **kwargs: Any, ) -> None: - """Handle event end - processing LLM and EMBEDDING events.""" + """Handle event end - processing LLM, EMBEDDING, QUERY, RETRIEVE, and SYNTHESIZE events.""" if event_type == CBEventType.LLM: self._handle_llm_end(event_id, payload, **kwargs) elif event_type == CBEventType.EMBEDDING: self._handle_embedding_end(event_id, payload, **kwargs) + elif event_type == CBEventType.QUERY: + self._handle_query_end(event_id, payload, **kwargs) + elif event_type == CBEventType.RETRIEVE: + self._handle_retrieve_end(event_id, payload, **kwargs) + elif event_type == CBEventType.SYNTHESIZE: + self._handle_synthesize_end(event_id, payload, **kwargs) def _handle_llm_start( self, @@ -143,12 +168,18 @@ def _handle_llm_start( input_messages=input_messages, attributes={}, run_id=event_id, # Use event_id as run_id for registry lookup + parent_run_id=parent_id if parent_id else None, # Set parent for hierarchy ) llm_inv.framework = "llamaindex" + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + llm_inv.parent_span = parent_span # type: ignore[attr-defined] # Start the LLM invocation (handler stores it in _entity_registry) self._handler.start_llm(llm_inv) - + def _handle_llm_end( self, event_id: str, @@ -261,8 +292,14 @@ def _handle_embedding_start( provider=provider, attributes={}, run_id=event_id, + parent_run_id=parent_id if parent_id else None, # Set parent for hierarchy ) emb_inv.framework = "llamaindex" + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + emb_inv.parent_span = parent_span # type: ignore[attr-defined] # Start the embedding invocation self._handler.start_embedding(emb_inv) @@ -303,3 +340,238 @@ def _handle_embedding_end( # Stop the embedding invocation self._handler.stop_embedding(emb_inv) + + def _handle_query_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle query pipeline start - create Workflow if no parent, else Step.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # If no parent, this is the root workflow + if not parent_id: + workflow = Workflow( + name="llama_index_query_pipeline", + 
workflow_type="workflow", + initial_input=_safe_str(query_str), + attributes={}, + run_id=event_id, + ) + workflow.framework = "llamaindex" + self._handler.start_workflow(workflow) + + def _handle_query_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle query pipeline end.""" + if not self._handler: + return + + entity = self._handler.get_entity(event_id) + if not entity: + return + + if isinstance(entity, Workflow): + if payload: + response = payload.get("response") + if response: + # Extract response text + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + entity.final_output = _safe_str(response_text) + self._handler.stop_workflow(entity) + elif isinstance(entity, Step): + if payload: + response = payload.get("response") + if response: + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + entity.output_data = _safe_str(response_text) + self._handler.stop_step(entity) + + def _handle_retrieve_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle retrieval start - create Step for retrieve task.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # If parent_id doesn't exist or doesn't resolve to a tracked entity, + # create a root Workflow to hold the RAG steps + parent_entity = self._handler.get_entity(parent_id) if parent_id else None + + if not parent_entity: + # No valid parent - create auto-workflow + workflow_id = f"{event_id}_workflow" + workflow = Workflow( + name="llama_index_rag", + workflow_type="rag", + initial_input=_safe_str(query_str), + attributes={}, + run_id=workflow_id, + ) + workflow.framework = "llamaindex" + self._handler.start_workflow(workflow) + # Track this auto-created workflow + self._auto_workflow_id = workflow_id + # Get the workflow's span to use as parent + workflow_entity = self._handler.get_entity(workflow_id) + if workflow_entity: + parent_span = getattr(workflow_entity, "span", None) + else: + parent_span = None + else: + # Valid parent exists - resolve to parent_span + parent_span = self._get_parent_span(parent_id) + + # Create a step for the retrieval task + step = Step( + name="retrieve.task", + step_type="retrieve", + objective="Retrieve relevant documents", + input_data=_safe_str(query_str), + run_id=event_id, + parent_run_id=parent_id if parent_id else None, + attributes={}, + ) + + # Set parent_span if we have one + if parent_span: + step.parent_span = parent_span # type: ignore[attr-defined] + + self._handler.start_step(step) + + def _handle_retrieve_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle retrieval end - update step with retrieved nodes.""" + if not self._handler: + return + + step = self._handler.get_entity(event_id) + if not step or not isinstance(step, Step): + return + + if payload: + nodes = payload.get("nodes", []) + if nodes: + # Store document count and scores + step.attributes["retrieve.documents_count"] = len(nodes) + scores = [] + doc_ids = [] + for node in nodes: + if hasattr(node, "score") and node.score is not None: + scores.append(node.score) + if hasattr(node, "node_id"): + 
doc_ids.append(str(node.node_id)) + elif hasattr(node, "id_"): + doc_ids.append(str(node.id_)) + + if scores: + step.attributes["retrieve.scores"] = scores + if doc_ids: + step.attributes["retrieve.document_ids"] = doc_ids + + # Create output summary + step.output_data = f"Retrieved {len(nodes)} documents" + + self._handler.stop_step(step) + + def _handle_synthesize_start( + self, + event_id: str, + parent_id: str, + payload: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + """Handle synthesis start - create Step for synthesize task.""" + if not self._handler or not payload: + return + + query_str = payload.get("query_str", "") + + # Create a step for the synthesis task + step = Step( + name="synthesize.task", + step_type="synthesize", + objective="Synthesize response from retrieved documents", + input_data=_safe_str(query_str), + run_id=event_id, + parent_run_id=parent_id if parent_id else None, + attributes={}, + ) + + # Resolve parent_id to parent_span for proper span context + parent_span = self._get_parent_span(parent_id) + if parent_span: + step.parent_span = parent_span # type: ignore[attr-defined] + + self._handler.start_step(step) + + def _handle_synthesize_end( + self, + event_id: str, + payload: Optional[Dict[str, Any]], + **kwargs: Any, + ) -> None: + """Handle synthesis end - update step with synthesized response.""" + if not self._handler: + return + + step = self._handler.get_entity(event_id) + if not step or not isinstance(step, Step): + return + + if payload: + response = payload.get("response") + if response: + # Extract response text + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + step.output_data = _safe_str(response_text) + + self._handler.stop_step(step) + + # If we auto-created a workflow, close it after synthesize completes + if self._auto_workflow_id: + workflow = self._handler.get_entity(self._auto_workflow_id) + if workflow and isinstance(workflow, Workflow): + # Set final output from synthesize response + if payload: + response = payload.get("response") + if response: + response_text = "" + if isinstance(response, dict): + response_text = response.get("response", "") + elif hasattr(response, "response"): + response_text = getattr(response, "response", "") + workflow.final_output = _safe_str(response_text) + self._handler.stop_workflow(workflow) + self._auto_workflow_id = None # Reset for next query diff --git a/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py new file mode 100644 index 0000000..fb8b718 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-llamaindex/tests/test_rag.py @@ -0,0 +1,97 @@ +""" +Test LlamaIndex RAG instrumentation without agents. + +This test validates that: +1. QUERY events create Workflow spans at the root level +2. RETRIEVE events create Step spans with parent_run_id pointing to the Workflow +3. SYNTHESIZE events create Step spans with parent_run_id pointing to the Workflow +4. LLM invocations nest under their Step parent via parent_run_id +5. 
Embedding invocations nest under their Step parent via parent_run_id +""" + +from llama_index.core import Document, Settings, VectorStoreIndex +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI +from opentelemetry import trace +from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + + +def setup_telemetry(): + """Setup OpenTelemetry with console exporter to see trace structure.""" + trace.set_tracer_provider(TracerProvider()) + tracer_provider = trace.get_tracer_provider() + tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) + return tracer_provider + + +def test_rag_without_agents(): + """Test RAG instrumentation creates correct hierarchy: Workflow -> Steps -> LLM/Embedding""" + + print("=" * 80) + print("Setting up telemetry...") + print("=" * 80) + setup_telemetry() + + # Setup LlamaIndex + Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.1) + Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small") + + # Instrument + instrumentor = LlamaindexInstrumentor() + instrumentor.instrument() + + # Debug: Check callback handler + from llama_index.core import Settings as LlamaSettings + print(f"\nCallbacks registered: {len(LlamaSettings.callback_manager.handlers)}") + for handler in LlamaSettings.callback_manager.handlers: + print(f" Handler: {type(handler).__name__}") + + # Create sample documents + documents = [ + Document( + text="Paris is the capital of France. It has a population of over 2 million.", + metadata={"source": "geography", "country": "France"}, + ), + Document( + text="The Eiffel Tower is in Paris. It was completed in 1889.", + metadata={"source": "landmarks", "country": "France"}, + ), + ] + + print("\n" + "=" * 80) + print("Creating vector index (should see Embedding spans)...") + print("=" * 80) + index = VectorStoreIndex.from_documents(documents) + + print("\n" + "=" * 80) + print("Creating query engine...") + print("=" * 80) + query_engine = index.as_query_engine(similarity_top_k=2) + + print("\n" + "=" * 80) + print("Executing RAG query (should see Workflow -> retrieve.task/synthesize.task -> LLM/Embedding)...") + print("=" * 80) + response = query_engine.query("What is the capital of France?") + + print("\n" + "=" * 80) + print("RESULTS") + print("=" * 80) + print(f"Response: {response.response}") + print(f"Source nodes: {len(response.source_nodes)}") + + print("\n" + "=" * 80) + print("✓ Test completed!") + print("=" * 80) + print("\nExpected trace structure:") + print(" Workflow (gen_ai.operation.name=query)") + print(" ├─ Step (gen_ai.operation.name=retrieve.task)") + print(" │ └─ EmbeddingInvocation") + print(" └─ Step (gen_ai.operation.name=synthesize.task)") + print(" └─ LLMInvocation") + print("=" * 80) + + +if __name__ == "__main__": + test_rag_without_agents()
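
The RAG test above relies on reading the ConsoleSpanExporter output by eye. The following is a minimal sketch, not part of the patches above, of how the same run could be checked programmatically by capturing spans with the SDK's InMemorySpanExporter and printing each span's parent; the model names and the OPENAI_API_KEY requirement mirror test_rag.py and are assumptions rather than part of this change::

    # Sketch: collect spans in memory instead of printing them to the console.
    # Assumes the instrumentor from this patch series, the OTel SDK's
    # InMemorySpanExporter, and a valid OPENAI_API_KEY (as in test_rag.py).
    from llama_index.core import Document, Settings, VectorStoreIndex
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI
    from opentelemetry import trace
    from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )

    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    Settings.llm = OpenAI(model="gpt-4o-mini")
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
    LlamaindexInstrumentor().instrument(tracer_provider=provider)

    index = VectorStoreIndex.from_documents(
        [Document(text="Paris is the capital of France.")]
    )
    index.as_query_engine().query("What is the capital of France?")

    # Each finished span records its parent's span_id; mapping ids back to
    # names makes the Workflow -> Step -> LLM/Embedding nesting visible.
    spans = exporter.get_finished_spans()
    names_by_id = {s.context.span_id: s.name for s in spans}
    for s in spans:
        parent = names_by_id.get(s.parent.span_id) if s.parent else None
        print(f"{s.name} <- parent: {parent}")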