OpenTelemetry LlamaIndex Instrumentation
=========================================

This library provides automatic instrumentation for LlamaIndex applications using OpenTelemetry.

Installation
------------

Development installation::

    # Install the package in editable mode
    cd instrumentation-genai/opentelemetry-instrumentation-llamaindex
    pip install -e .

    # Install test dependencies
    pip install -e ".[test]"

    # Install util-genai (required for telemetry)
    cd ../../util/opentelemetry-util-genai
    pip install -e .


Quick Start
-----------

.. code-block:: python

    import os

    from opentelemetry import trace, metrics
    from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import InMemoryMetricReader
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

    # Enable metrics (default is spans only)
    os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric"

    # Set up tracing
    trace.set_tracer_provider(TracerProvider())
    trace.get_tracer_provider().add_span_processor(
        SimpleSpanProcessor(ConsoleSpanExporter())
    )

    # Set up metrics
    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    # Enable instrumentation with explicit providers
    LlamaindexInstrumentor().instrument(
        tracer_provider=trace.get_tracer_provider(),
        meter_provider=meter_provider,
    )

    # Use LlamaIndex as normal
    from llama_index.core.llms import ChatMessage, MessageRole
    from llama_index.llms.openai import OpenAI

    llm = OpenAI(model="gpt-3.5-turbo")
    messages = [ChatMessage(role=MessageRole.USER, content="Hello")]
    response = llm.chat(messages)


Running Tests
-------------

**LLM Tests**:

.. code-block:: bash

    # Set environment variables
    export OPENAI_API_KEY=your-api-key
    export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric

    # Run the test
    cd tests
    python test_llm_instrumentation.py

**Embedding Tests**:

.. code-block:: bash

    # Set environment variables
    export OPENAI_API_KEY=your-api-key
    export OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric

    # Run the test
    cd tests
    python test_embedding_instrumentation.py


Expected Output
---------------

**LLM Span Attributes**::

    {
        "gen_ai.framework": "llamaindex",
        "gen_ai.request.model": "gpt-3.5-turbo",
        "gen_ai.operation.name": "chat",
        "gen_ai.usage.input_tokens": 24,
        "gen_ai.usage.output_tokens": 7
    }

**Embedding Span Attributes**::

    {
        "gen_ai.operation.name": "embeddings",
        "gen_ai.request.model": "text-embedding-3-small",
        "gen_ai.provider.name": "openai",
        "gen_ai.embeddings.dimension.count": 1536
    }

**Metrics**::

    Metric: gen_ai.client.operation.duration
    Duration: 0.6900 seconds
    Count: 1

    Metric: gen_ai.client.token.usage
    Token type: input, Sum: 24, Count: 1
    Token type: output, Sum: 7, Count: 1
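
For a quick local check, the metrics above can be read back from the
``InMemoryMetricReader`` created in the Quick Start. A minimal sketch using the
standard OpenTelemetry SDK data model (the exact attribute names depend on the
emitter configuration):

.. code-block:: python

    metrics_data = metric_reader.get_metrics_data()
    for resource_metrics in metrics_data.resource_metrics:
        for scope_metrics in resource_metrics.scope_metrics:
            for metric in scope_metrics.metrics:
                print(f"Metric: {metric.name}")
                for point in metric.data.data_points:
                    # Histogram points carry sum/count; attributes hold
                    # dimensions such as the token type
                    print(
                        f"  attributes={dict(point.attributes)}",
                        f"sum={getattr(point, 'sum', None)}",
                        f"count={getattr(point, 'count', None)}",
                    )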


Key Implementation Differences from LangChain
----------------------------------------------

**1. Event-Based Callbacks**

LlamaIndex uses ``on_event_start(event_type, ...)`` and ``on_event_end(event_type, ...)``
instead of LangChain's method-based callbacks (``on_llm_start``, ``on_llm_end``).

Event types are dispatched via the ``CBEventType`` enum::

    CBEventType.LLM        # LLM invocations (chat, complete)
    CBEventType.AGENT      # Agent steps (not yet instrumented)
    CBEventType.EMBEDDING  # Embedding operations (get_text_embedding, get_text_embedding_batch)
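
For illustration, a minimal sketch of an event-based handler, subclassing the
public ``BaseCallbackHandler``. The printing is a placeholder for the span
bookkeeping the instrumentor actually performs:

.. code-block:: python

    from typing import Any, Dict, Optional

    from llama_index.core.callbacks import CBEventType
    from llama_index.core.callbacks.base_handler import BaseCallbackHandler


    class SketchHandler(BaseCallbackHandler):
        """Hypothetical handler showing the event-based callback shape."""

        def __init__(self) -> None:
            super().__init__(event_starts_to_ignore=[], event_ends_to_ignore=[])

        def on_event_start(
            self,
            event_type: CBEventType,
            payload: Optional[Dict[str, Any]] = None,
            event_id: str = "",
            parent_id: str = "",
            **kwargs: Any,
        ) -> str:
            # A real handler would open a span here, keyed by event_id
            if event_type is CBEventType.LLM:
                print(f"LLM event started: {event_id}")
            return event_id

        def on_event_end(
            self,
            event_type: CBEventType,
            payload: Optional[Dict[str, Any]] = None,
            event_id: str = "",
            **kwargs: Any,
        ) -> None:
            # ... and close the span opened in on_event_start
            if event_type is CBEventType.LLM:
                print(f"LLM event ended: {event_id}")

        def start_trace(self, trace_id: Optional[str] = None) -> None:
            pass

        def end_trace(
            self,
            trace_id: Optional[str] = None,
            trace_map: Optional[Dict[str, Any]] = None,
        ) -> None:
            pass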

**2. Handler Registration**

LlamaIndex uses the ``handlers`` list::

    callback_manager.handlers.append(handler)

LangChain uses ``inheritable_handlers``::

    callback_manager.inheritable_handlers.append(handler)
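
On the LlamaIndex side, a handler such as the ``SketchHandler`` above can also
be attached globally; a hedged example using the public ``CallbackManager``
API:

.. code-block:: python

    from llama_index.core import Settings
    from llama_index.core.callbacks import CallbackManager

    # New LlamaIndex components pick up the global callback manager,
    # so the handler sees events from all of them
    Settings.callback_manager = CallbackManager([SketchHandler()])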

**3. Response Structure**

LlamaIndex's ``ChatMessage`` stores content as ``blocks`` (a list of ``TextBlock`` objects)::

    message.content  # Computed property from blocks[0].text

LangChain uses simple strings::

    message.content  # Direct string property

**4. Token Usage**

LlamaIndex returns objects (not dicts)::

    response.raw.usage.prompt_tokens      # Object attribute
    response.raw.usage.completion_tokens  # Object attribute

LangChain returns dicts::

response["usage"]["prompt_tokens"] # Dict key
response["usage"]["completion_tokens"] # Dict key


Supported Features
------------------

**LLM Operations**

* ✅ Chat completion (``llm.chat()``, ``llm.stream_chat()``)
* ✅ Text completion (``llm.complete()``, ``llm.stream_complete()``)
* ✅ Token usage tracking
* ✅ Model name detection
* ✅ Framework attribution

**Embedding Operations**

* ✅ Single text embedding (``embed_model.get_text_embedding()``)
* ✅ Batch embedding (``embed_model.get_text_embedding_batch()``)
* ✅ Query embedding (``embed_model.get_query_embedding()``)
* ✅ Provider detection (OpenAI, Azure, AWS Bedrock, Google, Cohere, HuggingFace, Ollama, and more)
* ✅ Dimension count tracking
* ✅ Input text capture

**Provider Detection**

Embedding instrumentation automatically detects the provider from the embedding class name (a minimal sketch follows the list):

* **OpenAI**: ``OpenAIEmbedding``
* **Azure**: ``AzureOpenAIEmbedding``
* **AWS**: ``BedrockEmbedding``
* **Google**: ``GeminiEmbedding``, ``VertexTextEmbedding``, ``GooglePaLMEmbedding``
* **Cohere**: ``CohereEmbedding``
* **HuggingFace**: ``HuggingFaceEmbedding``, ``HuggingFaceInferenceAPIEmbedding``
* **Ollama**: ``OllamaEmbedding``
* **Anthropic**: ``AnthropicEmbedding``
* **MistralAI**: ``MistralAIEmbedding``
* **Together**: ``TogetherEmbedding``
* **Fireworks**: ``FireworksEmbedding``
* **Voyage**: ``VoyageEmbedding``
* **Jina**: ``JinaEmbedding``
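
A minimal sketch of this kind of class-name lookup (the mapping is an
illustrative subset assembled from the list above; the real table lives inside
the instrumentation and may differ):

.. code-block:: python

    # Illustrative subset of the class-name -> provider table above
    _PROVIDER_BY_CLASS = {
        "OpenAIEmbedding": "openai",
        "AzureOpenAIEmbedding": "azure",
        "BedrockEmbedding": "aws",
        "GeminiEmbedding": "google",
        "CohereEmbedding": "cohere",
        "HuggingFaceEmbedding": "huggingface",
        "OllamaEmbedding": "ollama",
    }

    def detect_provider(embed_model: object) -> str:
        """Hypothetical helper: map an embedding instance to a provider name."""
        return _PROVIDER_BY_CLASS.get(type(embed_model).__name__, "unknown")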


References
----------

* `OpenTelemetry Project <https://opentelemetry.io/>`_
* `LlamaIndex <https://www.llamaindex.ai/>`_
* `LlamaIndex Callbacks <https://docs.llamaindex.ai/en/stable/module_guides/observability/callbacks/>`_

# Example script: travel planner ReAct agent exporting OTLP telemetry
import asyncio
import os
import sys

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

from opentelemetry import trace, metrics
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.instrumentation.llamaindex import LlamaindexInstrumentor


# 1. Setup Telemetry
def setup_telemetry():
    trace.set_tracer_provider(TracerProvider())
    trace.get_tracer_provider().add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(insecure=True))
    )

    metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter(insecure=True))
    metrics.set_meter_provider(MeterProvider(metric_readers=[metric_reader]))


# 2. Define Tools
def search_flights(origin: str, destination: str, date: str) -> str:
"""Search for flights between two cities on a specific date."""
print(f" [Tool] Searching flights from {origin} to {destination} on {date}...")
return f"Flight UA123 from {origin} to {destination} on {date} costs $500."


def search_hotels(city: str, check_in: str) -> str:
"""Search for hotels in a city."""
print(f" [Tool] Searching hotels in {city} for {check_in}...")
return f"Hotel Grand in {city} is available for $200/night."


def book_ticket(flight_number: str) -> str:
"""Book a flight ticket."""
print(f" [Tool] Booking flight {flight_number}...")
return f"Confirmed booking for {flight_number}. Ticket #999."


# 3. Main Agent Logic
async def run_travel_planner():
    # Check for API key
    if not os.getenv("OPENAI_API_KEY"):
        print("Error: OPENAI_API_KEY environment variable is not set.")
        sys.exit(1)

    setup_telemetry()

    # Instrument LlamaIndex
    LlamaindexInstrumentor().instrument()

    # Set up the LLM
    Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0)

    # Create tools
    tools = [
        FunctionTool.from_defaults(fn=search_flights),
        FunctionTool.from_defaults(fn=search_hotels),
        FunctionTool.from_defaults(fn=book_ticket),
    ]

    # Create the agent
    # ReActAgent in LlamaIndex uses the workflow engine internally
    agent = ReActAgent(tools=tools, llm=Settings.llm, verbose=True)

    # Run the workflow
    user_request = (
        "I want to fly from New York to Paris on 2023-12-01. "
        "Find a flight and book it, then find a hotel."
    )

    # The async run method returns the workflow handler that the
    # instrumentation wraps (wrap_agent_run -> WorkflowEventInstrumentor)
    handler = agent.run(user_msg=user_request)
    response = await handler

    print(f"\nFinal Response: {response}")

    # Ensure spans are flushed before exit
    provider = trace.get_tracer_provider()
    if hasattr(provider, "force_flush"):
        provider.force_flush()
    if hasattr(provider, "shutdown"):
        provider.shutdown()


if __name__ == "__main__":
    asyncio.run(run_travel_planner())

# Dockerfile for the travel_planner_k8s example
FROM python:3.12-slim

WORKDIR /app

# Disable telemetry during build to avoid connection errors
ENV OTEL_SDK_DISABLED=true

# Install dependencies
COPY instrumentation-genai/opentelemetry-instrumentation-llamaindex/examples/travel_planner_k8s/requirements.txt .
RUN pip install --default-timeout=100 --retries=5 -r requirements.txt

# Copy and install local instrumentation package
COPY instrumentation-genai/opentelemetry-instrumentation-llamaindex /tmp/instrumentation-llamaindex/
RUN pip install --no-cache-dir /tmp/instrumentation-llamaindex && \
    rm -rf /tmp/instrumentation-llamaindex

# Copy application code
COPY instrumentation-genai/opentelemetry-instrumentation-llamaindex/examples/travel_planner_k8s/main_server.py .

# Expose port
EXPOSE 8080

# Run the server
CMD ["python", "main_server.py"]