From cbc15b4a900e6d1676632138c8bad46e5e01b0c8 Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Wed, 3 Dec 2025 16:14:00 -0800 Subject: [PATCH 1/6] initial commit --- .../CHANGELOG.md | 12 + .../LICENSE | 201 +++++++++ .../README.rst | 150 +++++++ .../examples/manual/example_v4.py | 265 +++++++++++ .../pyproject.toml | 65 +++ .../instrumentation/weaviate/__init__.py | 413 ++++++++++++++++++ .../instrumentation/weaviate/config.py | 22 + .../instrumentation/weaviate/mapping.py | 184 ++++++++ .../instrumentation/weaviate/package.py | 16 + .../instrumentation/weaviate/utils.py | 80 ++++ .../instrumentation/weaviate/version.py | 15 + 11 files changed, 1423 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/LICENSE create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md new file mode 100644 index 00000000..6942b8f3 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## Unreleased + +### Added +- Initial release of OpenTelemetry Weaviate instrumentation +- Support for Weaviate client versions 3.x and 4.x +- Automatic tracing of Weaviate operations diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/LICENSE b/instrumentation-genai/opentelemetry-instrumentation-weaviate/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst b/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst new file mode 100644 index 00000000..76a994b8 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst @@ -0,0 +1,150 @@ +OpenTelemetry Weaviate Instrumentation +======================================= + +|pypi| + +.. |pypi| image:: https://badge.fury.io/py/splunk-otel-instrumentation-weaviate.svg + :target: https://pypi.org/project/splunk-otel-instrumentation-weaviate/ + +This library allows tracing requests made by the Weaviate Python client to a Weaviate vector database. + +Installation +------------ + +:: + + pip install splunk-otel-instrumentation-weaviate + + +Usage +----- + +Instrumenting all Weaviate clients +*********************************** + +When using the instrumentor, all clients will automatically be instrumented. + +.. code-block:: python + + from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor + import weaviate + + # Instrument Weaviate + WeaviateInstrumentor().instrument() + + # This client will be automatically instrumented + client = weaviate.connect_to_local() + + # Use the client as normal + collection = client.collections.get("MyCollection") + result = collection.query.fetch_objects(limit=10) + + +Retrieval Spans for RAG Pipelines +********************************** + +For RAG (Retrieval-Augmented Generation) pipelines, use the manual span helpers to create structured traces where embedding and database operations are children of a parent retrieval span. + +.. code-block:: python + + from opentelemetry.instrumentation.weaviate import ( + WeaviateInstrumentor, + retrieval_span, + embedding_span, + ) + + WeaviateInstrumentor().instrument() + + # Create retrieval span with embedding as child + with retrieval_span( + query_text="What are vector databases?", + top_k=10, + collection_name="Articles", + embedding_model="nomic-embed-text" + ) as span: + # Child 1: Generate embedding + with embedding_span(query, model="nomic-embed-text"): + embedding = lm_studio_client.embeddings.create(input=query) + + # Child 2: Query Weaviate (auto-instrumented) + results = collection.query.near_vector( + near_vector=embedding.data[0].embedding, + limit=10 + ) + + # Add custom attributes + span.set_attribute("db.retrieval.documents_retrieved", len(results.objects)) + +**Resulting span hierarchy:** + +:: + + db.retrieval.client + ├─ generate_embedding + └─ db.weaviate.collections.query.near_vector + └─ db.weaviate.collections.query.get + +**Builder pattern for complex scenarios:** + +.. code-block:: python + + from opentelemetry.instrumentation.weaviate import RetrievalSpanBuilder + + builder = RetrievalSpanBuilder( + query_text="search text", + top_k=10, + collection_name="Articles", + embedding_model="nomic-embed-text", + # Custom attributes via kwargs + user_id="user123", + session_id="session456" + ) + + with builder.span() as span: + # Your retrieval logic here + pass + +See ``RETRIEVAL_SPANS.md`` for detailed documentation. + + +Examples +-------- + +The ``examples/manual/`` directory contains working examples: + +* ``example_v4.py`` - Comprehensive example showing various Weaviate v4 operations +* ``example_rag_pipeline.py`` - Complete RAG pipeline with LM Studio embeddings and manual retrieval spans +* ``example_manual_retrieval_spans.py`` - Different patterns for manual retrieval spans + +Running the examples +******************** + +1. Install dependencies:: + + pip install weaviate-client>=4.0.0 opentelemetry-sdk opentelemetry-exporter-otlp-proto-grpc + +2. Start a local Weaviate instance:: + + docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:latest + +3. Run an example:: + + cd examples/manual + python3 example_v4.py + +4. (Optional) Configure OTLP endpoint:: + + export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" + + +Supported Versions +------------------ + +This instrumentation supports Weaviate client versions 3.x and 4.x. + +References +---------- + +* `OpenTelemetry Weaviate Instrumentation `_ +* `OpenTelemetry Project `_ +* `Weaviate Documentation `_ diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py new file mode 100644 index 00000000..76d5e88a --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py @@ -0,0 +1,265 @@ +""" +Example demonstrating OpenTelemetry instrumentation for Weaviate v4 client. + +This example shows various Weaviate operations including schema management, +data operations, and queries. All operations are automatically instrumented. + +For setup instructions, see ../../README.rst + +Tested with weaviate-client>=4.0.0 +Code adapted from: https://weaviate.io/developers/weaviate/client-libraries/python +""" +import os + +import weaviate +import weaviate.classes as wvc + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( + OTLPSpanExporter, +) +from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, +) +from opentelemetry.semconv.resource import ResourceAttributes + +CLASS_NAME = "Article" +RAW_QUERY = """ + { + Get { + Article(limit: 2) { + author + text + } + } + } + """ + +# Set up the tracer provider with service name +resource = Resource(attributes={ + ResourceAttributes.SERVICE_NAME: "weaviate-example", +}) +tracer_provider = TracerProvider(resource=resource) +trace.set_tracer_provider(tracer_provider) + +# Add OTLP exporter (reads from OTEL_EXPORTER_OTLP_ENDPOINT env var) +otlp_exporter = OTLPSpanExporter( + endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317"), + headers=(), +) +otlp_processor = BatchSpanProcessor(otlp_exporter) +tracer_provider.add_span_processor(otlp_processor) + +# Add console exporter to see traces in terminal as well +console_exporter = ConsoleSpanExporter() +console_processor = BatchSpanProcessor(console_exporter) +tracer_provider.add_span_processor(console_processor) + +# Now instrument Weaviate +WeaviateInstrumentor().instrument() + + +def create_schema(client): + """Create a simple collection without vectorization (no external dependencies).""" + client.collections.create( + name=CLASS_NAME, + description="An Article class to store a text", + vectorizer_config=wvc.config.Configure.Vectorizer.none(), + properties=[ + wvc.config.Property( + name="author", + data_type=wvc.config.DataType.TEXT, + description="The name of the author", + ), + wvc.config.Property( + name="text", + data_type=wvc.config.DataType.TEXT, + description="The text content", + ), + ], + ) + + +def get_collection(client): + """Get the collection to test connection""" + return client.collections.get(CLASS_NAME) + + +def delete_collection(client): + client.collections.delete(CLASS_NAME) + + +def create_object(collection): + return collection.data.insert( + { + "author": "Robert", + "text": "Once upon a time, someone wrote a book...", + } + ) + + +def create_batch(collection): + objs = [ + { + "author": "Robert", + "text": "Once upon a time, R. wrote a book...", + }, + { + "author": "Johnson", + "text": "Once upon a time, J. wrote some news...", + }, + { + "author": "Maverick", + "text": "Never again, M. will write a book...", + }, + { + "author": "Wilson", + "text": "Lost in the island, W. did not write anything...", + }, + { + "author": "Ludwig", + "text": "As king, he ruled...", + }, + ] + with collection.batch.dynamic() as batch: + for obj in objs: + batch.add_object(properties=obj) + + +def query_get(collection): + # This now automatically creates: + # - Parent: db.retrieval.client span with db.retrieval.type=fetch + # - Child: db.weaviate.collections.query.fetch_objects span + return collection.query.fetch_objects( + limit=5, + return_properties=[ + "author", + "text", + ] + ) + + +def query_aggregate(collection): + return collection.aggregate.over_all(total_count=True) + + +def query_raw(client): + return client.graphql_raw_query(RAW_QUERY) + + +def query_near_text(collection, text): + """Query using nearText to find similar articles. + + Note: This requires a vectorizer to be configured on the collection. + Skipped in this example since we use Vectorizer.none(). + """ + # Commented out because it requires vectorization + # query_result = collection.query.near_text( + # query=text, + # limit=2, + # return_metadata=weaviate.classes.query.MetadataQuery(distance=True), + # ) + # return query_result + print("Skipping near_text query (requires vectorizer)") + return None + + +def validate(collection, uuid=None): + """Validate by attempting to fetch an object by ID.""" + if uuid: + return collection.query.fetch_object_by_id(uuid) + return None + + +def create_schemas(client): + client.collections.create_from_dict( + { + "class": "Author", + "description": "An author that writes an article", + "properties": [ + { + "name": "name", + "dataType": ["string"], + "description": "The name of the author", + }, + ], + }, + ) + client.collections.create_from_dict( + { + "class": CLASS_NAME, + "description": "An Article class to store a text", + "properties": [ + { + "name": "author", + "dataType": ["Author"], + "description": "The author", + }, + { + "name": "text", + "dataType": ["text"], + "description": "The text content", + }, + ], + }, + ) + + +def delete_all(client): + client.collections.delete_all() + + +def example_schema_workflow(client): + delete_all(client) + + create_schema(client) + print("Created schema") + collection = get_collection(client) + print("Retrieved collection: ", collection.name) + + uuid = create_object(collection) + print("Created object of UUID: ", uuid) + obj = collection.query.fetch_object_by_id(uuid) + print("Retrieved obj: ", obj) + + create_batch(collection) + result = query_get(collection) + print("Query result:", result) + aggregate_result = query_aggregate(collection) + print("Aggregate result:", aggregate_result) + raw_result = query_raw(client) + print("Raw result: ", raw_result) + # Skip near_text query since we're not using a vectorizer + # near_text_result = query_near_text(collection, "book") + # print("Near text result: ", near_text_result) + + delete_collection(client) + print("Deleted schema") + + +def example_schema_workflow2(client): + delete_all(client) + create_schemas(client) + + +if __name__ == "__main__": + print("OpenTelemetry Weaviate instrumentation initialized") + + # Connect to local Weaviate instance (default: http://localhost:8080) + # Make sure Weaviate is running locally, e.g., via Docker: + # docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:latest + client = weaviate.connect_to_local() + print("Connected to local Weaviate instance") + + try: + example_schema_workflow2(client) + example_schema_workflow(client) + delete_all(client) + finally: + # Ensure all spans are exported before exiting + tracer_provider.force_flush(timeout_millis=5000) + client.close() \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml new file mode 100644 index 00000000..90b1a456 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml @@ -0,0 +1,65 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "splunk-otel-instrumentation-weaviate" +dynamic = ["version"] +description = "OpenTelemetry Weaviate instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.9" +authors = [ + { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "opentelemetry-api ~= 1.38.0.dev0", + "opentelemetry-instrumentation ~= 0.59b0.dev0", + "opentelemetry-semantic-conventions ~= 0.59b0.dev0", + "splunk-otel-util-genai>=0.1.4", +] + +[project.optional-dependencies] +instruments = [ + "weaviate-client >= 3.0.0, <5.0.0", +] +test = [ + "pytest >= 7.0.0", + "pytest-recording >= 0.13.0", + "vcrpy >= 7.0.0", +] + +[project.entry-points.opentelemetry_instrumentor] +weaviate = "opentelemetry.instrumentation.weaviate:WeaviateInstrumentor" + +[project.urls] +Homepage = "https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation-genai/opentelemetry-instrumentation-weaviate" +Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" + +[tool.hatch.version] +path = "src/opentelemetry/instrumentation/weaviate/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/opentelemetry"] + +[tool.ruff] +exclude = [ + "./", +] diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py new file mode 100644 index 00000000..16a670a2 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py @@ -0,0 +1,413 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Weaviate client instrumentation supporting `weaviate-client`, it can be enabled by +using ``WeaviateInstrumentor``. + +.. _weaviate-client: https://pypi.org/project/weaviate-client/ + +Usage +----- + +.. code:: python + + import weaviate + from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor + + WeaviateInstrumentor().instrument() + + client = weaviate.Client("http://localhost:8080") + # Your Weaviate operations will now be traced + +API +--- +""" + +import json +from contextvars import ContextVar +from typing import Any, Collection, Dict, Optional + +import weaviate +from wrapt import wrap_function_wrapper # type: ignore + +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.utils import ( + is_instrumentation_enabled, + unwrap, +) +from opentelemetry.instrumentation.weaviate.config import Config +from opentelemetry.instrumentation.weaviate.version import __version__ + +# from opentelemetry.metrics import get_meter +# from opentelemetry._events import get_event_logger +from opentelemetry.semconv.attributes import ( + db_attributes as DbAttributes, +) +from opentelemetry.semconv.attributes import ( + server_attributes as ServerAttributes, +) + +# Potentially not needed. +from opentelemetry.semconv.schemas import Schemas +from opentelemetry.trace import SpanKind, Tracer, get_tracer + +from .mapping import MAPPING_V3, MAPPING_V4, SPAN_NAME_PREFIX +from .utils import ( + extract_collection_name, + parse_url_to_host_port, +) + +WEAVIATE_V3 = 3 +WEAVIATE_V4 = 4 + +weaviate_version = None +_instruments = ("weaviate-client >= 3.0.0, < 5",) + + +# Context variable for passing connection info within operation call stacks +_connection_host_context: ContextVar[Optional[str]] = ContextVar( + "weaviate_connection_host", default=None +) +_connection_port_context: ContextVar[Optional[int]] = ContextVar( + "weaviate_connection_port", default=None +) + + +class WeaviateInstrumentor(BaseInstrumentor): + """An instrumentor for Weaviate's client library.""" + + def __init__(self, exception_logger: Optional[Any] = None) -> None: + super().__init__() + Config.exception_logger = exception_logger + + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs: Any) -> None: + global weaviate_version + tracer_provider = kwargs.get("tracer_provider") + tracer = get_tracer( + __name__, + __version__, + tracer_provider, + schema_url=Schemas.V1_28_0.value, + ) + + try: + major_version = int(weaviate.__version__.split(".")[0]) + if major_version >= 4: + weaviate_version = WEAVIATE_V4 + else: + weaviate_version = WEAVIATE_V3 + except (ValueError, IndexError): + # Default to V3 if version parsing fails + weaviate_version = WEAVIATE_V3 + + self._get_server_details(weaviate_version, tracer) + + wrappings = ( + MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 + ) + for to_wrap in wrappings: + name = ".".join([to_wrap["name"], to_wrap["function"]]) + wrap_function_wrapper( + module=to_wrap["module"], + name=name, + wrapper=_WeaviateTraceInjectionWrapper( + tracer, wrap_properties=to_wrap + ), + ) + + def _uninstrument(self, **kwargs: Any) -> None: + global weaviate_version + wrappings = ( + MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 + ) + for to_unwrap in wrappings: + try: + module = ".".join([to_unwrap["module"], to_unwrap["name"]]) + unwrap( + module, + to_unwrap["function"], + ) + except (ImportError, AttributeError, ValueError): + # Ignore errors when unwrapping - module might not be loaded + # or function might not be wrapped + pass + + # unwrap the connection initialization to remove the context variable injection + try: + if weaviate_version == WEAVIATE_V3: + unwrap("weaviate.Client", "__init__") + elif weaviate_version == WEAVIATE_V4: + unwrap("weaviate.WeaviateClient", "__init__") + except (ImportError, AttributeError, ValueError): + # Ignore errors when unwrapping connection methods + pass + + def _get_server_details(self, version: int, tracer: Tracer) -> None: + name = "Client.__init__" + if version == WEAVIATE_V4: + name = "WeaviateClient.__init__" + + wrap_function_wrapper( + module="weaviate", + name=name, + wrapper=_WeaviateConnectionInjectionWrapper(tracer), + ) + + +class _WeaviateConnectionInjectionWrapper: + """ + A wrapper that intercepts calls to weaviate connection methods to inject tracing headers. + This is used to create spans for Weaviate connection operations. + """ + + def __init__(self, tracer: Tracer): + self.tracer = tracer + + def __call__( + self, wrapped: Any, instance: Any, args: Any, kwargs: Any + ) -> Any: + if not is_instrumentation_enabled(): + return wrapped(*args, **kwargs) + + # Extract connection details from args/kwargs before calling wrapped function + connection_host = None + connection_port = None + connection_url = None + + # For v3, extract URL from constructor arguments + # weaviate.Client(url="http://localhost:8080", ...) + if args and len(args) > 0: + # First positional argument is typically the URL + connection_url = args[0] + elif "url" in kwargs: + # URL passed as keyword argument + connection_url = kwargs["url"] + + if connection_url: + connection_host, connection_port = parse_url_to_host_port( + connection_url + ) + + return_value = wrapped(*args, **kwargs) + + # For v4, try to extract from instance after creation (fallback) + if ( + not connection_url + and hasattr(instance, "_connection") + and instance._connection is not None + ): + connection_url = instance._connection.url + if connection_url: + connection_host, connection_port = parse_url_to_host_port( + connection_url + ) + + _connection_host_context.set(connection_host) + _connection_port_context.set(connection_port) + return return_value + + +class _WeaviateTraceInjectionWrapper: + """ + A wrapper that intercepts calls to weaviate to inject tracing headers. + This is used to create spans for Weaviate operations. + """ + + def __init__( + self, tracer: Tracer, wrap_properties: Optional[Dict[str, str]] = None + ) -> None: + self.tracer = tracer + self.wrap_properties = wrap_properties or {} + + def __call__( + self, wrapped: Any, instance: Any, args: Any, kwargs: Any + ) -> Any: + """ + Wraps the original function to inject tracing headers. + """ + if not is_instrumentation_enabled(): + return wrapped(*args, **kwargs) + + # Create DB span for all operations + return self._create_db_span(wrapped, instance, args, kwargs) + + def _create_db_span( + self, wrapped: Any, instance: Any, args: Any, kwargs: Any + ) -> Any: + """Create a regular DB operation span.""" + name = self.wrap_properties.get( + "span_name", + getattr(wrapped, "__name__", "unknown"), + ) + name = f"{SPAN_NAME_PREFIX}.{name}" + with self.tracer.start_as_current_span( + name, kind=SpanKind.CLIENT + ) as span: + span.set_attribute(DbAttributes.DB_SYSTEM_NAME, "weaviate") + + # Extract operation name dynamically from the function call + module_name = self.wrap_properties.get("module", "") + function_name = self.wrap_properties.get("function", "") + span.set_attribute(DbAttributes.DB_OPERATION_NAME, function_name) + + # Extract collection name from the operation + collection_name = extract_collection_name( + wrapped, instance, args, kwargs, module_name, function_name + ) + if collection_name: + # Use a Weaviate-specific collection attribute similar to MongoDB's DB_MONGODB_COLLECTION + span.set_attribute( + "db.weaviate.collection.name", collection_name + ) + + connection_host = _connection_host_context.get() + connection_port = _connection_port_context.get() + if connection_host is not None: + span.set_attribute( + ServerAttributes.SERVER_ADDRESS, connection_host + ) + if connection_port is not None: + span.set_attribute( + ServerAttributes.SERVER_PORT, connection_port + ) + + return_value = wrapped(*args, **kwargs) + + # Extract documents from similarity search operations + if self._is_similarity_search(): + documents = self._extract_documents_from_response(return_value) + if documents: + span.set_attribute( + "db.weaviate.documents.count", len(documents) + ) + # emit the documents as events + for doc in documents: + # emit the document content as an event + query = "" + if "query" in kwargs: + query = json.dumps(kwargs["query"]) + attributes = { + "db.weaviate.document.content": json.dumps( + doc["content"] + ), + } + + # Only add non-None values to attributes + if doc.get("distance") is not None: + attributes["db.weaviate.document.distance"] = doc[ + "distance" + ] + if doc.get("certainty") is not None: + attributes["db.weaviate.document.certainty"] = doc[ + "certainty" + ] + if doc.get("score") is not None: + attributes["db.weaviate.document.score"] = doc[ + "score" + ] + if query: + attributes["db.weaviate.document.query"] = query + span.add_event( + "weaviate.document", attributes=attributes + ) + + return return_value + + def _is_similarity_search(self) -> bool: + """ + Check if this is a similarity search operation. + """ + module_name = self.wrap_properties.get("module", "") + function_name = self.wrap_properties.get("function", "") + return ( + "query" in module_name.lower() + or "do" in function_name.lower() + or "near_text" in function_name.lower() + or "fetch_objects" in function_name.lower() + ) + + def _extract_documents_from_response( + self, response: Any + ) -> list[dict[str, Any]]: + """ + Extract documents from weaviate response. + """ + # TODO: Pagination, cursor? + documents: list[dict[str, Any]] = [] + try: + if hasattr(response, "objects"): + for obj in response.objects: + doc: dict[str, Any] = {} + if hasattr(obj, "properties"): + doc["content"] = obj.properties + + # Extract similarity scores + if hasattr(obj, "metadata") and obj.metadata: + metadata = obj.metadata + if ( + hasattr(metadata, "distance") + and metadata.distance is not None + ): + doc["distance"] = metadata.distance + if ( + hasattr(metadata, "certainty") + and metadata.certainty is not None + ): + doc["certainty"] = metadata.certainty + if ( + hasattr(metadata, "score") + and metadata.score is not None + ): + doc["score"] = metadata.score + + documents.append(doc) + elif "data" in response: + # Handle GraphQL responses + for response_key in response["data"].keys(): + for collection in response["data"][response_key]: + for obj in response["data"][response_key][collection]: + doc: dict[str, Any] = {} + doc["content"] = dict(obj) + del doc["content"]["_additional"] + if "_additional" in obj: + metadata = obj["_additional"] + if ( + "distance" in metadata + and metadata["distance"] is not None + ): + doc["distance"] = metadata["distance"] + if ( + "certainty" in metadata + and metadata["certainty"] is not None + ): + doc["certainty"] = metadata["certainty"] + if ( + "score" in metadata + and metadata["score"] is not None + ): + doc["score"] = metadata["score"] + documents.append(doc) + except Exception: + # silently handle extraction errors + pass + return documents + + +__all__ = [ + "WeaviateInstrumentor", +] \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py new file mode 100644 index 00000000..d6b554c8 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py @@ -0,0 +1,22 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class Config: + """ + Shared static configuration for Weaviate instrumentation. + """ + + # logger to handle exceptions during instrumentation + exception_logger = None \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py new file mode 100644 index 00000000..60b3a4d6 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py @@ -0,0 +1,184 @@ +SPAN_NAME_PREFIX: str = "db.weaviate" + +CONNECTION_WRAPPING: list[dict[str, str]] = [ + {"module": "weaviate", "name": "connect_to_local"}, + {"module": "weaviate", "name": "connect_to_weaviate_cloud"}, + {"module": "weaviate", "name": "connect_to_custom"}, +] + +MAPPING_V3: list[dict[str, str]] = [ + # Schema operations + { + "module": "weaviate.schema", + "name": "Schema", + "function": "get", + "span_name": "schema.get", + }, + { + "module": "weaviate.schema", + "name": "Schema", + "function": "create_class", + "span_name": "schema.create_class", + }, + { + "module": "weaviate.schema", + "name": "Schema", + "function": "create", + "span_name": "schema.create", + }, + { + "module": "weaviate.schema", + "name": "Schema", + "function": "delete_class", + "span_name": "schema.delete_class", + }, + { + "module": "weaviate.schema", + "name": "Schema", + "function": "delete_all", + "span_name": "schema.delete_all", + }, + # Data CRUD operations + { + "module": "weaviate.data.crud_data", + "name": "DataObject", + "function": "create", + "span_name": "data.crud_data.create", + }, + { + "module": "weaviate.data.crud_data", + "name": "DataObject", + "function": "validate", + "span_name": "data.crud_data.validate", + }, + { + "module": "weaviate.data.crud_data", + "name": "DataObject", + "function": "get", + "span_name": "data.crud_data.get", + }, + # Batch operations + { + "module": "weaviate.batch.crud_batch", + "name": "Batch", + "function": "add_data_object", + "span_name": "batch.crud_batch.add_data_object", + }, + { + "module": "weaviate.batch.crud_batch", + "name": "Batch", + "function": "flush", + "span_name": "batch.crud_batch.flush", + }, + # GraphQL query operations + { + "module": "weaviate.gql.query", + "name": "Query", + "function": "get", + "span_name": "gql.query.get", + }, + { + "module": "weaviate.gql.query", + "name": "Query", + "function": "aggregate", + "span_name": "gql.query.aggregate", + }, + { + "module": "weaviate.gql.query", + "name": "Query", + "function": "raw", + "span_name": "gql.query.raw", + }, + { + "module": "weaviate.gql.get", + "name": "GetBuilder", + "function": "do", + "span_name": "gql.query.get.do", + }, +] + + +MAPPING_V4: list[dict[str, str]] = [ + { + "module": "weaviate.collections.queries.near_text.query", + "name": "_NearTextQuery", + "function": "near_text", + "span_name": "collections.query.near_text", + }, + { + "module": "weaviate.collections.queries.near_vector.query", + "name": "_NearVectorQuery", + "function": "near_vector", + "span_name": "collections.query.near_vector", + }, + { + "module": "weaviate.collections.queries.fetch_objects.query", + "name": "_FetchObjectsQuery", + "function": "fetch_objects", + "span_name": "collections.query.fetch_objects", + }, + { + "module": "weaviate.collections.grpc.query", + "name": "_QueryGRPC", + "function": "get", + "span_name": "collections.query.get", + # Note: get is an internal method called by fetch_objects and near_text + # We don't mark it as retrieval to avoid nested retrieval spans + }, + { + "module": "weaviate.collections.data", + "name": "_DataCollection", + "function": "insert", + "span_name": "collections.data.insert", + }, + { + "module": "weaviate.collections.data", + "name": "_DataCollection", + "function": "replace", + "span_name": "collections.data.replace", + }, + { + "module": "weaviate.collections.data", + "name": "_DataCollection", + "function": "update", + "span_name": "collections.data.update", + }, + # Collections + { + "module": "weaviate.collections.collections", + "name": "_Collections", + "function": "get", + "span_name": "collections.get", + }, + { + "module": "weaviate.collections.collections", + "name": "_Collections", + "function": "create", + "span_name": "collections.create", + }, + { + "module": "weaviate.collections.collections", + "name": "_Collections", + "function": "delete", + "span_name": "collections.delete", + }, + { + "module": "weaviate.collections.collections", + "name": "_Collections", + "function": "delete_all", + "span_name": "collections.delete_all", + }, + { + "module": "weaviate.collections.collections", + "name": "_Collections", + "function": "create_from_dict", + "span_name": "collections.create_from_dict", + }, + # Batch + { + "module": "weaviate.collections.batch.collection", + "name": "_BatchCollection", + "function": "add_object", + "span_name": "collections.batch.add_object", + }, +] \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py new file mode 100644 index 00000000..96cb378d --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py @@ -0,0 +1,16 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +_instruments = ("weaviate-client >= 3.0.0,<5.0.0",) \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py new file mode 100644 index 00000000..f21f72d0 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py @@ -0,0 +1,80 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Any, Optional, Tuple +from urllib.parse import urlparse + +# TODO: get semconv for vector databases +# from opentelemetry.semconv._incubating.attributes import gen_ai_attributes as GenAI + +logger = logging.getLogger(__name__) + + +def parse_url_to_host_port(url: str) -> Tuple[Optional[str], Optional[int]]: + parsed = urlparse(url) + host: Optional[str] = parsed.hostname + port: Optional[int] = parsed.port + return host, port + + +def extract_collection_name( + wrapped: Any, + instance: Any, + args: Any, + kwargs: Any, + module_name: str, + function_name: str, +) -> Optional[str]: + """ + Extract collection name from Weaviate function calls. + + Args: + wrapped: The wrapped function + instance: The instance object (if any) + args: Function arguments + kwargs: Function keyword arguments + module_name: The module name from mapping + function_name: The function name from mapping + + Returns: + Collection name if found, None otherwise + """ + collection_name = None + + try: + # Weaviate Client V4 stores this in the "request" attribute of the kwargs + if ( + kwargs + and "request" in kwargs + and hasattr(kwargs["request"], "collection") + ): + collection_name = kwargs["request"].collection + + # Check if the instance has a collection attribute + # TODO: Check V3 + elif hasattr(instance, "_collection"): + if hasattr(instance._collection, "_name"): + collection_name = instance._collection._name + elif hasattr(instance._collection, "name"): + collection_name = instance._collection.name + + return collection_name + + except Exception: + # Silently ignore any errors during extraction to avoid breaking the tracing + + pass + + return None \ No newline at end of file diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py new file mode 100644 index 00000000..d3cc86dd --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py @@ -0,0 +1,15 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "2.0b0.dev" \ No newline at end of file From 30071e485cb1af97bed68bdfd8407e7bfdb133d0 Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Mon, 8 Dec 2025 14:21:46 -0800 Subject: [PATCH 2/6] update version, CHANGELOG, README --- .../CHANGELOG.md | 2 +- .../README.rst | 86 ++----------------- .../instrumentation/weaviate/version.py | 2 +- 3 files changed, 9 insertions(+), 81 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md index 6942b8f3..2ced53d7 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## Unreleased +## [0.1.0] - 2025-12-08 ### Added - Initial release of OpenTelemetry Weaviate instrumentation diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst b/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst index 76a994b8..fb68c996 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/README.rst @@ -19,10 +19,7 @@ Installation Usage ----- -Instrumenting all Weaviate clients -*********************************** - -When using the instrumentor, all clients will automatically be instrumented. +When using the instrumentor, all Weaviate clients will automatically be instrumented. .. code-block:: python @@ -35,89 +32,20 @@ When using the instrumentor, all clients will automatically be instrumented. # This client will be automatically instrumented client = weaviate.connect_to_local() - # Use the client as normal + # Use the client as normal - all operations will be traced collection = client.collections.get("MyCollection") result = collection.query.fetch_objects(limit=10) -Retrieval Spans for RAG Pipelines -********************************** - -For RAG (Retrieval-Augmented Generation) pipelines, use the manual span helpers to create structured traces where embedding and database operations are children of a parent retrieval span. - -.. code-block:: python - - from opentelemetry.instrumentation.weaviate import ( - WeaviateInstrumentor, - retrieval_span, - embedding_span, - ) - - WeaviateInstrumentor().instrument() - - # Create retrieval span with embedding as child - with retrieval_span( - query_text="What are vector databases?", - top_k=10, - collection_name="Articles", - embedding_model="nomic-embed-text" - ) as span: - # Child 1: Generate embedding - with embedding_span(query, model="nomic-embed-text"): - embedding = lm_studio_client.embeddings.create(input=query) - - # Child 2: Query Weaviate (auto-instrumented) - results = collection.query.near_vector( - near_vector=embedding.data[0].embedding, - limit=10 - ) - - # Add custom attributes - span.set_attribute("db.retrieval.documents_retrieved", len(results.objects)) - -**Resulting span hierarchy:** - -:: - - db.retrieval.client - ├─ generate_embedding - └─ db.weaviate.collections.query.near_vector - └─ db.weaviate.collections.query.get - -**Builder pattern for complex scenarios:** - -.. code-block:: python - - from opentelemetry.instrumentation.weaviate import RetrievalSpanBuilder - - builder = RetrievalSpanBuilder( - query_text="search text", - top_k=10, - collection_name="Articles", - embedding_model="nomic-embed-text", - # Custom attributes via kwargs - user_id="user123", - session_id="session456" - ) - - with builder.span() as span: - # Your retrieval logic here - pass - -See ``RETRIEVAL_SPANS.md`` for detailed documentation. - - Examples -------- -The ``examples/manual/`` directory contains working examples: +The ``examples/manual/`` directory contains a working example: -* ``example_v4.py`` - Comprehensive example showing various Weaviate v4 operations -* ``example_rag_pipeline.py`` - Complete RAG pipeline with LM Studio embeddings and manual retrieval spans -* ``example_manual_retrieval_spans.py`` - Different patterns for manual retrieval spans +* ``example_v4.py`` - Comprehensive example showing various Weaviate v4 operations with automatic tracing -Running the examples -******************** +Running the example +******************* 1. Install dependencies:: @@ -127,7 +55,7 @@ Running the examples docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:latest -3. Run an example:: +3. Run the example:: cd examples/manual python3 example_v4.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py index d3cc86dd..73604ab1 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.0b0.dev" \ No newline at end of file +__version__ = "0.1.0" \ No newline at end of file From 0bc4201a77afe2aa3728ca945f762dd6220176c4 Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Mon, 8 Dec 2025 16:18:41 -0800 Subject: [PATCH 3/6] update example --- .../examples/manual/example_v4.py | 26 +------------------ .../pyproject.toml | 5 +--- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py index 76d5e88a..ed1b4bfa 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py @@ -64,11 +64,9 @@ def create_schema(client): - """Create a simple collection without vectorization (no external dependencies).""" client.collections.create( name=CLASS_NAME, description="An Article class to store a text", - vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[ wvc.config.Property( name="author", @@ -131,9 +129,6 @@ def create_batch(collection): def query_get(collection): - # This now automatically creates: - # - Parent: db.retrieval.client span with db.retrieval.type=fetch - # - Child: db.weaviate.collections.query.fetch_objects span return collection.query.fetch_objects( limit=5, return_properties=[ @@ -151,23 +146,6 @@ def query_raw(client): return client.graphql_raw_query(RAW_QUERY) -def query_near_text(collection, text): - """Query using nearText to find similar articles. - - Note: This requires a vectorizer to be configured on the collection. - Skipped in this example since we use Vectorizer.none(). - """ - # Commented out because it requires vectorization - # query_result = collection.query.near_text( - # query=text, - # limit=2, - # return_metadata=weaviate.classes.query.MetadataQuery(distance=True), - # ) - # return query_result - print("Skipping near_text query (requires vectorizer)") - return None - - def validate(collection, uuid=None): """Validate by attempting to fetch an object by ID.""" if uuid: @@ -233,9 +211,6 @@ def example_schema_workflow(client): print("Aggregate result:", aggregate_result) raw_result = query_raw(client) print("Raw result: ", raw_result) - # Skip near_text query since we're not using a vectorizer - # near_text_result = query_near_text(collection, "book") - # print("Near text result: ", near_text_result) delete_collection(client) print("Deleted schema") @@ -252,6 +227,7 @@ def example_schema_workflow2(client): # Connect to local Weaviate instance (default: http://localhost:8080) # Make sure Weaviate is running locally, e.g., via Docker: # docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:latest + client = weaviate.connect_to_local() print("Connected to local Weaviate instance") diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml index 90b1a456..857770f9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml @@ -5,13 +5,10 @@ build-backend = "hatchling.build" [project] name = "splunk-otel-instrumentation-weaviate" dynamic = ["version"] -description = "OpenTelemetry Weaviate instrumentation" +description = "Splunk OpenTelemetry Weaviate instrumentation" readme = "README.rst" license = "Apache-2.0" requires-python = ">=3.9" -authors = [ - { name = "OpenTelemetry Authors", email = "cncf-opentelemetry-contributors@lists.cncf.io" }, -] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", From eb18175f545eceee0034077ac44dc5415c5055ac Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Tue, 9 Dec 2025 14:52:38 -0800 Subject: [PATCH 4/6] updates and add tests --- .../pyproject.toml | 1 - .../pytest.ini | 11 ++ .../instrumentation/weaviate/__init__.py | 7 +- .../instrumentation/weaviate/mapping.py | 2 - .../tests/README.md | 70 +++++++++++ .../tests/__init__.py | 0 .../tests/conftest.py | 40 +++++++ .../tests/test_instrumentation.py | 47 ++++++++ .../tests/test_span_attributes.py | 110 ++++++++++++++++++ .../tests/test_utils.py | 103 ++++++++++++++++ .../tests/test_weaviate_v3.py | 72 ++++++++++++ .../tests/test_weaviate_v4.py | 83 +++++++++++++ 12 files changed, 541 insertions(+), 5 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/pytest.ini create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/README.md create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/__init__.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/conftest.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml index 857770f9..52e4f7c9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "opentelemetry-api ~= 1.38.0.dev0", "opentelemetry-instrumentation ~= 0.59b0.dev0", "opentelemetry-semantic-conventions ~= 0.59b0.dev0", - "splunk-otel-util-genai>=0.1.4", ] [project.optional-dependencies] diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/pytest.ini b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pytest.ini new file mode 100644 index 00000000..40121eb3 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/pytest.ini @@ -0,0 +1,11 @@ +[pytest] +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* +markers = + integration: marks tests as integration tests (require running Weaviate instance) +addopts = + -v + --strict-markers + --tb=short diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py index 16a670a2..f13fae84 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py @@ -28,9 +28,13 @@ WeaviateInstrumentor().instrument() - client = weaviate.Client("http://localhost:8080") + # Weaviate v4 API + client = weaviate.connect_to_local() # Your Weaviate operations will now be traced + # Weaviate v3 API (also supported) + # client = weaviate.Client("http://localhost:8080") + API --- """ @@ -347,7 +351,6 @@ def _extract_documents_from_response( """ Extract documents from weaviate response. """ - # TODO: Pagination, cursor? documents: list[dict[str, Any]] = [] try: if hasattr(response, "objects"): diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py index 60b3a4d6..53c15c8d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py @@ -122,8 +122,6 @@ "name": "_QueryGRPC", "function": "get", "span_name": "collections.query.get", - # Note: get is an internal method called by fetch_objects and near_text - # We don't mark it as retrieval to avoid nested retrieval spans }, { "module": "weaviate.collections.data", diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/README.md b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/README.md new file mode 100644 index 00000000..80580aca --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/README.md @@ -0,0 +1,70 @@ +# Weaviate Instrumentation Tests + +This directory contains tests for the OpenTelemetry Weaviate instrumentation. + +## Test Structure + +- `conftest.py` - Pytest fixtures and configuration +- `test_instrumentation.py` - Basic instrumentation tests +- `test_weaviate_v3.py` - Weaviate v3 client specific tests +- `test_weaviate_v4.py` - Weaviate v4 client specific tests +- `test_utils.py` - Utility function tests + +## Running Tests + +### Run all tests +```bash +pytest tests/ +``` + +### Run specific test file +```bash +pytest tests/test_instrumentation.py +``` + +### Run with coverage +```bash +pytest --cov=opentelemetry.instrumentation.weaviate tests/ +``` + +### Run only unit tests (skip integration tests) +```bash +pytest tests/ -m "not integration" +``` + +## Test Requirements + +Install test dependencies: +```bash +pip install pytest pytest-cov weaviate-client +``` + +## Integration Tests + +Some tests are marked with `@pytest.mark.integration` and require a running Weaviate instance: + +```bash +docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:latest +``` + +Run integration tests: +```bash +pytest tests/ -m integration +``` + +## Test Coverage + +The tests cover: +- ✅ Instrumentation initialization and cleanup +- ✅ Version detection (v3 vs v4) +- ✅ Span name mapping for all operations +- ✅ Utility functions (URL parsing, collection name extraction) +- ✅ Double instrumentation handling +- ✅ Module structure validation + +## Adding New Tests + +When adding new Weaviate operations to the instrumentation: +1. Add the operation mapping to `mapping.py` +2. Add corresponding test in `test_weaviate_v3.py` or `test_weaviate_v4.py` +3. Verify span names and attributes are correct diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/conftest.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/conftest.py new file mode 100644 index 00000000..a9f03be2 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/conftest.py @@ -0,0 +1,40 @@ +"""Unit tests configuration module.""" + +import pytest + +from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) + + +@pytest.fixture(scope="function") +def span_exporter(): + """Create an in-memory span exporter for testing.""" + return InMemorySpanExporter() + + +@pytest.fixture(scope="function") +def tracer_provider(span_exporter): + """Create a tracer provider with in-memory exporter.""" + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(span_exporter)) + return provider + + +@pytest.fixture(scope="function") +def instrumentor(): + """Create and return a WeaviateInstrumentor instance.""" + return WeaviateInstrumentor() + + +@pytest.fixture(scope="function", autouse=True) +def reset_instrumentor(instrumentor): + """Ensure instrumentor is uninstrumented after each test.""" + yield + try: + instrumentor.uninstrument() + except Exception: + pass # Ignore errors if not instrumented diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py new file mode 100644 index 00000000..533e5685 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py @@ -0,0 +1,47 @@ +"""Tests for Weaviate instrumentation.""" + +import pytest + +from opentelemetry import trace +from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor + + +class TestWeaviateInstrumentation: + """Test basic instrumentation functionality.""" + + def test_instrumentor_initialization(self, instrumentor): + """Test that instrumentor can be initialized.""" + assert instrumentor is not None + assert isinstance(instrumentor, WeaviateInstrumentor) + + def test_instrument_uninstrument(self, instrumentor, tracer_provider): + """Test that instrumentation can be applied and removed.""" + trace.set_tracer_provider(tracer_provider) + + # Instrument + instrumentor.instrument(tracer_provider=tracer_provider) + + # Uninstrument + instrumentor.uninstrument() + + def test_instrumentation_dependencies(self, instrumentor): + """Test that instrumentation dependencies are correctly specified.""" + dependencies = instrumentor.instrumentation_dependencies() + assert dependencies is not None + assert len(dependencies) > 0 + assert any("weaviate-client" in dep for dep in dependencies) + + def test_double_instrument(self, instrumentor, tracer_provider): + """Test that double instrumentation doesn't cause errors.""" + trace.set_tracer_provider(tracer_provider) + + instrumentor.instrument(tracer_provider=tracer_provider) + # Second instrumentation should be idempotent + instrumentor.instrument(tracer_provider=tracer_provider) + + instrumentor.uninstrument() + + def test_uninstrument_without_instrument(self, instrumentor): + """Test that uninstrument works even if not instrumented.""" + # Should not raise an error + instrumentor.uninstrument() diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py new file mode 100644 index 00000000..82efddd4 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py @@ -0,0 +1,110 @@ +"""Tests for span attributes and semantic conventions.""" + +import pytest + +from opentelemetry.instrumentation.weaviate.mapping import ( + MAPPING_V3, + MAPPING_V4, + SPAN_NAME_PREFIX, +) + + +class TestSpanAttributes: + """Test span naming and attribute conventions.""" + + def test_span_name_prefix(self): + """Test that span name prefix is correct.""" + assert SPAN_NAME_PREFIX == "db.weaviate" + + def test_v3_span_names_have_prefix(self): + """Test that all v3 span names follow naming convention.""" + for mapping in MAPPING_V3: + span_name = mapping.get("span_name") + assert span_name is not None + # Span names should not include the prefix (it's added at runtime) + assert not span_name.startswith("db.weaviate.") + + def test_v4_span_names_have_prefix(self): + """Test that all v4 span names follow naming convention.""" + for mapping in MAPPING_V4: + span_name = mapping.get("span_name") + assert span_name is not None + # Span names should not include the prefix (it's added at runtime) + assert not span_name.startswith("db.weaviate.") + + def test_v3_mappings_have_required_fields(self): + """Test that all v3 mappings have required fields.""" + required_fields = ["module", "name", "function", "span_name"] + + for mapping in MAPPING_V3: + for field in required_fields: + assert field in mapping, f"Missing {field} in mapping: {mapping}" + assert mapping[field], f"Empty {field} in mapping: {mapping}" + + def test_v4_mappings_have_required_fields(self): + """Test that all v4 mappings have required fields.""" + required_fields = ["module", "name", "function", "span_name"] + + for mapping in MAPPING_V4: + for field in required_fields: + assert field in mapping, f"Missing {field} in mapping: {mapping}" + assert mapping[field], f"Empty {field} in mapping: {mapping}" + + def test_span_names_are_unique_v3(self): + """Test that v3 span names are unique.""" + span_names = [m["span_name"] for m in MAPPING_V3] + assert len(span_names) == len(set(span_names)), "Duplicate span names found in v3 mappings" + + def test_span_names_are_unique_v4(self): + """Test that v4 span names are unique.""" + span_names = [m["span_name"] for m in MAPPING_V4] + assert len(span_names) == len(set(span_names)), "Duplicate span names found in v4 mappings" + + def test_v3_operation_categories(self): + """Test that v3 operations are properly categorized.""" + span_names = [m["span_name"] for m in MAPPING_V3] + + # Should have schema operations + schema_ops = [s for s in span_names if s.startswith("schema.")] + assert len(schema_ops) > 0, "No schema operations found" + + # Should have data operations + data_ops = [s for s in span_names if "data" in s] + assert len(data_ops) > 0, "No data operations found" + + # Should have query operations + query_ops = [s for s in span_names if "query" in s or "gql" in s] + assert len(query_ops) > 0, "No query operations found" + + # Should have batch operations + batch_ops = [s for s in span_names if "batch" in s] + assert len(batch_ops) > 0, "No batch operations found" + + def test_v4_operation_categories(self): + """Test that v4 operations are properly categorized.""" + span_names = [m["span_name"] for m in MAPPING_V4] + + # Should have collection operations + collection_ops = [s for s in span_names if s.startswith("collections.")] + assert len(collection_ops) > 0, "No collection operations found" + + # Should have query operations + query_ops = [s for s in span_names if "query" in s] + assert len(query_ops) > 0, "No query operations found" + + # Should have data operations + data_ops = [s for s in span_names if "data" in s] + assert len(data_ops) > 0, "No data operations found" + + def test_module_paths_are_valid_python(self): + """Test that module paths follow Python naming conventions.""" + all_mappings = MAPPING_V3 + MAPPING_V4 + + for mapping in all_mappings: + module = mapping["module"] + # Should be valid Python module path + assert module.replace(".", "").replace("_", "").isalnum(), \ + f"Invalid module path: {module}" + # Should not start or end with dot + assert not module.startswith("."), f"Module starts with dot: {module}" + assert not module.endswith("."), f"Module ends with dot: {module}" diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py new file mode 100644 index 00000000..d0538d2d --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py @@ -0,0 +1,103 @@ +"""Tests for utility functions.""" + +import pytest + +from opentelemetry.instrumentation.weaviate.utils import ( + extract_collection_name, + parse_url_to_host_port, +) + + +class TestParseUrlToHostPort: + """Test URL parsing utility.""" + + def test_parse_http_url(self): + """Test parsing HTTP URL.""" + host, port = parse_url_to_host_port("http://localhost:8080") + assert host == "localhost" + assert port == 8080 + + def test_parse_https_url(self): + """Test parsing HTTPS URL.""" + host, port = parse_url_to_host_port("https://example.com:443") + assert host == "example.com" + assert port == 443 + + def test_parse_url_without_port(self): + """Test parsing URL without explicit port.""" + host, port = parse_url_to_host_port("http://localhost") + assert host == "localhost" + assert port is None # urlparse returns None when port is not specified + + def test_parse_https_url_without_port(self): + """Test parsing HTTPS URL without explicit port.""" + host, port = parse_url_to_host_port("https://example.com") + assert host == "example.com" + assert port is None # urlparse returns None when port is not specified + + def test_parse_url_with_path(self): + """Test parsing URL with path.""" + host, port = parse_url_to_host_port("http://localhost:8080/v1") + assert host == "localhost" + assert port == 8080 + + def test_parse_invalid_url(self): + """Test parsing invalid URL returns None.""" + host, port = parse_url_to_host_port("not-a-url") + assert host is None + assert port is None + + def test_parse_none_url(self): + """Test parsing None URL.""" + host, port = parse_url_to_host_port(None) + assert host is None + assert port is None + + +class TestExtractCollectionName: + """Test collection name extraction utility.""" + + def test_extract_from_args(self): + """Test extracting collection name from positional args.""" + # Mock function and instance + def mock_func(): + pass + + instance = None + args = ("MyCollection",) + kwargs = {} + + result = extract_collection_name( + mock_func, instance, args, kwargs, "weaviate.schema", "get" + ) + # Result depends on implementation - this is a basic structure test + assert result is None or isinstance(result, str) + + def test_extract_from_kwargs(self): + """Test extracting collection name from keyword args.""" + def mock_func(): + pass + + instance = None + args = () + kwargs = {"class_name": "MyCollection"} + + result = extract_collection_name( + mock_func, instance, args, kwargs, "weaviate.data", "create" + ) + # Result depends on implementation + assert result is None or isinstance(result, str) + + def test_extract_with_no_collection(self): + """Test extraction when no collection name is present.""" + def mock_func(): + pass + + instance = None + args = () + kwargs = {} + + result = extract_collection_name( + mock_func, instance, args, kwargs, "weaviate.query", "raw" + ) + assert result is None or isinstance(result, str) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py new file mode 100644 index 00000000..77e9e8e4 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py @@ -0,0 +1,72 @@ +"""Tests for Weaviate v3 client instrumentation.""" + +import pytest + +try: + import weaviate + + WEAVIATE_AVAILABLE = True + WEAVIATE_VERSION = int(weaviate.__version__.split(".")[0]) +except ImportError: + WEAVIATE_AVAILABLE = False + WEAVIATE_VERSION = 0 + +from opentelemetry import trace +from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor + + +@pytest.mark.skipif( + not WEAVIATE_AVAILABLE, + reason="Weaviate client not available" +) +class TestWeaviateV3Instrumentation: + """Test Weaviate v3 client instrumentation.""" + + def test_v3_operations_mapped(self): + """Test that v3 operations are properly mapped.""" + from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V3 + + # Verify key v3 operations are mapped + span_names = [m["span_name"] for m in MAPPING_V3] + + # Schema operations + assert "schema.get" in span_names + assert "schema.create_class" in span_names + assert "schema.delete_class" in span_names + + # Data operations + assert "data.crud_data.create" in span_names + assert "data.crud_data.get" in span_names + + # Batch operations + assert "batch.crud_batch.add_data_object" in span_names + + # Query operations + assert "gql.query.get" in span_names + assert "gql.query.aggregate" in span_names + assert "gql.query.raw" in span_names + + def test_v3_modules_structure(self): + """Test that v3 module paths are correct.""" + from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V3 + + # Check that module paths follow v3 structure + modules = [m["module"] for m in MAPPING_V3] + + assert "weaviate.schema" in modules + assert "weaviate.data.crud_data" in modules + assert "weaviate.batch.crud_batch" in modules + assert "weaviate.gql.query" in modules + assert "weaviate.gql.get" in modules + + @pytest.mark.skipif( + WEAVIATE_VERSION >= 4, + reason="Test only applicable for v3 client" + ) + def test_v3_client_detection(self, instrumentor, tracer_provider): + """Test that v3 client is correctly detected when installed.""" + trace.set_tracer_provider(tracer_provider) + instrumentor.instrument(tracer_provider=tracer_provider) + + from opentelemetry.instrumentation.weaviate import weaviate_version, WEAVIATE_V3 + assert weaviate_version == WEAVIATE_V3 diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py new file mode 100644 index 00000000..103244c3 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py @@ -0,0 +1,83 @@ +"""Tests for Weaviate v4 client instrumentation.""" + +import pytest + +try: + import weaviate + from weaviate.collections.classes.config import Configure + + WEAVIATE_AVAILABLE = True + WEAVIATE_VERSION = int(weaviate.__version__.split(".")[0]) +except ImportError: + WEAVIATE_AVAILABLE = False + WEAVIATE_VERSION = 0 + +from opentelemetry import trace +from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor +from opentelemetry.semconv.attributes import db_attributes as DbAttributes +from opentelemetry.semconv.attributes import server_attributes as ServerAttributes + + +@pytest.mark.skipif( + not WEAVIATE_AVAILABLE or WEAVIATE_VERSION < 4, + reason="Weaviate v4 client not available" +) +class TestWeaviateV4Instrumentation: + """Test Weaviate v4 client instrumentation.""" + + def test_version_detection(self, instrumentor, tracer_provider): + """Test that v4 client is correctly detected.""" + trace.set_tracer_provider(tracer_provider) + instrumentor.instrument(tracer_provider=tracer_provider) + + # Version should be detected as v4 + from opentelemetry.instrumentation.weaviate import weaviate_version, WEAVIATE_V4 + assert weaviate_version == WEAVIATE_V4 + + @pytest.mark.integration + def test_connect_to_local_instrumented( + self, instrumentor, tracer_provider, span_exporter + ): + """Test that connect_to_local creates spans.""" + trace.set_tracer_provider(tracer_provider) + instrumentor.instrument(tracer_provider=tracer_provider) + + try: + # This will fail if Weaviate is not running, but we can still check instrumentation + client = weaviate.connect_to_local() + client.close() + except Exception: + # Expected if Weaviate is not running + pass + + # Check that some instrumentation occurred + # Note: This test requires a running Weaviate instance for full validation + + def test_collection_operations_span_names(self): + """Test that collection operations have correct span names.""" + from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V4 + + # Verify key operations are mapped + span_names = [m["span_name"] for m in MAPPING_V4] + + assert "collections.create" in span_names + assert "collections.get" in span_names + assert "collections.delete" in span_names + assert "collections.data.insert" in span_names + assert "collections.query.fetch_objects" in span_names + + def test_query_operations_mapped(self): + """Test that query operations are properly mapped.""" + from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V4 + + query_operations = [ + m for m in MAPPING_V4 if "query" in m["span_name"] + ] + + assert len(query_operations) > 0 + + # Check for specific query operations + query_span_names = [op["span_name"] for op in query_operations] + assert "collections.query.near_text" in query_span_names + assert "collections.query.near_vector" in query_span_names + assert "collections.query.fetch_objects" in query_span_names From 5448e63f5449b6b39978f6ecf3ba33b48341c016 Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Tue, 9 Dec 2025 15:24:04 -0800 Subject: [PATCH 5/6] linting --- .../examples/manual/example_v4.py | 13 ++-- .../instrumentation/weaviate/__init__.py | 67 +++++-------------- .../instrumentation/weaviate/config.py | 2 +- .../instrumentation/weaviate/mapping.py | 2 +- .../instrumentation/weaviate/package.py | 2 +- .../instrumentation/weaviate/utils.py | 8 +-- .../instrumentation/weaviate/version.py | 2 +- .../tests/test_instrumentation.py | 10 ++- .../tests/test_span_attributes.py | 35 +++++----- .../tests/test_utils.py | 17 ++--- .../tests/test_weaviate_v3.py | 28 ++++---- .../tests/test_weaviate_v4.py | 29 ++++---- 12 files changed, 87 insertions(+), 128 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py index ed1b4bfa..5f270e75 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/examples/manual/example_v4.py @@ -9,6 +9,7 @@ Tested with weaviate-client>=4.0.0 Code adapted from: https://weaviate.io/developers/weaviate/client-libraries/python """ + import os import weaviate @@ -40,9 +41,11 @@ """ # Set up the tracer provider with service name -resource = Resource(attributes={ - ResourceAttributes.SERVICE_NAME: "weaviate-example", -}) +resource = Resource( + attributes={ + ResourceAttributes.SERVICE_NAME: "weaviate-example", + } +) tracer_provider = TracerProvider(resource=resource) trace.set_tracer_provider(tracer_provider) @@ -134,7 +137,7 @@ def query_get(collection): return_properties=[ "author", "text", - ] + ], ) @@ -238,4 +241,4 @@ def example_schema_workflow2(client): finally: # Ensure all spans are exported before exiting tracer_provider.force_flush(timeout_millis=5000) - client.close() \ No newline at end of file + client.close() diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py index f13fae84..4cd3b00b 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py @@ -121,24 +121,18 @@ def _instrument(self, **kwargs: Any) -> None: self._get_server_details(weaviate_version, tracer) - wrappings = ( - MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 - ) + wrappings = MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 for to_wrap in wrappings: name = ".".join([to_wrap["name"], to_wrap["function"]]) wrap_function_wrapper( module=to_wrap["module"], name=name, - wrapper=_WeaviateTraceInjectionWrapper( - tracer, wrap_properties=to_wrap - ), + wrapper=_WeaviateTraceInjectionWrapper(tracer, wrap_properties=to_wrap), ) def _uninstrument(self, **kwargs: Any) -> None: global weaviate_version - wrappings = ( - MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 - ) + wrappings = MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 for to_unwrap in wrappings: try: module = ".".join([to_unwrap["module"], to_unwrap["name"]]) @@ -182,9 +176,7 @@ class _WeaviateConnectionInjectionWrapper: def __init__(self, tracer: Tracer): self.tracer = tracer - def __call__( - self, wrapped: Any, instance: Any, args: Any, kwargs: Any - ) -> Any: + def __call__(self, wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: if not is_instrumentation_enabled(): return wrapped(*args, **kwargs) @@ -203,9 +195,7 @@ def __call__( connection_url = kwargs["url"] if connection_url: - connection_host, connection_port = parse_url_to_host_port( - connection_url - ) + connection_host, connection_port = parse_url_to_host_port(connection_url) return_value = wrapped(*args, **kwargs) @@ -238,9 +228,7 @@ def __init__( self.tracer = tracer self.wrap_properties = wrap_properties or {} - def __call__( - self, wrapped: Any, instance: Any, args: Any, kwargs: Any - ) -> Any: + def __call__(self, wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: """ Wraps the original function to inject tracing headers. """ @@ -259,9 +247,7 @@ def _create_db_span( getattr(wrapped, "__name__", "unknown"), ) name = f"{SPAN_NAME_PREFIX}.{name}" - with self.tracer.start_as_current_span( - name, kind=SpanKind.CLIENT - ) as span: + with self.tracer.start_as_current_span(name, kind=SpanKind.CLIENT) as span: span.set_attribute(DbAttributes.DB_SYSTEM_NAME, "weaviate") # Extract operation name dynamically from the function call @@ -275,20 +261,14 @@ def _create_db_span( ) if collection_name: # Use a Weaviate-specific collection attribute similar to MongoDB's DB_MONGODB_COLLECTION - span.set_attribute( - "db.weaviate.collection.name", collection_name - ) + span.set_attribute("db.weaviate.collection.name", collection_name) connection_host = _connection_host_context.get() connection_port = _connection_port_context.get() if connection_host is not None: - span.set_attribute( - ServerAttributes.SERVER_ADDRESS, connection_host - ) + span.set_attribute(ServerAttributes.SERVER_ADDRESS, connection_host) if connection_port is not None: - span.set_attribute( - ServerAttributes.SERVER_PORT, connection_port - ) + span.set_attribute(ServerAttributes.SERVER_PORT, connection_port) return_value = wrapped(*args, **kwargs) @@ -296,9 +276,7 @@ def _create_db_span( if self._is_similarity_search(): documents = self._extract_documents_from_response(return_value) if documents: - span.set_attribute( - "db.weaviate.documents.count", len(documents) - ) + span.set_attribute("db.weaviate.documents.count", len(documents)) # emit the documents as events for doc in documents: # emit the document content as an event @@ -306,9 +284,7 @@ def _create_db_span( if "query" in kwargs: query = json.dumps(kwargs["query"]) attributes = { - "db.weaviate.document.content": json.dumps( - doc["content"] - ), + "db.weaviate.document.content": json.dumps(doc["content"]), } # Only add non-None values to attributes @@ -321,14 +297,10 @@ def _create_db_span( "certainty" ] if doc.get("score") is not None: - attributes["db.weaviate.document.score"] = doc[ - "score" - ] + attributes["db.weaviate.document.score"] = doc["score"] if query: attributes["db.weaviate.document.query"] = query - span.add_event( - "weaviate.document", attributes=attributes - ) + span.add_event("weaviate.document", attributes=attributes) return return_value @@ -345,9 +317,7 @@ def _is_similarity_search(self) -> bool: or "fetch_objects" in function_name.lower() ) - def _extract_documents_from_response( - self, response: Any - ) -> list[dict[str, Any]]: + def _extract_documents_from_response(self, response: Any) -> list[dict[str, Any]]: """ Extract documents from weaviate response. """ @@ -372,10 +342,7 @@ def _extract_documents_from_response( and metadata.certainty is not None ): doc["certainty"] = metadata.certainty - if ( - hasattr(metadata, "score") - and metadata.score is not None - ): + if hasattr(metadata, "score") and metadata.score is not None: doc["score"] = metadata.score documents.append(doc) @@ -413,4 +380,4 @@ def _extract_documents_from_response( __all__ = [ "WeaviateInstrumentor", -] \ No newline at end of file +] diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py index d6b554c8..f80b607e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/config.py @@ -19,4 +19,4 @@ class Config: """ # logger to handle exceptions during instrumentation - exception_logger = None \ No newline at end of file + exception_logger = None diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py index 53c15c8d..d8c132ad 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/mapping.py @@ -179,4 +179,4 @@ "function": "add_object", "span_name": "collections.batch.add_object", }, -] \ No newline at end of file +] diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py index 96cb378d..a66b3bb0 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/package.py @@ -13,4 +13,4 @@ # limitations under the License. -_instruments = ("weaviate-client >= 3.0.0,<5.0.0",) \ No newline at end of file +_instruments = ("weaviate-client >= 3.0.0,<5.0.0",) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py index f21f72d0..3e2c69bb 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py @@ -55,11 +55,7 @@ def extract_collection_name( try: # Weaviate Client V4 stores this in the "request" attribute of the kwargs - if ( - kwargs - and "request" in kwargs - and hasattr(kwargs["request"], "collection") - ): + if kwargs and "request" in kwargs and hasattr(kwargs["request"], "collection"): collection_name = kwargs["request"].collection # Check if the instance has a collection attribute @@ -77,4 +73,4 @@ def extract_collection_name( pass - return None \ No newline at end of file + return None diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py index 73604ab1..5fd301e2 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.1.0" diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py index 533e5685..afdc5406 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_instrumentation.py @@ -1,7 +1,5 @@ """Tests for Weaviate instrumentation.""" -import pytest - from opentelemetry import trace from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor @@ -17,10 +15,10 @@ def test_instrumentor_initialization(self, instrumentor): def test_instrument_uninstrument(self, instrumentor, tracer_provider): """Test that instrumentation can be applied and removed.""" trace.set_tracer_provider(tracer_provider) - + # Instrument instrumentor.instrument(tracer_provider=tracer_provider) - + # Uninstrument instrumentor.uninstrument() @@ -34,11 +32,11 @@ def test_instrumentation_dependencies(self, instrumentor): def test_double_instrument(self, instrumentor, tracer_provider): """Test that double instrumentation doesn't cause errors.""" trace.set_tracer_provider(tracer_provider) - + instrumentor.instrument(tracer_provider=tracer_provider) # Second instrumentation should be idempotent instrumentor.instrument(tracer_provider=tracer_provider) - + instrumentor.uninstrument() def test_uninstrument_without_instrument(self, instrumentor): diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py index 82efddd4..6907a24e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_span_attributes.py @@ -1,7 +1,5 @@ """Tests for span attributes and semantic conventions.""" -import pytest - from opentelemetry.instrumentation.weaviate.mapping import ( MAPPING_V3, MAPPING_V4, @@ -35,7 +33,7 @@ def test_v4_span_names_have_prefix(self): def test_v3_mappings_have_required_fields(self): """Test that all v3 mappings have required fields.""" required_fields = ["module", "name", "function", "span_name"] - + for mapping in MAPPING_V3: for field in required_fields: assert field in mapping, f"Missing {field} in mapping: {mapping}" @@ -44,7 +42,7 @@ def test_v3_mappings_have_required_fields(self): def test_v4_mappings_have_required_fields(self): """Test that all v4 mappings have required fields.""" required_fields = ["module", "name", "function", "span_name"] - + for mapping in MAPPING_V4: for field in required_fields: assert field in mapping, f"Missing {field} in mapping: {mapping}" @@ -53,29 +51,33 @@ def test_v4_mappings_have_required_fields(self): def test_span_names_are_unique_v3(self): """Test that v3 span names are unique.""" span_names = [m["span_name"] for m in MAPPING_V3] - assert len(span_names) == len(set(span_names)), "Duplicate span names found in v3 mappings" + assert len(span_names) == len( + set(span_names) + ), "Duplicate span names found in v3 mappings" def test_span_names_are_unique_v4(self): """Test that v4 span names are unique.""" span_names = [m["span_name"] for m in MAPPING_V4] - assert len(span_names) == len(set(span_names)), "Duplicate span names found in v4 mappings" + assert len(span_names) == len( + set(span_names) + ), "Duplicate span names found in v4 mappings" def test_v3_operation_categories(self): """Test that v3 operations are properly categorized.""" span_names = [m["span_name"] for m in MAPPING_V3] - + # Should have schema operations schema_ops = [s for s in span_names if s.startswith("schema.")] assert len(schema_ops) > 0, "No schema operations found" - + # Should have data operations data_ops = [s for s in span_names if "data" in s] assert len(data_ops) > 0, "No data operations found" - + # Should have query operations query_ops = [s for s in span_names if "query" in s or "gql" in s] assert len(query_ops) > 0, "No query operations found" - + # Should have batch operations batch_ops = [s for s in span_names if "batch" in s] assert len(batch_ops) > 0, "No batch operations found" @@ -83,15 +85,15 @@ def test_v3_operation_categories(self): def test_v4_operation_categories(self): """Test that v4 operations are properly categorized.""" span_names = [m["span_name"] for m in MAPPING_V4] - + # Should have collection operations collection_ops = [s for s in span_names if s.startswith("collections.")] assert len(collection_ops) > 0, "No collection operations found" - + # Should have query operations query_ops = [s for s in span_names if "query" in s] assert len(query_ops) > 0, "No query operations found" - + # Should have data operations data_ops = [s for s in span_names if "data" in s] assert len(data_ops) > 0, "No data operations found" @@ -99,12 +101,13 @@ def test_v4_operation_categories(self): def test_module_paths_are_valid_python(self): """Test that module paths follow Python naming conventions.""" all_mappings = MAPPING_V3 + MAPPING_V4 - + for mapping in all_mappings: module = mapping["module"] # Should be valid Python module path - assert module.replace(".", "").replace("_", "").isalnum(), \ - f"Invalid module path: {module}" + assert ( + module.replace(".", "").replace("_", "").isalnum() + ), f"Invalid module path: {module}" # Should not start or end with dot assert not module.startswith("."), f"Module starts with dot: {module}" assert not module.endswith("."), f"Module ends with dot: {module}" diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py index d0538d2d..569d797d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_utils.py @@ -1,7 +1,5 @@ """Tests for utility functions.""" -import pytest - from opentelemetry.instrumentation.weaviate.utils import ( extract_collection_name, parse_url_to_host_port, @@ -59,14 +57,15 @@ class TestExtractCollectionName: def test_extract_from_args(self): """Test extracting collection name from positional args.""" + # Mock function and instance def mock_func(): pass - + instance = None args = ("MyCollection",) kwargs = {} - + result = extract_collection_name( mock_func, instance, args, kwargs, "weaviate.schema", "get" ) @@ -75,13 +74,14 @@ def mock_func(): def test_extract_from_kwargs(self): """Test extracting collection name from keyword args.""" + def mock_func(): pass - + instance = None args = () kwargs = {"class_name": "MyCollection"} - + result = extract_collection_name( mock_func, instance, args, kwargs, "weaviate.data", "create" ) @@ -90,13 +90,14 @@ def mock_func(): def test_extract_with_no_collection(self): """Test extraction when no collection name is present.""" + def mock_func(): pass - + instance = None args = () kwargs = {} - + result = extract_collection_name( mock_func, instance, args, kwargs, "weaviate.query", "raw" ) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py index 77e9e8e4..aca57a7a 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v3.py @@ -4,7 +4,7 @@ try: import weaviate - + WEAVIATE_AVAILABLE = True WEAVIATE_VERSION = int(weaviate.__version__.split(".")[0]) except ImportError: @@ -12,35 +12,31 @@ WEAVIATE_VERSION = 0 from opentelemetry import trace -from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor -@pytest.mark.skipif( - not WEAVIATE_AVAILABLE, - reason="Weaviate client not available" -) +@pytest.mark.skipif(not WEAVIATE_AVAILABLE, reason="Weaviate client not available") class TestWeaviateV3Instrumentation: """Test Weaviate v3 client instrumentation.""" def test_v3_operations_mapped(self): """Test that v3 operations are properly mapped.""" from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V3 - + # Verify key v3 operations are mapped span_names = [m["span_name"] for m in MAPPING_V3] - + # Schema operations assert "schema.get" in span_names assert "schema.create_class" in span_names assert "schema.delete_class" in span_names - + # Data operations assert "data.crud_data.create" in span_names assert "data.crud_data.get" in span_names - + # Batch operations assert "batch.crud_batch.add_data_object" in span_names - + # Query operations assert "gql.query.get" in span_names assert "gql.query.aggregate" in span_names @@ -49,10 +45,10 @@ def test_v3_operations_mapped(self): def test_v3_modules_structure(self): """Test that v3 module paths are correct.""" from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V3 - + # Check that module paths follow v3 structure modules = [m["module"] for m in MAPPING_V3] - + assert "weaviate.schema" in modules assert "weaviate.data.crud_data" in modules assert "weaviate.batch.crud_batch" in modules @@ -60,13 +56,13 @@ def test_v3_modules_structure(self): assert "weaviate.gql.get" in modules @pytest.mark.skipif( - WEAVIATE_VERSION >= 4, - reason="Test only applicable for v3 client" + WEAVIATE_VERSION >= 4, reason="Test only applicable for v3 client" ) def test_v3_client_detection(self, instrumentor, tracer_provider): """Test that v3 client is correctly detected when installed.""" trace.set_tracer_provider(tracer_provider) instrumentor.instrument(tracer_provider=tracer_provider) - + from opentelemetry.instrumentation.weaviate import weaviate_version, WEAVIATE_V3 + assert weaviate_version == WEAVIATE_V3 diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py index 103244c3..471dbf68 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/tests/test_weaviate_v4.py @@ -4,8 +4,7 @@ try: import weaviate - from weaviate.collections.classes.config import Configure - + WEAVIATE_AVAILABLE = True WEAVIATE_VERSION = int(weaviate.__version__.split(".")[0]) except ImportError: @@ -13,14 +12,11 @@ WEAVIATE_VERSION = 0 from opentelemetry import trace -from opentelemetry.instrumentation.weaviate import WeaviateInstrumentor -from opentelemetry.semconv.attributes import db_attributes as DbAttributes -from opentelemetry.semconv.attributes import server_attributes as ServerAttributes @pytest.mark.skipif( not WEAVIATE_AVAILABLE or WEAVIATE_VERSION < 4, - reason="Weaviate v4 client not available" + reason="Weaviate v4 client not available", ) class TestWeaviateV4Instrumentation: """Test Weaviate v4 client instrumentation.""" @@ -29,9 +25,10 @@ def test_version_detection(self, instrumentor, tracer_provider): """Test that v4 client is correctly detected.""" trace.set_tracer_provider(tracer_provider) instrumentor.instrument(tracer_provider=tracer_provider) - + # Version should be detected as v4 from opentelemetry.instrumentation.weaviate import weaviate_version, WEAVIATE_V4 + assert weaviate_version == WEAVIATE_V4 @pytest.mark.integration @@ -41,7 +38,7 @@ def test_connect_to_local_instrumented( """Test that connect_to_local creates spans.""" trace.set_tracer_provider(tracer_provider) instrumentor.instrument(tracer_provider=tracer_provider) - + try: # This will fail if Weaviate is not running, but we can still check instrumentation client = weaviate.connect_to_local() @@ -49,17 +46,17 @@ def test_connect_to_local_instrumented( except Exception: # Expected if Weaviate is not running pass - + # Check that some instrumentation occurred # Note: This test requires a running Weaviate instance for full validation def test_collection_operations_span_names(self): """Test that collection operations have correct span names.""" from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V4 - + # Verify key operations are mapped span_names = [m["span_name"] for m in MAPPING_V4] - + assert "collections.create" in span_names assert "collections.get" in span_names assert "collections.delete" in span_names @@ -69,13 +66,11 @@ def test_collection_operations_span_names(self): def test_query_operations_mapped(self): """Test that query operations are properly mapped.""" from opentelemetry.instrumentation.weaviate.mapping import MAPPING_V4 - - query_operations = [ - m for m in MAPPING_V4 if "query" in m["span_name"] - ] - + + query_operations = [m for m in MAPPING_V4 if "query" in m["span_name"]] + assert len(query_operations) > 0 - + # Check for specific query operations query_span_names = [op["span_name"] for op in query_operations] assert "collections.query.near_text" in query_span_names From e945219788423abd2b1d08d7b8901c7527afafff Mon Sep 17 00:00:00 2001 From: JWinermaSplunk Date: Wed, 10 Dec 2025 15:50:08 -0800 Subject: [PATCH 6/6] address comments --- .../instrumentation/weaviate/__init__.py | 31 ++++++------------- .../instrumentation/weaviate/utils.py | 7 +++-- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py index 4cd3b00b..d9bb8116 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/__init__.py @@ -119,7 +119,7 @@ def _instrument(self, **kwargs: Any) -> None: # Default to V3 if version parsing fails weaviate_version = WEAVIATE_V3 - self._get_server_details(weaviate_version, tracer) + self._instrument_client(weaviate_version, tracer) wrappings = MAPPING_V3 if weaviate_version == WEAVIATE_V3 else MAPPING_V4 for to_wrap in wrappings: @@ -155,7 +155,7 @@ def _uninstrument(self, **kwargs: Any) -> None: # Ignore errors when unwrapping connection methods pass - def _get_server_details(self, version: int, tracer: Tracer) -> None: + def _instrument_client(self, version: int, tracer: Tracer) -> None: name = "Client.__init__" if version == WEAVIATE_V4: name = "WeaviateClient.__init__" @@ -168,10 +168,7 @@ def _get_server_details(self, version: int, tracer: Tracer) -> None: class _WeaviateConnectionInjectionWrapper: - """ - A wrapper that intercepts calls to weaviate connection methods to inject tracing headers. - This is used to create spans for Weaviate connection operations. - """ + """A wrapper that intercepts Weaviate client initialization to capture server connection details.""" def __init__(self, tracer: Tracer): self.tracer = tracer @@ -217,10 +214,7 @@ def __call__(self, wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: class _WeaviateTraceInjectionWrapper: - """ - A wrapper that intercepts calls to weaviate to inject tracing headers. - This is used to create spans for Weaviate operations. - """ + """A wrapper that intercepts Weaviate client operations to create database spans with Weaviate attributes.""" def __init__( self, tracer: Tracer, wrap_properties: Optional[Dict[str, str]] = None @@ -229,9 +223,7 @@ def __init__( self.wrap_properties = wrap_properties or {} def __call__(self, wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any: - """ - Wraps the original function to inject tracing headers. - """ + """Wraps the original Weaviate operation to create a tracing span.""" if not is_instrumentation_enabled(): return wrapped(*args, **kwargs) @@ -305,9 +297,7 @@ def _create_db_span( return return_value def _is_similarity_search(self) -> bool: - """ - Check if this is a similarity search operation. - """ + """Check if the operation is a similarity search.""" module_name = self.wrap_properties.get("module", "") function_name = self.wrap_properties.get("function", "") return ( @@ -318,9 +308,7 @@ def _is_similarity_search(self) -> bool: ) def _extract_documents_from_response(self, response: Any) -> list[dict[str, Any]]: - """ - Extract documents from weaviate response. - """ + """Extract documents from weaviate response.""" documents: list[dict[str, Any]] = [] try: if hasattr(response, "objects"): @@ -372,8 +360,9 @@ def _extract_documents_from_response(self, response: Any) -> list[dict[str, Any] ): doc["score"] = metadata["score"] documents.append(doc) - except Exception: - # silently handle extraction errors + except Exception as e: + if Config.exception_logger: + Config.exception_logger(e) pass return documents diff --git a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py index 3e2c69bb..9b4837da 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-weaviate/src/opentelemetry/instrumentation/weaviate/utils.py @@ -16,6 +16,8 @@ from typing import Any, Optional, Tuple from urllib.parse import urlparse +from opentelemetry.instrumentation.weaviate.config import Config + # TODO: get semconv for vector databases # from opentelemetry.semconv._incubating.attributes import gen_ai_attributes as GenAI @@ -68,9 +70,10 @@ def extract_collection_name( return collection_name - except Exception: + except Exception as e: # Silently ignore any errors during extraction to avoid breaking the tracing - + if Config.exception_logger: + Config.exception_logger(e) pass return None