From ea32814ef1d4aff63edee0fb73c48b53c25969ee Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Thu, 20 Nov 2025 15:04:04 -0500 Subject: [PATCH 1/4] Add performance tracking to embedding search --- server/api/services/embedding_services.py | 94 ++++++++++++++++++++--- 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/server/api/services/embedding_services.py b/server/api/services/embedding_services.py index 6fd34d35..2c51d8cb 100644 --- a/server/api/services/embedding_services.py +++ b/server/api/services/embedding_services.py @@ -1,5 +1,7 @@ # services/embedding_services.py +import time +import logging from pgvector.django import L2Distance from .sentencetTransformer_model import TransformerModel @@ -7,9 +9,12 @@ # Adjust import path as needed from ..models.model_embeddings import Embeddings +# Configure logging +logger = logging.getLogger(__name__) + def get_closest_embeddings( - user, message_data, document_name=None, guid=None, num_results=10 + user, message_data, document_name=None, guid=None, num_results=10, return_metrics=False ): """ Find the closest embeddings to a given message for a specific user. @@ -26,22 +31,46 @@ def get_closest_embeddings( Filter results to a specific document GUID (takes precedence over document_name) num_results : int, default 10 Maximum number of results to return + return_metrics : bool, default False + If True, return a tuple of (results, metrics) instead of just results Returns ------- - list[dict] - List of dictionaries containing embedding results with keys: - - name: document name - - text: embedded text content - - page_number: page number in source document - - chunk_number: chunk number within the document - - distance: L2 distance from query embedding - - file_id: GUID of the source file + list[dict] or tuple[list[dict], dict] + If return_metrics is False (default): + List of dictionaries containing embedding results with keys: + - name: document name + - text: embedded text content + - page_number: page number in source document + - chunk_number: chunk number within the document + - distance: L2 distance from query embedding + - file_id: GUID of the source file + + If return_metrics is True: + Tuple of (results, metrics) where metrics is a dictionary containing: + - encoding_time: Time to encode query (seconds) + - db_query_time: Time for database query (seconds) + - total_time: Total execution time (seconds) + - total_embeddings: Number of embeddings searched + - num_results_returned: Number of results returned + - avg_similarity: Average similarity score (0-1) + - min_distance: Minimum L2 distance + - max_distance: Maximum L2 distance + - avg_distance: Average L2 distance """ - # + # Track total execution time + start_time = time.time() + + # Track transformer encoding time + encoding_start = time.time() transformerModel = TransformerModel.get_instance().model embedding_message = transformerModel.encode(message_data) + encoding_time = time.time() - encoding_start + + # Track database query time + db_query_start = time.time() + # Start building the query based on the message's embedding closest_embeddings_query = ( Embeddings.objects.filter(upload_file__uploaded_by=user) @@ -51,6 +80,9 @@ def get_closest_embeddings( .order_by("distance") ) + # Get total embeddings in search space before filtering + total_embeddings = closest_embeddings_query.count() + # Filter by GUID if provided, otherwise filter by document name if provided if guid: closest_embeddings_query = closest_embeddings_query.filter( @@ -75,4 +107,46 @@ def get_closest_embeddings( for obj in closest_embeddings_query ] + db_query_time = time.time() - db_query_start + total_time = time.time() - start_time + + # Calculate distance/similarity statistics + num_results_returned = len(results) + if num_results_returned > 0: + distances = [r["distance"] for r in results] + min_distance = min(distances) + max_distance = max(distances) + avg_distance = sum(distances) / num_results_returned + # Convert distance to similarity score (1 - distance for L2) + avg_similarity = 1 - avg_distance + else: + min_distance = max_distance = avg_distance = avg_similarity = 0.0 + + # Log performance metrics similar to assistant/views.py pattern + logger.info( + f"Embedding search completed: " + f"Encoding time: {encoding_time:.3f}s, " + f"DB query time: {db_query_time:.3f}s, " + f"Total time: {total_time:.3f}s, " + f"Searched: {total_embeddings} embeddings, " + f"Returned: {num_results_returned} results, " + f"Avg similarity: {avg_similarity:.3f}, " + f"Distance range: [{min_distance:.3f}, {max_distance:.3f}]" + ) + + # Optionally return metrics along with results + if return_metrics: + metrics = { + "encoding_time": encoding_time, + "db_query_time": db_query_time, + "total_time": total_time, + "total_embeddings": total_embeddings, + "num_results_returned": num_results_returned, + "avg_similarity": avg_similarity, + "min_distance": min_distance, + "max_distance": max_distance, + "avg_distance": avg_distance, + } + return results, metrics + return results From 1fc41a76ba12963b707ecc46157645bba56db449 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Mon, 24 Nov 2025 17:08:21 -0500 Subject: [PATCH 2/4] Simplify embedding search --- server/api/services/embedding_services.py | 45 +++++++---------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/server/api/services/embedding_services.py b/server/api/services/embedding_services.py index 2c51d8cb..1828b81c 100644 --- a/server/api/services/embedding_services.py +++ b/server/api/services/embedding_services.py @@ -1,18 +1,13 @@ -# services/embedding_services.py - import time import logging + from pgvector.django import L2Distance from .sentencetTransformer_model import TransformerModel - -# Adjust import path as needed from ..models.model_embeddings import Embeddings -# Configure logging logger = logging.getLogger(__name__) - def get_closest_embeddings( user, message_data, document_name=None, guid=None, num_results=10, return_metrics=False ): @@ -51,24 +46,19 @@ def get_closest_embeddings( - encoding_time: Time to encode query (seconds) - db_query_time: Time for database query (seconds) - total_time: Total execution time (seconds) - - total_embeddings: Number of embeddings searched - num_results_returned: Number of results returned - - avg_similarity: Average similarity score (0-1) - min_distance: Minimum L2 distance - max_distance: Maximum L2 distance - avg_distance: Average L2 distance """ - # Track total execution time start_time = time.time() - # Track transformer encoding time encoding_start = time.time() transformerModel = TransformerModel.get_instance().model embedding_message = transformerModel.encode(message_data) encoding_time = time.time() - encoding_start - # Track database query time db_query_start = time.time() # Start building the query based on the message's embedding @@ -80,10 +70,7 @@ def get_closest_embeddings( .order_by("distance") ) - # Get total embeddings in search space before filtering - total_embeddings = closest_embeddings_query.count() - - # Filter by GUID if provided, otherwise filter by document name if provided + # Filtering results to a document GUID takes precedence over filtering results to document name if guid: closest_embeddings_query = closest_embeddings_query.filter( upload_file__guid=guid @@ -95,6 +82,7 @@ def get_closest_embeddings( closest_embeddings_query = closest_embeddings_query[:num_results] # Format the results to be returned + # TODO: Research improving the query evaluation performance results = [ { "name": obj.name, @@ -112,37 +100,30 @@ def get_closest_embeddings( # Calculate distance/similarity statistics num_results_returned = len(results) - if num_results_returned > 0: - distances = [r["distance"] for r in results] - min_distance = min(distances) - max_distance = max(distances) - avg_distance = sum(distances) / num_results_returned - # Convert distance to similarity score (1 - distance for L2) - avg_similarity = 1 - avg_distance - else: - min_distance = max_distance = avg_distance = avg_similarity = 0.0 - - # Log performance metrics similar to assistant/views.py pattern + + #TODO: Handle user having no uploaded docs or doc filtering returning no matches + + distances = [r["distance"] for r in results] + min_distance = min(distances) + max_distance = max(distances) + avg_distance = sum(distances) / num_results_returned + logger.info( f"Embedding search completed: " f"Encoding time: {encoding_time:.3f}s, " f"DB query time: {db_query_time:.3f}s, " f"Total time: {total_time:.3f}s, " - f"Searched: {total_embeddings} embeddings, " f"Returned: {num_results_returned} results, " - f"Avg similarity: {avg_similarity:.3f}, " - f"Distance range: [{min_distance:.3f}, {max_distance:.3f}]" + f"Distance range: [{min_distance:.3f}, {max_distance:.3f}], " + f"Average distance: {avg_distance:.3f}" ) - # Optionally return metrics along with results if return_metrics: metrics = { "encoding_time": encoding_time, "db_query_time": db_query_time, "total_time": total_time, - "total_embeddings": total_embeddings, "num_results_returned": num_results_returned, - "avg_similarity": avg_similarity, "min_distance": min_distance, "max_distance": max_distance, "avg_distance": avg_distance, From 156644be05058b6afe8519bf2ae266158a9d00f2 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 25 Nov 2025 19:18:40 -0500 Subject: [PATCH 3/4] Add persistent tracking for semantic search performance and usage --- server/api/models/model_search_usage.py | 42 +++++++++ server/api/services/embedding_services.py | 110 ++++++++++------------ 2 files changed, 92 insertions(+), 60 deletions(-) create mode 100644 server/api/models/model_search_usage.py diff --git a/server/api/models/model_search_usage.py b/server/api/models/model_search_usage.py new file mode 100644 index 00000000..cdc3dee6 --- /dev/null +++ b/server/api/models/model_search_usage.py @@ -0,0 +1,42 @@ +import uuid + +from django.db import models +from django.conf import settings + +class SemanticSearchUsage(models.Model): + """ + Tracks performance metrics and usage data for embedding searches. + """ + guid = models.UUIDField(unique=True, default=uuid.uuid4, editable=False) + timestamp = models.DateTimeField(auto_now_add=True) + query_text = models.TextField(blank=True, null=True, help_text="The search query text") + document_name = models.TextField(blank=True, null=True, help_text="Document name filter if used") + document_guid = models.UUIDField(blank=True, null=True, help_text="Document GUID filter if used") + num_results_requested = models.IntegerField(default=10, help_text="Number of results requested") + user = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + related_name='semantic_searches', + null=True, + blank=True, + help_text="User who performed the search (null for unauthenticated users)" + ) + encoding_time = models.FloatField(help_text="Time to encode query in seconds") + db_query_time = models.FloatField(help_text="Time for database query in seconds") + num_results_returned = models.IntegerField(help_text="Number of results returned") + min_distance = models.FloatField(null=True, blank=True, help_text="Minimum L2 distance (null if no results)") + max_distance = models.FloatField(null=True, blank=True, help_text="Maximum L2 distance (null if no results)") + median_distance = models.FloatField(null=True, blank=True, help_text="Median L2 distance (null if no results)") + + + class Meta: + ordering = ['-timestamp'] + indexes = [ + models.Index(fields=['-timestamp']), + models.Index(fields=['user', '-timestamp']), + ] + + def __str__(self): + total_time = self.encoding_time + self.db_query_time + user_display = self.user.email if self.user else "Anonymous" + return f"Search by {user_display} at {self.timestamp} ({total_time:.3f}s)" diff --git a/server/api/services/embedding_services.py b/server/api/services/embedding_services.py index 1828b81c..c937f757 100644 --- a/server/api/services/embedding_services.py +++ b/server/api/services/embedding_services.py @@ -1,15 +1,17 @@ import time import logging +from statistics import median from pgvector.django import L2Distance from .sentencetTransformer_model import TransformerModel from ..models.model_embeddings import Embeddings +from ..models.model_search_usage import SemanticSearchUsage logger = logging.getLogger(__name__) def get_closest_embeddings( - user, message_data, document_name=None, guid=None, num_results=10, return_metrics=False + user, message_data, document_name=None, guid=None, num_results=10 ): """ Find the closest embeddings to a given message for a specific user. @@ -26,34 +28,19 @@ def get_closest_embeddings( Filter results to a specific document GUID (takes precedence over document_name) num_results : int, default 10 Maximum number of results to return - return_metrics : bool, default False - If True, return a tuple of (results, metrics) instead of just results Returns ------- - list[dict] or tuple[list[dict], dict] - If return_metrics is False (default): - List of dictionaries containing embedding results with keys: - - name: document name - - text: embedded text content - - page_number: page number in source document - - chunk_number: chunk number within the document - - distance: L2 distance from query embedding - - file_id: GUID of the source file - - If return_metrics is True: - Tuple of (results, metrics) where metrics is a dictionary containing: - - encoding_time: Time to encode query (seconds) - - db_query_time: Time for database query (seconds) - - total_time: Total execution time (seconds) - - num_results_returned: Number of results returned - - min_distance: Minimum L2 distance - - max_distance: Maximum L2 distance - - avg_distance: Average L2 distance + list[dict] + List of dictionaries containing embedding results with keys: + - name: document name + - text: embedded text content + - page_number: page number in source document + - chunk_number: chunk number within the document + - distance: L2 distance from query embedding + - file_id: GUID of the source file """ - start_time = time.time() - encoding_start = time.time() transformerModel = TransformerModel.get_instance().model embedding_message = transformerModel.encode(message_data) @@ -61,7 +48,7 @@ def get_closest_embeddings( db_query_start = time.time() - # Start building the query based on the message's embedding + # Django QuerySets are lazily evaluated closest_embeddings_query = ( Embeddings.objects.filter(upload_file__uploaded_by=user) .annotate( @@ -70,7 +57,7 @@ def get_closest_embeddings( .order_by("distance") ) - # Filtering results to a document GUID takes precedence over filtering results to document name + # Filtering to a document GUID takes precedence over a document name if guid: closest_embeddings_query = closest_embeddings_query.filter( upload_file__guid=guid @@ -78,10 +65,10 @@ def get_closest_embeddings( elif document_name: closest_embeddings_query = closest_embeddings_query.filter(name=document_name) - # Slice the results to limit to num_results + # Slicing is equivalent to SQL's LIMIT clause closest_embeddings_query = closest_embeddings_query[:num_results] - # Format the results to be returned + # Iterating evaluates the QuerySet and hits the database # TODO: Research improving the query evaluation performance results = [ { @@ -96,38 +83,41 @@ def get_closest_embeddings( ] db_query_time = time.time() - db_query_start - total_time = time.time() - start_time - - # Calculate distance/similarity statistics - num_results_returned = len(results) - - #TODO: Handle user having no uploaded docs or doc filtering returning no matches - - distances = [r["distance"] for r in results] - min_distance = min(distances) - max_distance = max(distances) - avg_distance = sum(distances) / num_results_returned - - logger.info( - f"Embedding search completed: " - f"Encoding time: {encoding_time:.3f}s, " - f"DB query time: {db_query_time:.3f}s, " - f"Total time: {total_time:.3f}s, " - f"Returned: {num_results_returned} results, " - f"Distance range: [{min_distance:.3f}, {max_distance:.3f}], " - f"Average distance: {avg_distance:.3f}" - ) - if return_metrics: - metrics = { - "encoding_time": encoding_time, - "db_query_time": db_query_time, - "total_time": total_time, - "num_results_returned": num_results_returned, - "min_distance": min_distance, - "max_distance": max_distance, - "avg_distance": avg_distance, - } - return results, metrics + try: + # Handle user having no uploaded docs or doc filtering returning no matches + if results: + distances = [r["distance"] for r in results] + SemanticSearchUsage.objects.create( + query_text=message_data, + user=user if (user and user.is_authenticated) else None, + document_guid=guid, + document_name=document_name, + num_results_requested=num_results, + encoding_time=encoding_time, + db_query_time=db_query_time, + num_results_returned=len(results), + max_distance=max(distances), + median_distance=median(distances), + min_distance=min(distances) + ) + else: + logger.warning("Semantic search returned no results") + + SemanticSearchUsage.objects.create( + query_text=message_data, + user=user if (user and user.is_authenticated) else None, + document_guid=guid, + document_name=document_name, + num_results_requested=num_results, + encoding_time=encoding_time, + db_query_time=db_query_time, + num_results_returned=0, + max_distance=None, + median_distance=None, + min_distance=None + ) + except Exception as e: + logger.error(f"Failed to create semantic search usage database record: {e}") return results From 6a843596d50076e1c1877b0ebf0a32396880a0e0 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 26 Nov 2025 17:20:25 -0500 Subject: [PATCH 4/4] Add semantic search usage migration file --- .../migrations/0015_semanticsearchusage.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 server/api/migrations/0015_semanticsearchusage.py diff --git a/server/api/migrations/0015_semanticsearchusage.py b/server/api/migrations/0015_semanticsearchusage.py new file mode 100644 index 00000000..0475b71f --- /dev/null +++ b/server/api/migrations/0015_semanticsearchusage.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.3 on 2025-11-26 21:02 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0014_alter_medrule_rule_type'), + ] + + operations = [ + migrations.CreateModel( + name='SemanticSearchUsage', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('guid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('query_text', models.TextField(blank=True, help_text='The search query text', null=True)), + ('document_name', models.TextField(blank=True, help_text='Document name filter if used', null=True)), + ('document_guid', models.UUIDField(blank=True, help_text='Document GUID filter if used', null=True)), + ('num_results_requested', models.IntegerField(default=10, help_text='Number of results requested')), + ('encoding_time', models.FloatField(help_text='Time to encode query in seconds')), + ('db_query_time', models.FloatField(help_text='Time for database query in seconds')), + ('num_results_returned', models.IntegerField(help_text='Number of results returned')), + ('min_distance', models.FloatField(blank=True, help_text='Minimum L2 distance (null if no results)', null=True)), + ('max_distance', models.FloatField(blank=True, help_text='Maximum L2 distance (null if no results)', null=True)), + ('median_distance', models.FloatField(blank=True, help_text='Median L2 distance (null if no results)', null=True)), + ('user', models.ForeignKey(blank=True, help_text='User who performed the search (null for unauthenticated users)', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='semantic_searches', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-timestamp'], + 'indexes': [models.Index(fields=['-timestamp'], name='api_semanti_timesta_0b5730_idx'), models.Index(fields=['user', '-timestamp'], name='api_semanti_user_id_e11ecb_idx')], + }, + ), + ]