diff --git a/generate-combinations.py b/generate-combinations.py index fbb93dc..3f4c77a 100644 --- a/generate-combinations.py +++ b/generate-combinations.py @@ -1,4 +1,3 @@ - import itertools import argparse import sys @@ -42,7 +41,11 @@ invariants["vectorDimension"] = dataset_info["vector_dimension"] print("sweep: " + sweep) for param, value in sweeps[sweep].get("common-params", {}).items(): - if not isinstance(value, list): + # efSearch is always passed through as a list — Java handles the iteration + if param == 'efSearch': + # Ensure it's always a list + invariants[param] = value if isinstance(value, list) else [value] + elif not isinstance(value, list): invariants[param] = value else: variants[param] = value @@ -54,7 +57,10 @@ for param, value in algorithms[algo].items(): if param not in ["params"]: - if not isinstance(value, list): + # efSearch is always passed through as a list — Java handles the iteration + if param == 'efSearch': + algo_invariants[param] = value if isinstance(value, list) else [value] + elif not isinstance(value, list): algo_invariants[param] = value else: algo_variants[param] = value @@ -62,138 +68,53 @@ # Generate all combination of variants. For each combination, generate a hashed ID, and a file with the # name pattern as --.json. The file should contain the invariants as is, and the variants as the current combination. if algo_variants: - # Separate efSearch from other variants if it exists - efSearch_values = None - other_variant_keys = [] - other_variant_values = [] - - for key, value in algo_variants.items(): - if key == 'efSearch': - efSearch_values = value - else: - other_variant_keys.append(key) - other_variant_values.append(value) - - # Generate combinations with efSearch at the beginning (innermost loop) - if efSearch_values and other_variant_keys: - # Generate combinations of other parameters first - for other_combination in itertools.product(*other_variant_values): - other_variants = dict(zip(other_variant_keys, other_combination)) - # Then iterate through efSearch values - for ef_index, ef_value in enumerate(efSearch_values): - current_variants = other_variants.copy() - current_variants['efSearch'] = ef_value - - # Skip if cagraIntermediateDegree < cagraGraphDegree - if 'cagraIntermediateDegree' in current_variants and 'cagraGraphDegree' in current_variants: - if current_variants['cagraIntermediateDegree'] < current_variants['cagraGraphDegree']: - print(f"\t\tSkipping combination: cagraIntermediateDegree ({current_variants['cagraIntermediateDegree']}) < cagraGraphDegree ({current_variants['cagraGraphDegree']})") - continue - - # Skip if hnswMaxConn > hnswBeamWidth - if 'hnswMaxConn' in current_variants and 'hnswBeamWidth' in current_variants: - if current_variants['hnswMaxConn'] > current_variants['hnswBeamWidth']: - print(f"\t\tSkipping combination: hnswMaxConn ({current_variants['hnswMaxConn']}) > hnswBeamWidth ({current_variants['hnswBeamWidth']})") - continue - - # Generate hash only from other_variants (excluding efSearch) - base_hash = hashlib.md5(json.dumps(other_variants, sort_keys=True).encode()).hexdigest()[:8] - hash_id = f"{base_hash}-ef{ef_value}" - - config = algo_invariants.copy() - config.update(current_variants) - - # For multiple efSearch combinations: subsequent ones skip indexing - if len(efSearch_values) > 1 and ef_index > 0: - config['skipIndexing'] = True - - # Set cleanIndexDirectory based on position - if ef_index == 0: - config['cleanIndexDirectory'] = False - elif ef_index == len(efSearch_values) - 1: - config['cleanIndexDirectory'] = True - else: - config['cleanIndexDirectory'] = False - - # Use base_hash for index directory paths - if 'hnswIndexDirPath' in config: - config['hnswIndexDirPath'] = f"hnswIndex-{base_hash}" - if 'cuvsIndexDirPath' in config: - config['cuvsIndexDirPath'] = f"cuvsIndex-{base_hash}" - - filename = f"{algo}-{hash_id}.json" - sweep_dir = f"{args.configs_dir}/{sweep}" - filepath = f"{sweep_dir}/{filename}" - os.makedirs(sweep_dir, exist_ok=True) - with open(filepath, 'w') as f: - json.dump(config, f, indent=2) - print(f"\tGenerated config file: {filepath}") - elif efSearch_values: - # Only efSearch values, no other variants - for ef_index, ef_value in enumerate(efSearch_values): - current_variants = {'efSearch': ef_value} - # Generate hash from empty dict since no other variants exist - base_hash = hashlib.md5(json.dumps({}, sort_keys=True).encode()).hexdigest()[:8] - hash_id = f"{base_hash}-ef{ef_value}" - - config = algo_invariants.copy() - config.update(current_variants) - - # For multiple efSearch combinations: subsequent ones skip indexing - if len(efSearch_values) > 1 and ef_index > 0: - config['skipIndexing'] = True - - # Set cleanIndexDirectory based on position - if ef_index == 0: - config['cleanIndexDirectory'] = False - elif ef_index == len(efSearch_values) - 1: - config['cleanIndexDirectory'] = True - else: - config['cleanIndexDirectory'] = False - - # Use base_hash for index directory paths - if 'hnswIndexDirPath' in config: - config['hnswIndexDirPath'] = f"hnswIndex-{base_hash}" - if 'cuvsIndexDirPath' in config: - config['cuvsIndexDirPath'] = f"cuvsIndex-{base_hash}" - - filename = f"{algo}-{hash_id}.json" - sweep_dir = f"{args.configs_dir}/{sweep}" - filepath = f"{sweep_dir}/{filename}" - os.makedirs(sweep_dir, exist_ok=True) - with open(filepath, 'w') as f: - json.dump(config, f, indent=2) - print(f"\tGenerated config file: {filepath}") - else: - # No efSearch, use original logic - variant_keys = list(algo_variants.keys()) - variant_values = list(algo_variants.values()) - for combination in itertools.product(*variant_values): - current_variants = dict(zip(variant_keys, combination)) - - # Skip if cagraIntermediateDegree < cagraGraphDegree - if 'cagraIntermediateDegree' in current_variants and 'cagraGraphDegree' in current_variants: - if current_variants['cagraIntermediateDegree'] < current_variants['cagraGraphDegree']: - print(f"\t\tSkipping combination: cagraIntermediateDegree ({current_variants['cagraIntermediateDegree']}) < cagraGraphDegree ({current_variants['cagraGraphDegree']})") - continue - - # Skip if hnswMaxConn > hnswBeamWidth - if 'hnswMaxConn' in current_variants and 'hnswBeamWidth' in current_variants: - if current_variants['hnswMaxConn'] > current_variants['hnswBeamWidth']: - print(f"\t\tSkipping combination: hnswMaxConn ({current_variants['hnswMaxConn']}) > hnswBeamWidth ({current_variants['hnswBeamWidth']})") - continue - - hash_id = hashlib.md5(json.dumps(current_variants, sort_keys=True).encode()).hexdigest()[:8] - - config = algo_invariants.copy() - config.update(current_variants) - filename = f"{algo}-{hash_id}.json" - sweep_dir = f"{args.configs_dir}/{sweep}" - filepath = f"{sweep_dir}/{filename}" - os.makedirs(sweep_dir, exist_ok=True) - with open(filepath, 'w') as f: - json.dump(config, f, indent=2) - print(f"\tGenerated config file: {filepath}") + variant_keys = list(algo_variants.keys()) + variant_values = list(algo_variants.values()) + for combination in itertools.product(*variant_values): + current_variants = dict(zip(variant_keys, combination)) + + hash_id = hashlib.md5(json.dumps(current_variants, sort_keys=True).encode()).hexdigest()[:8] + + config = algo_invariants.copy() + config.update(current_variants) + + # Skip if cagraIntermediateGraphDegree < cagraGraphDegree + # (CAGRA silently clamps graphDegree down to intermediateGraphDegree, + # which produces duplicate test runs) + if config.get('cagraIntermediateGraphDegree', float('inf')) < config.get('cagraGraphDegree', 0): + print(f"\t\tSkipping combination: cagraIntermediateGraphDegree ({config['cagraIntermediateGraphDegree']}) < cagraGraphDegree ({config['cagraGraphDegree']})") + continue + + # Skip if hnswMaxConn > hnswBeamWidth + if config.get('hnswMaxConn', 0) > config.get('hnswBeamWidth', float('inf')): + print(f"\t\tSkipping combination: hnswMaxConn ({config['hnswMaxConn']}) > hnswBeamWidth ({config['hnswBeamWidth']})") + continue + + # Set indexDirPath based on hash + config['indexDirPath'] = f"index-{hash_id}" + + filename = f"{algo}-{hash_id}.json" + sweep_dir = f"{args.configs_dir}/{sweep}" + filepath = f"{sweep_dir}/{filename}" + os.makedirs(sweep_dir, exist_ok=True) + with open(filepath, 'w') as f: + json.dump(config, f, indent=2) + print(f"\tGenerated config file: {filepath}") + else: + # No variants at all, just generate a single config + hash_id = hashlib.md5(json.dumps(algo_invariants, sort_keys=True).encode()).hexdigest()[:8] + config = algo_invariants.copy() + + # Set indexDirPath based on hash + config['indexDirPath'] = f"index-{hash_id}" + + filename = f"{algo}-{hash_id}.json" + sweep_dir = f"{args.configs_dir}/{sweep}" + filepath = f"{sweep_dir}/{filename}" + os.makedirs(sweep_dir, exist_ok=True) + with open(filepath, 'w') as f: + json.dump(config, f, indent=2) + print(f"\tGenerated config file: {filepath}") print("----------------------") diff --git a/src/main/java/com/searchscale/lucene/cuvs/benchmarks/BenchmarkConfiguration.java b/src/main/java/com/searchscale/lucene/cuvs/benchmarks/BenchmarkConfiguration.java index 1d97838..35e208a 100644 --- a/src/main/java/com/searchscale/lucene/cuvs/benchmarks/BenchmarkConfiguration.java +++ b/src/main/java/com/searchscale/lucene/cuvs/benchmarks/BenchmarkConfiguration.java @@ -5,6 +5,7 @@ import com.nvidia.cuvs.CagraIndexParams.CudaDataType; import com.nvidia.cuvs.CagraIndexParams.CuvsDistanceType; import com.searchscale.lucene.cuvs.benchmarks.LuceneCuvsBenchmarks.Codex; +import java.util.List; public class BenchmarkConfiguration { @@ -47,7 +48,7 @@ public class BenchmarkConfiguration { public int cagraITopK; public int cagraSearchWidth; public int cagraHnswLayers; // layers in CAGRA->HNSW conversion - public int efSearch; + public List efSearch; // e.g. [64] or [64, 128, 256] public CagraGraphBuildAlgo cagraGraphBuildAlgo; // CAGRA IVF_PQ parameters @@ -89,11 +90,22 @@ public boolean isCagraHNSWScalar() { return Codex.CAGRA_HNSW_SCALAR.equals(algoToRun); } - public int getEffectiveEfSearch() { - if (efSearch > 0) { + /** + * Returns the list of efSearch values to use during search. + * + *

If {@code efSearch} is set in the config JSON (e.g. [64, 128, 256]), + * those values are returned directly. Otherwise, falls back to a single-element + * list containing a default derived from topK. + * + *

The benchmark runner iterates over these values and runs search once per value + * against the same index — no rebuild is needed. + */ + public List getEfSearchValues() { + if (efSearch != null && !efSearch.isEmpty()) { return efSearch; } - return Math.max(topK, (int) Math.ceil(topK * 1.5)); + // Default: 1.5x topK, but at least topK + return List.of(Math.max(topK, (int) Math.ceil(topK * 1.5))); } public String prettyString() { @@ -128,6 +140,7 @@ public String prettyString() { sb.append("Enable TieredMerge: ").append(enableTieredMerge).append('\n'); sb.append("Num HNSW merge threads: ").append(hnswMergeThreads).append('\n'); sb.append("enableIndexWriterInfoStream: ").append(enableIndexWriterInfoStream).append('\n'); + sb.append("efSearch: ").append(getEfSearchValues()).append('\n'); sb.append("------- algo parameters ------\n"); if (isLucene()) { diff --git a/src/main/java/com/searchscale/lucene/cuvs/benchmarks/LuceneCuvsBenchmarks.java b/src/main/java/com/searchscale/lucene/cuvs/benchmarks/LuceneCuvsBenchmarks.java index 4798c7e..b06b140 100644 --- a/src/main/java/com/searchscale/lucene/cuvs/benchmarks/LuceneCuvsBenchmarks.java +++ b/src/main/java/com/searchscale/lucene/cuvs/benchmarks/LuceneCuvsBenchmarks.java @@ -296,43 +296,87 @@ public static void main(String[] args) throws Throwable { } } + // Resolve the list of efSearch values to iterate over. + // The index is built once above; we search it once per efSearch value. + List efSearchValues = config.getEfSearchValues(); + log.info( + "Will run search with {} efSearch value(s): {}", efSearchValues.size(), efSearchValues); + + // Read ground truth once (shared across all efSearch runs) + List groundTruth = Util.readGroundTruthFile(config.groundTruthFile); + Directory indexDir = MMapDirectory.open(Path.of(config.indexDirPath)); log.info("Index directory is: {} (using memory-mapped files)", indexDir); - log.info("Querying documents using {} ...", config.algoToRun); - // Always use standard Lucene search since we always create Lucene HNSW indexes - search( - indexDir, - config, - metrics, - queryResults, - Util.readGroundTruthFile(config.groundTruthFile)); - - Util.calculateRecallAccuracy(queryResults, metrics, config.algoToRun); - - String resultsJson = - Util.newObjectMapper() - .writerWithDefaultPrettyPrinter() - .writeValueAsString(Map.of("configuration", config, "metrics", metrics)); - - if (config.saveResultsOnDisk) { - // Use the resultsDirectory directly if provided - String resultsDir = config.resultsDirectory != null ? config.resultsDirectory : "results"; - File results = new File(resultsDir); - if (!results.exists()) { - results.mkdirs(); - } - // Save results.json directly to the specified directory - FileUtils.write( - new File(results.toString() + "/results.json"), resultsJson, Charset.forName("UTF-8")); + // Snapshot indexing-only metrics before the loop so that each efSearch run + // starts from the same base and doesn't inherit results from prior runs. + Map indexingMetrics = new LinkedHashMap<>(metrics); + + for (int efSearch : efSearchValues) { + log.info("--- Running search with efSearch={} ---", efSearch); + + // Fresh collections for this efSearch run + List efSearchQueryResults = + Collections.synchronizedList(new ArrayList()); + Map efSearchMetrics = new LinkedHashMap(); + + // Copy over indexing metrics (only) so they appear in every result file + efSearchMetrics.putAll(indexingMetrics); + efSearchMetrics.put("efSearch", efSearch); - // Save CSV with neighbors data - Util.writeCSV(queryResults, results.toString() + "/neighbors.csv"); + log.info("Querying documents using {} with efSearch={} ...", config.algoToRun, efSearch); + search(indexDir, config, efSearchMetrics, efSearchQueryResults, groundTruth, efSearch); - log.info("Results saved to directory: {}", resultsDir); + Util.calculateRecallAccuracy(efSearchQueryResults, efSearchMetrics, config.algoToRun); + + String resultsJson = + Util.newObjectMapper() + .writerWithDefaultPrettyPrinter() + .writeValueAsString(Map.of("configuration", config, "metrics", efSearchMetrics)); + + if (config.saveResultsOnDisk) { + // Use the resultsDirectory directly if provided + String resultsDir = config.resultsDirectory != null ? config.resultsDirectory : "results"; + + // When there are multiple efSearch values, create a subdirectory per value + if (efSearchValues.size() > 1) { + resultsDir = resultsDir + "/efSearch_" + efSearch; + } + + File results = new File(resultsDir); + if (!results.exists()) { + results.mkdirs(); + } + + // Save results.json directly to the specified directory + FileUtils.write( + new File(results.toString() + "/results.json"), + resultsJson, + Charset.forName("UTF-8")); + + // Save CSV with neighbors data + Util.writeCSV(efSearchQueryResults, results.toString() + "/neighbors.csv"); + + log.info("Results for efSearch={} saved to directory: {}", efSearch, resultsDir); + } + + log.info( + "\n-----\nMetrics for efSearch={}: {}\n{}\n-----", + efSearch, + efSearchMetrics, + resultsJson); + + // Accumulate per-efSearch metrics into the top-level metrics map + for (Map.Entry entry : efSearchMetrics.entrySet()) { + if (efSearchValues.size() > 1) { + metrics.put("efSearch_" + efSearch + "/" + entry.getKey(), entry.getValue()); + } else { + metrics.put(entry.getKey(), entry.getValue()); + } + } } - log.info("\n-----\nOverall metrics: " + metrics + "\nMetrics: \n" + resultsJson + "\n-----"); + log.info("\n-----\nOverall metrics: {}\n-----", metrics); // Close the index directory before cleaning indexDir.close(); @@ -430,12 +474,23 @@ private static void indexDocuments( writer.close(); } + /** + * Runs search queries against the given index directory using the specified efSearch value. + * + * @param directory the Lucene index directory to search + * @param config benchmark configuration + * @param metrics map to populate with search performance metrics + * @param queryResults list to populate with per-query results + * @param groundTruth ground truth neighbor lists for recall calculation + * @param efSearch the efSearch (number of candidates) to use for this search run + */ private static void search( Directory directory, BenchmarkConfiguration config, Map metrics, List queryResults, - List groundTruth) { + List groundTruth, + int efSearch) { DB db = null; try (IndexReader indexReader = DirectoryReader.open(directory)) { @@ -481,123 +536,130 @@ private static void search( for (int t = 0; t < config.queryThreads; t++) { pool.submit( () -> { - while (queryId.getAndIncrement() <= config.numQueriesToRun) { - int currentQueryId = queryId.get(); - KnnFloatVectorQuery query; - - if (config.algoToRun.equals(Codex.CAGRA_SEARCH)) { - int effectiveEfSearch = config.getEffectiveEfSearch(); - query = - new GPUKnnFloatVectorQuery( - config.vectorColName, - queries.get(currentQueryId), - effectiveEfSearch, - null, - config.cagraITopK, - config.cagraSearchWidth); - } else { - int effectiveEfSearch = config.getEffectiveEfSearch(); - query = - new KnnFloatVectorQuery( - config.vectorColName, queries.get(currentQueryId), effectiveEfSearch); + while (true) { + int currentQueryId = queryId.getAndIncrement(); + if (currentQueryId >= config.numQueriesToRun) { + break; } - - TopDocs topDocs; - long searchStartTime = System.nanoTime(); try { - int effectiveEfSearch = config.getEffectiveEfSearch(); - TopScoreDocCollectorManager collectorManager = - new TopScoreDocCollectorManager( - effectiveEfSearch, null, Integer.MAX_VALUE, true); - topDocs = indexSearcher.search(query, collectorManager); - } catch (IOException e) { - throw new RuntimeException("Problem during executing a query: ", e); - } - double searchTimeTakenMs = - TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchStartTime); - // log.info("End to end search took: " + searchTimeTakenMs); - if (currentQueryId > config.numWarmUpQueries) { - queryLatencies.put(queryId.get(), searchTimeTakenMs); - } - int finishedCount = queriesFinished.incrementAndGet(); - - // Log progress every 1000 queries - if (finishedCount % 1000 == 0 || finishedCount == config.numQueriesToRun) { - log.info( - "Done querying " - + finishedCount - + " out of " - + config.numQueriesToRun - + " queries."); - } - - ScoreDoc[] hits = topDocs.scoreDocs; - List neighbors = new ArrayList<>(); - List scores = new ArrayList<>(); + KnnFloatVectorQuery query; + + if (config.algoToRun.equals(Codex.CAGRA_SEARCH)) { + query = + new GPUKnnFloatVectorQuery( + config.vectorColName, + queries.get(currentQueryId), + efSearch, + null, + config.cagraITopK, + config.cagraSearchWidth); + } else { + query = + new KnnFloatVectorQuery( + config.vectorColName, queries.get(currentQueryId), efSearch); + } - // Debug: Log search results for first query - if (queryId.get() == 0) { - log.info( - "Debug: First query returned " - + hits.length - + " hits (ef-search candidates)"); - log.info( - "Debug: Will select top " - + config.topK - + " from " - + hits.length - + " candidates"); - } - int numResultsToTake = Math.min(config.topK, hits.length); - long retrievalStartTime = System.nanoTime(); - for (int i = 0; i < numResultsToTake; i++) { - ScoreDoc hit = hits[i]; + TopDocs topDocs; + long searchStartTime = System.nanoTime(); try { - Document d = indexReader.storedFields().document(hit.doc); - neighbors.add(Integer.parseInt(d.get("id"))); + TopScoreDocCollectorManager collectorManager = + new TopScoreDocCollectorManager(efSearch, null, Integer.MAX_VALUE, true); + topDocs = indexSearcher.search(query, collectorManager); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException("Problem during executing a query: ", e); + } + double searchTimeTakenMs = + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchStartTime); + if (currentQueryId > config.numWarmUpQueries) { + queryLatencies.put(queryId.get(), searchTimeTakenMs); + } + int finishedCount = queriesFinished.incrementAndGet(); + + // Log progress every 1000 queries + if (finishedCount % 1000 == 0 || finishedCount == config.numQueriesToRun) { + log.info( + "Done querying " + + finishedCount + + " out of " + + config.numQueriesToRun + + " queries."); } - scores.add(hit.score); - } - double retrievalTimeTakenMs = - TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - retrievalStartTime); - if (currentQueryId > config.numWarmUpQueries) { - retrievalLatencies.put(queryId.get(), retrievalTimeTakenMs); - } - // Debug: Log results for all queries - log.debug( - "Query " - + currentQueryId - + " - First 5 neighbors: " - + neighbors.subList(0, Math.min(5, neighbors.size()))); - log.debug( - "Query " - + currentQueryId - + " - First 5 distances: " - + scores.subList(0, Math.min(5, scores.size()))); - int[] expectedNeighbors = groundTruth.get(currentQueryId); - log.debug( - "Query " - + currentQueryId - + " - Expected neighbors: " - + java.util.Arrays.toString( - java.util.Arrays.copyOf( - expectedNeighbors, Math.min(5, expectedNeighbors.length)))); - - if (currentQueryId > config.numWarmUpQueries) { - QueryResult result = - new QueryResult( - config.algoToRun.toString(), - currentQueryId, - neighbors, - groundTruth.get(currentQueryId), - scores, - searchTimeTakenMs); - queryResults.add(result); - } else { - log.info("Skipping warmup query: {}", currentQueryId); + ScoreDoc[] hits = topDocs.scoreDocs; + List neighbors = new ArrayList<>(); + List scores = new ArrayList<>(); + + // Debug: Log search results for first query + if (queryId.get() == 0) { + log.info( + "Debug: First query returned " + + hits.length + + " hits (ef-search candidates)"); + log.info( + "Debug: Will select top " + + config.topK + + " from " + + hits.length + + " candidates"); + } + int numResultsToTake = Math.min(config.topK, hits.length); + long retrievalStartTime = System.nanoTime(); + for (int i = 0; i < numResultsToTake; i++) { + ScoreDoc hit = hits[i]; + try { + Document d = indexReader.storedFields().document(hit.doc); + neighbors.add(Integer.parseInt(d.get("id"))); + } catch (IOException e) { + e.printStackTrace(); + } + scores.add(hit.score); + } + double retrievalTimeTakenMs = + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - retrievalStartTime); + if (currentQueryId > config.numWarmUpQueries) { + retrievalLatencies.put(queryId.get(), retrievalTimeTakenMs); + } + + // Debug: Log results for all queries + log.debug( + "Query " + + currentQueryId + + " - First 5 neighbors: " + + neighbors.subList(0, Math.min(5, neighbors.size()))); + log.debug( + "Query " + + currentQueryId + + " - First 5 distances: " + + scores.subList(0, Math.min(5, scores.size()))); + int[] expectedNeighbors = groundTruth.get(currentQueryId); + log.debug( + "Query " + + currentQueryId + + " - Expected neighbors: " + + java.util.Arrays.toString( + java.util.Arrays.copyOf( + expectedNeighbors, Math.min(5, expectedNeighbors.length)))); + + if (currentQueryId > config.numWarmUpQueries) { + QueryResult result = + new QueryResult( + config.algoToRun.toString(), + currentQueryId, + neighbors, + groundTruth.get(currentQueryId), + scores, + searchTimeTakenMs); + queryResults.add(result); + } else { + log.info("Skipping warmup query: {}", currentQueryId); + } + } catch (Exception e) { + log.error( + "Exception during query {}: {} - {}", + currentQueryId, + e.getClass().getSimpleName(), + e.getMessage(), + e); } } }); @@ -611,17 +673,33 @@ private static void search( metrics.put(config.algoToRun + "-query-time", (endTime - startTime)); metrics.put( config.algoToRun + "-query-throughput", - (config.numQueriesToRun / ((endTime - startTime) / 1000.0))); + (endTime - startTime) > 0 + ? (config.numQueriesToRun / ((endTime - startTime) / 1000.0)) + : 0.0); double avgLatency = - new ArrayList<>(queryLatencies.values()).stream().reduce(0.0, Double::sum) - / queryLatencies.size(); + queryLatencies.isEmpty() + ? 0.0 + : new ArrayList<>(queryLatencies.values()).stream().reduce(0.0, Double::sum) + / queryLatencies.size(); double avgRetLatency = - new ArrayList<>(retrievalLatencies.values()).stream().reduce(0.0, Double::sum) - / retrievalLatencies.size(); + retrievalLatencies.isEmpty() + ? 0.0 + : new ArrayList<>(retrievalLatencies.values()).stream().reduce(0.0, Double::sum) + / retrievalLatencies.size(); metrics.put(config.algoToRun + "-mean-latency", avgLatency); metrics.put(config.algoToRun + "-mean-retrieval-latency", avgRetLatency); + // Log warning if no queries completed successfully + if (queryLatencies.isEmpty()) { + log.error( + "WARNING: Zero queries completed successfully! " + + "Check the query thread exception logs above for the root cause. " + + "queriesFinished={}, queryResults.size={}", + queriesFinished.get(), + queryResults.size()); + } + int segmentCount = indexReader.leaves().size(); metrics.put(config.algoToRun + "-segment-count", segmentCount);