SearchScale · nvzm123 · Mar 30, 2026
diff --git a/convert_to_nvidia_format.py b/convert_to_nvidia_format.py
@@ -7,10 +7,14 @@
 from typing import List, Dict, Optional, Tuple
 
 
-def create_index_name(config: Dict) -> str:
-    """Create index name from configuration parameters"""
+def create_index_name(config: Dict, metrics: Dict) -> str:
+    """Create index name from configuration and metrics.
+
+    efSearch is read from metrics (where it's a scalar int per search run)
+    rather than config (where it's now a list of values to sweep).
+    """
     algorithm = config.get('algoToRun', 'UNKNOWN')
-    ef_search = config.get('efSearch', 0)
+    ef_search = metrics.get('efSearch', 0)
 
     if algorithm in ['LUCENE_HNSW', 'hnsw']:
         beam_width = config.get('hnswBeamWidth', 0)
@@ -38,7 +42,7 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da
     elif algorithm in ['hnsw', 'LUCENE_HNSW']:
         algorithm = 'LUCENE_HNSW'
 
-    index_name = create_index_name(config)
+    index_name = create_index_name(config, metrics)
 
     recall_key = next((key for key in metrics.keys() if 'recall-accuracy' in key.lower()), None)
     if not recall_key:
@@ -97,12 +101,13 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da
 
     if build_time_key:
         build_time_ms = float(metrics[build_time_key])
+        build_time_s = build_time_ms / 1000.0
 
         build_benchmark = {
             "name": f"{algorithm}/{index_name}",
-            "real_time": build_time_ms,
+            "real_time": build_time_s,
             "iterations": 1,
-            "time_unit": "ms",
+            "time_unit": "s",
             "run_name": "run_1",
             "run_type": "iteration",
             "repetitions": 1,

diff --git a/run_pareto_analysis.sh b/run_pareto_analysis.sh
@@ -81,9 +81,14 @@ import csv
 import json
 import glob
 
-def create_index_name_from_config(config):
+def create_index_name_from_results(config, metrics):
+    \"\"\"Create index name from config and metrics.
+
+    efSearch is read from metrics (scalar int per search run)
+    rather than config (now a list of values to sweep).
+    \"\"\"
     algorithm = config.get('algoToRun', 'UNKNOWN')
-    ef_search = config.get('efSearch', 0)
+    ef_search = metrics.get('efSearch', 0)
 
     if algorithm in ['LUCENE_HNSW', 'hnsw']:
         beam_width = config.get('hnswBeamWidth', 0)
@@ -144,13 +149,17 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items():
 
     index_to_dir = {}
     for benchmark_dir in benchmark_dirs:
-        results_json_path = os.path.join(benchmark_dir, 'results.json')
-        if os.path.exists(results_json_path):
+        # Walk into subdirectories to find efSearch_* results
+        for root, dirs, files in os.walk(benchmark_dir):
+            if 'results.json' not in files:
+                continue
+            results_json_path = os.path.join(root, 'results.json')
             try:
                 with open(results_json_path, 'r') as f:
                     results_data = json.load(f)
 
                 config = results_data['configuration']
+                metrics = results_data['metrics']
                 algo_to_run = config.get('algoToRun')
 
                 algorithm_match = False
@@ -160,20 +169,20 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items():
                     algorithm_match = True
 
                 if algorithm_match:
-                    index_name = create_index_name_from_config(config)
+                    index_name = create_index_name_from_results(config, metrics)
                     if index_name not in index_to_dir:
-                        index_to_dir[index_name] = benchmark_dir
+                        index_to_dir[index_name] = root
             except Exception as e:
-                print(f'  Error processing {benchmark_dir}: {e}')
+                print(f'  Error processing {root}: {e}')
 
     print(f'Mapped {len(index_to_dir)} configurations')
 
     matched = 0
     unmatched = 0
     for index_name, pareto_run in pareto_indices.items():
         if index_name in index_to_dir:
-            benchmark_dir = index_to_dir[index_name]
-            is_pareto_file = os.path.join(benchmark_dir, 'is_pareto')
+            result_dir = index_to_dir[index_name]
+            is_pareto_file = os.path.join(result_dir, 'is_pareto')
 
             with open(is_pareto_file, 'w') as f:
                 f.write(f'Pareto optimal run\\n')
@@ -217,4 +226,4 @@ echo ""
 echo "Final output:"
 echo "- Pareto optimal runs marked with is_pareto files"
 echo "- Plots: ${OUTPUT_DIR}/plots/"
-echo "- No intermediate files (completely cleaned up)"
+echo "- No intermediate files (completely cleaned up)"