Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions convert_to_nvidia_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
from typing import List, Dict, Optional, Tuple


def create_index_name(config: Dict) -> str:
"""Create index name from configuration parameters"""
def create_index_name(config: Dict, metrics: Dict) -> str:
"""Create index name from configuration and metrics.

efSearch is read from metrics (where it's a scalar int per search run)
rather than config (where it's now a list of values to sweep).
"""
algorithm = config.get('algoToRun', 'UNKNOWN')
ef_search = config.get('efSearch', 0)
ef_search = metrics.get('efSearch', 0)

if algorithm in ['LUCENE_HNSW', 'hnsw']:
beam_width = config.get('hnswBeamWidth', 0)
Expand Down Expand Up @@ -38,7 +42,7 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da
elif algorithm in ['hnsw', 'LUCENE_HNSW']:
algorithm = 'LUCENE_HNSW'

index_name = create_index_name(config)
index_name = create_index_name(config, metrics)

recall_key = next((key for key in metrics.keys() if 'recall-accuracy' in key.lower()), None)
if not recall_key:
Expand Down Expand Up @@ -97,12 +101,13 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da

if build_time_key:
build_time_ms = float(metrics[build_time_key])
build_time_s = build_time_ms / 1000.0

build_benchmark = {
"name": f"{algorithm}/{index_name}",
"real_time": build_time_ms,
"real_time": build_time_s,
"iterations": 1,
"time_unit": "ms",
"time_unit": "s",
"run_name": "run_1",
"run_type": "iteration",
"repetitions": 1,
Expand Down
29 changes: 19 additions & 10 deletions run_pareto_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,14 @@ import csv
import json
import glob

def create_index_name_from_config(config):
def create_index_name_from_results(config, metrics):
\"\"\"Create index name from config and metrics.

efSearch is read from metrics (scalar int per search run)
rather than config (now a list of values to sweep).
\"\"\"
algorithm = config.get('algoToRun', 'UNKNOWN')
ef_search = config.get('efSearch', 0)
ef_search = metrics.get('efSearch', 0)

if algorithm in ['LUCENE_HNSW', 'hnsw']:
beam_width = config.get('hnswBeamWidth', 0)
Expand Down Expand Up @@ -144,13 +149,17 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items():

index_to_dir = {}
for benchmark_dir in benchmark_dirs:
results_json_path = os.path.join(benchmark_dir, 'results.json')
if os.path.exists(results_json_path):
# Walk into subdirectories to find efSearch_* results
for root, dirs, files in os.walk(benchmark_dir):
if 'results.json' not in files:
continue
results_json_path = os.path.join(root, 'results.json')
try:
with open(results_json_path, 'r') as f:
results_data = json.load(f)

config = results_data['configuration']
metrics = results_data['metrics']
algo_to_run = config.get('algoToRun')

algorithm_match = False
Expand All @@ -160,20 +169,20 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items():
algorithm_match = True

if algorithm_match:
index_name = create_index_name_from_config(config)
index_name = create_index_name_from_results(config, metrics)
if index_name not in index_to_dir:
index_to_dir[index_name] = benchmark_dir
index_to_dir[index_name] = root
except Exception as e:
print(f' Error processing {benchmark_dir}: {e}')
print(f' Error processing {root}: {e}')

print(f'Mapped {len(index_to_dir)} configurations')

matched = 0
unmatched = 0
for index_name, pareto_run in pareto_indices.items():
if index_name in index_to_dir:
benchmark_dir = index_to_dir[index_name]
is_pareto_file = os.path.join(benchmark_dir, 'is_pareto')
result_dir = index_to_dir[index_name]
is_pareto_file = os.path.join(result_dir, 'is_pareto')

with open(is_pareto_file, 'w') as f:
f.write(f'Pareto optimal run\\n')
Expand Down Expand Up @@ -217,4 +226,4 @@ echo ""
echo "Final output:"
echo "- Pareto optimal runs marked with is_pareto files"
echo "- Plots: ${OUTPUT_DIR}/plots/"
echo "- No intermediate files (completely cleaned up)"
echo "- No intermediate files (completely cleaned up)"