diff --git a/convert_to_nvidia_format.py b/convert_to_nvidia_format.py index 54f3931..d3caa2f 100755 --- a/convert_to_nvidia_format.py +++ b/convert_to_nvidia_format.py @@ -7,10 +7,14 @@ from typing import List, Dict, Optional, Tuple -def create_index_name(config: Dict) -> str: - """Create index name from configuration parameters""" +def create_index_name(config: Dict, metrics: Dict) -> str: + """Create index name from configuration and metrics. + + efSearch is read from metrics (where it's a scalar int per search run) + rather than config (where it's now a list of values to sweep). + """ algorithm = config.get('algoToRun', 'UNKNOWN') - ef_search = config.get('efSearch', 0) + ef_search = metrics.get('efSearch', 0) if algorithm in ['LUCENE_HNSW', 'hnsw']: beam_width = config.get('hnswBeamWidth', 0) @@ -38,7 +42,7 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da elif algorithm in ['hnsw', 'LUCENE_HNSW']: algorithm = 'LUCENE_HNSW' - index_name = create_index_name(config) + index_name = create_index_name(config, metrics) recall_key = next((key for key in metrics.keys() if 'recall-accuracy' in key.lower()), None) if not recall_key: @@ -97,12 +101,13 @@ def convert_results_to_nvidia_format(results_json_path: str, output_dir: str, da if build_time_key: build_time_ms = float(metrics[build_time_key]) + build_time_s = build_time_ms / 1000.0 build_benchmark = { "name": f"{algorithm}/{index_name}", - "real_time": build_time_ms, + "real_time": build_time_s, "iterations": 1, - "time_unit": "ms", + "time_unit": "s", "run_name": "run_1", "run_type": "iteration", "repetitions": 1, diff --git a/run_pareto_analysis.sh b/run_pareto_analysis.sh index 33020a8..1466617 100755 --- a/run_pareto_analysis.sh +++ b/run_pareto_analysis.sh @@ -81,9 +81,14 @@ import csv import json import glob -def create_index_name_from_config(config): +def create_index_name_from_results(config, metrics): + \"\"\"Create index name from config and metrics. + + efSearch is read from metrics (scalar int per search run) + rather than config (now a list of values to sweep). + \"\"\" algorithm = config.get('algoToRun', 'UNKNOWN') - ef_search = config.get('efSearch', 0) + ef_search = metrics.get('efSearch', 0) if algorithm in ['LUCENE_HNSW', 'hnsw']: beam_width = config.get('hnswBeamWidth', 0) @@ -144,13 +149,17 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items(): index_to_dir = {} for benchmark_dir in benchmark_dirs: - results_json_path = os.path.join(benchmark_dir, 'results.json') - if os.path.exists(results_json_path): + # Walk into subdirectories to find efSearch_* results + for root, dirs, files in os.walk(benchmark_dir): + if 'results.json' not in files: + continue + results_json_path = os.path.join(root, 'results.json') try: with open(results_json_path, 'r') as f: results_data = json.load(f) config = results_data['configuration'] + metrics = results_data['metrics'] algo_to_run = config.get('algoToRun') algorithm_match = False @@ -160,11 +169,11 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items(): algorithm_match = True if algorithm_match: - index_name = create_index_name_from_config(config) + index_name = create_index_name_from_results(config, metrics) if index_name not in index_to_dir: - index_to_dir[index_name] = benchmark_dir + index_to_dir[index_name] = root except Exception as e: - print(f' Error processing {benchmark_dir}: {e}') + print(f' Error processing {root}: {e}') print(f'Mapped {len(index_to_dir)} configurations') @@ -172,8 +181,8 @@ for algorithm, pareto_indices in pareto_runs_by_algo.items(): unmatched = 0 for index_name, pareto_run in pareto_indices.items(): if index_name in index_to_dir: - benchmark_dir = index_to_dir[index_name] - is_pareto_file = os.path.join(benchmark_dir, 'is_pareto') + result_dir = index_to_dir[index_name] + is_pareto_file = os.path.join(result_dir, 'is_pareto') with open(is_pareto_file, 'w') as f: f.write(f'Pareto optimal run\\n') @@ -217,4 +226,4 @@ echo "" echo "Final output:" echo "- Pareto optimal runs marked with is_pareto files" echo "- Plots: ${OUTPUT_DIR}/plots/" -echo "- No intermediate files (completely cleaned up)" \ No newline at end of file +echo "- No intermediate files (completely cleaned up)"