
Commit fc31867

chore: allows evaluator to run on existing predictions (#734)
# Motivation

Allows evaluation to be run on an existing predictions jsonl file.

# Content

- Modified the logic that loads predictions so it checks for a consolidated jsonl file before creating one.

# Testing

Tested by running locally.

# Please check the following before marking your PR as ready for review

- [x] I have added tests for my changes
- [x] I have updated the documentation or added new documentation as needed
1 parent e23dac4 commit fc31867
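The change in a nutshell: the report step now reuses a consolidated `all_preds.jsonl` in the predictions directory when one already exists, instead of always rebuilding it from the individual `*.json` prediction files. A minimal sketch of that idea is below; the helper name `resolve_predictions_jsonl` and the `consolidate` callback are illustrative only, not the repo's actual API (the real logic lives in `generate_report`, see the report.py diff further down).

```python
from pathlib import Path
from typing import Callable


def resolve_predictions_jsonl(
    predictions_dir: Path,
    consolidate: Callable[[Path], Path],
) -> Path:
    """Illustrative helper: prefer an existing consolidated predictions file."""
    predictions_jsonl = predictions_dir / "all_preds.jsonl"
    if predictions_jsonl.exists():
        # Re-running the evaluator on existing predictions: skip consolidation.
        return predictions_jsonl
    # First run: build all_preds.jsonl from the per-example prediction files
    # (in the repo this is handled by preds_to_jsonl in report.py).
    return consolidate(predictions_dir)
```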

File tree

2 files changed: +20 -20 lines changed


codegen-examples/examples/swebench_agent_run/run_eval.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -278,14 +278,14 @@ async def run_eval(
         "verified": SWEBenchDataset.VERIFIED,
     }
     dataset_enum = dataset_dict[dataset]
-    print(repo)
+
     examples = get_swe_bench_examples(dataset=dataset_enum, length=length, instance_id=instance_id, repo=repo)
-    print(f"Examples:\n{'\n'.join([f'{e.instance_id} - {e.repo} - {e.base_commit}' for e in examples])}")
 
     try:
         if use_existing_preds is None:
+            print(f"Repo: {repo}")
+            print(f"Examples:\n{'\n'.join([f'{e.instance_id} - {e.repo} - {e.base_commit}' for e in examples])}")
             print(f"Processing {len(examples)} examples...")
-
             # Create output directory if it doesn't exist
             predictions_dir.mkdir(exist_ok=True, parents=True)
```

src/codegen/extensions/swebench/report.py

Lines changed: 17 additions & 17 deletions
```diff
@@ -113,6 +113,8 @@ def generate_report(predictions_dir: Path, logs_dir: Path, dataset: SWEBenchData
         print(f"Directory does not exist: {predictions_dir}")
         return 1
 
+    predictions_jsonl = predictions_dir / "all_preds.jsonl"
+    existing_preds = predictions_jsonl.exists()
     prediction_files = list(predictions_dir.glob("*.json"))
     print(f"Found {len(prediction_files)} prediction files")
 
@@ -126,29 +128,27 @@ def generate_report(predictions_dir: Path, logs_dir: Path, dataset: SWEBenchData
         except json.JSONDecodeError:
             print(f"Error reading JSON from {file_path}")
             continue
+    if not existing_preds:
+        if not predictions:
+            print("No valid predictions found")
+            return 1
 
-    print(f"Successfully loaded {len(predictions)} predictions")
+        print(f"Successfully loaded {len(predictions)} predictions")
 
-    if predictions:
-        # Create predictions JSONL file
         predictions_jsonl = preds_to_jsonl(predictions, predictions_dir)
-        print(f"\nCreated predictions JSONL: {predictions_jsonl}")
 
-        # Setup log directory
-        log_dir = logs_dir / "results"
-        log_dir.mkdir(exist_ok=True, parents=True)
-        print(f"Using log directory: {log_dir}")
+    # Setup log directory
+    log_dir = logs_dir / "results"
+    log_dir.mkdir(exist_ok=True, parents=True)
+    print(f"Using log directory: {log_dir}")
 
-        # Run evaluations
-        run_evals(predictions_jsonl, logs_dir, dataset, run_id)
+    # Run evaluations
+    run_evals(predictions_jsonl, logs_dir, dataset, run_id)
 
-        # Get and display report
-        report = get_report(predictions_jsonl, logs_dir)
+    # Get and display report
+    report = get_report(predictions_jsonl, logs_dir)
 
-        # Update prediction JSONs with results
-        predictions = update_pred_json(predictions, report, predictions_dir)
-    else:
-        print("No valid predictions found")
-        return 1
+    # Update prediction JSONs with results
+    predictions = update_pred_json(predictions, report, predictions_dir)
 
     return 0
```
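
Read together, the body of `generate_report` after this commit looks roughly like the sketch below. It is reconstructed from the diff above; the function signature is abbreviated, and the loop that fills `predictions` from the individual files (unchanged by this commit) is elided and assumed.

```python
def generate_report(predictions_dir, logs_dir, dataset, run_id):
    # ... existence check on predictions_dir (unchanged code) ...

    predictions_jsonl = predictions_dir / "all_preds.jsonl"
    existing_preds = predictions_jsonl.exists()
    prediction_files = list(predictions_dir.glob("*.json"))
    print(f"Found {len(prediction_files)} prediction files")

    predictions = []
    # ... each prediction file is parsed and appended to `predictions` (unchanged code) ...

    if not existing_preds:
        if not predictions:
            print("No valid predictions found")
            return 1

        print(f"Successfully loaded {len(predictions)} predictions")

        predictions_jsonl = preds_to_jsonl(predictions, predictions_dir)

    # Setup log directory
    log_dir = logs_dir / "results"
    log_dir.mkdir(exist_ok=True, parents=True)
    print(f"Using log directory: {log_dir}")

    # Run evaluations
    run_evals(predictions_jsonl, logs_dir, dataset, run_id)

    # Get and display report
    report = get_report(predictions_jsonl, logs_dir)

    # Update prediction JSONs with results
    predictions = update_pred_json(predictions, report, predictions_dir)

    return 0
```

When `all_preds.jsonl` is already present, the function skips straight to `run_evals`, which is what allows the evaluator to re-run on existing predictions.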

0 commit comments
