diff --git a/aif_gen/_cli/commands/clean.py b/aif_gen/_cli/commands/clean.py
new file mode 100644
index 00000000..63ef6aec
--- /dev/null
+++ b/aif_gen/_cli/commands/clean.py
@@ -0,0 +1,89 @@
+import logging
+import pathlib
+from typing import Optional
+
+import click
+
+from aif_gen.dataset.continual_alignment_dataset import (
+    ContinualAlignmentDataset,
+)
+from aif_gen.util.hf import download_from_hf, upload_to_hf
+from aif_gen.util.seed import seed_everything
+
+
+@click.command(context_settings={'show_default': True})
+@click.argument(
+    'input_data_file',
+    type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path),
+)
+@click.argument(
+    'output_data_file',
+    type=click.Path(dir_okay=False, path_type=pathlib.Path),
+)
+@click.argument(
+    'words',
+    type=click.STRING,
+)
+@click.option(
+    '--random_seed',
+    type=int,
+    help='Random seed for data generation.',
+    default=0,
+)
+@click.option(
+    '--hf-repo-id',
+    type=click.STRING,
+    default=None,
+    help='If not None, push the cleaned dataset to a HuggingFace remote repository with the associated repo-id.',
+)
+def clean_dataset(
+    input_data_file: pathlib.Path,
+    output_data_file: pathlib.Path,
+    words: str,
+    random_seed: int,
+    hf_repo_id: Optional[str],
+) -> None:
+    r"""Clean a ContinualAlignmentDataset given a space-separated string of words.
+
+    INPUT_DATA_FILE: Path to the input dataset.
+    OUTPUT_DATA_FILE: Path to the output dataset.
+    WORDS: Space-separated string of words to remove from the dataset.
+    """
+    if hf_repo_id is not None:
+        input_data_file = download_from_hf(hf_repo_id, input_data_file)
+
+    logging.info(f'Reading input_dataset from: {input_data_file}')
+    input_dataset = ContinualAlignmentDataset.from_json(input_data_file)
+    logging.info(f'Read {len(input_dataset)} samples from: {input_data_file}')
+
+    if not len(input_dataset):
+        logging.warning('No samples found in dataset, skipping clean up.')
+        return
+
+    logging.info(f'Using words: {words}')
+    logging.info(f'Random seed: {random_seed}')
+    seed_everything(random_seed)
+
+    output_data_file.parent.mkdir(parents=True, exist_ok=True)
+
+    words_list = words.split()
+    if len(words_list) == 0:
+        logging.warning('No words found in words string, skipping clean up.')
+        return
+
+    # Remove every target word from each data point in the dataset.
+    for dataset in input_dataset.datasets:
+        for sample in dataset.samples:
+            for word in words_list:
+                sample.prompt = sample.prompt.replace(word, '')
+                sample.chosen = sample.chosen.replace(word, '')
+                sample.rejected = sample.rejected.replace(word, '')
+
+    logging.info('Finished cleaning dataset.')
+
+    logging.info(f'Writing {len(input_dataset)} samples to {output_data_file}')
+    input_dataset.to_json(output_data_file)
+    logging.info(f'Wrote {len(input_dataset)} samples to {output_data_file}')
+
+    if hf_repo_id is not None:
+        upload_to_hf(repo_id=hf_repo_id, local_path=output_data_file)
diff --git a/aif_gen/_cli/main.py b/aif_gen/_cli/main.py
index 360c55e0..3e8dcd79 100644
--- a/aif_gen/_cli/main.py
+++ b/aif_gen/_cli/main.py
@@ -2,6 +2,7 @@
 
 import click
 
+from aif_gen._cli.commands.clean import clean_dataset
 from aif_gen._cli.commands.filter import filter_dataset
 from aif_gen._cli.commands.generate import generate
 from aif_gen._cli.commands.merge import merge
@@ -47,6 +48,7 @@ def cli(log_file: pathlib.Path) -> None:
 cli.add_command(sample)
 cli.add_command(transmute)
 cli.add_command(filter_dataset)
+cli.add_command(clean_dataset)
 
 if __name__ == '__main__':
     cli()
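Usage note: `cli.add_command(clean_dataset)` registers the new command on the `aif` CLI alongside the existing subcommands. A minimal invocation sketch, assuming Click derives the subcommand name `clean-dataset` from the function name (the exact name depends on the installed Click version) and using placeholder file paths and repo id:

# strip the words "foo" and "bar" from every prompt/chosen/rejected field
uv run aif clean-dataset data/continual_data.json data/continual_data_clean.json "foo bar"
# optionally sync with a HuggingFace repo (downloads the input, then uploads the cleaned output)
uv run aif clean-dataset data/continual_data.json data/continual_data_clean.json "foo bar" --hf-repo-id <org/repo>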
diff --git a/aif_gen/api/response_mapper/response_mapper.py b/aif_gen/api/response_mapper/response_mapper.py
index 54bc7e50..8d206325 100644
--- a/aif_gen/api/response_mapper/response_mapper.py
+++ b/aif_gen/api/response_mapper/response_mapper.py
@@ -15,8 +15,8 @@ class ResponseMapper(ResponseMapperBase):
     """
 
     NUMBER_OF_PREFERENCE_AXES_SAMPLED: int = 3
-    TASK_PREFERENCE_INCLUSION_PROBABILITY_POSIIVE: float = 0.5
-    TASK_PREFERENCE_INCLUSION_PROBABILITY_NEGATIVE: float = 0.5
+    TASK_PREFERENCE_INCLUSION_PROBABILITY_POSIIVE: float = 0.4
+    TASK_PREFERENCE_INCLUSION_PROBABILITY_NEGATIVE: float = 0.4
 
     def __init__(self, suffix_context: Optional[str] = None) -> None:
         self._suffix_context = suffix_context
diff --git a/aif_gen/generate/service.py b/aif_gen/generate/service.py
index 6626aa86..80ae053d 100644
--- a/aif_gen/generate/service.py
+++ b/aif_gen/generate/service.py
@@ -533,13 +533,10 @@ class ResponsePair(pydantic.BaseModel, extra='forbid'):
 
     async with async_semaphore:
         if cache is not None:
             output = await cache.get(task_prompt + task_prompt_second)
-            if output is None:
-                raise ValueError(
-                    f'No cached response for task prompt: {task_prompt + task_prompt_second}'
-                )
-            structured_output = ResponsePair.model_validate_json(output)
-            output1_str: str = structured_output.chosen
-            output2_str: str = structured_output.rejected
+            if output is not None:
+                structured_output = ResponsePair.model_validate_json(output)
+                output1_str: str = structured_output.chosen
+                output2_str: str = structured_output.rejected
         else:
             output = None
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 5e681c8c..0e2f41ed 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -15,7 +15,7 @@ uv sync --group benchmarks
 ```sh
 uv run benchmarks/reward_modeling.py \
     --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
-    --dataset_name preference_axes.json \
+    --dataset_name benchmarks/continual_data_debug.json \
     --dataset_index 0 \
     --output_dir Qwen2-0.5B-Reward \
     --per_device_train_batch_size 8 \
diff --git a/jobs/generate_all_downsampled.sh b/jobs/generate_all_downsampled.sh
new file mode 100644
index 00000000..4f2948c4
--- /dev/null
+++ b/jobs/generate_all_downsampled.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+#SBATCH --job-name=generate_static_all_70B_final
+#SBATCH --partition=main
+#SBATCH --mem=48G
+#SBATCH --cpus-per-task=6
+#SBATCH --time=24:00:00
+#SBATCH --output=slurm-%j.out
+#SBATCH --error=slurm-%j.err
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=
+
+# set -euo pipefail
+source .env
+
+# 1) start the vllm server in the background
+uvx vllm serve meta-llama/Meta-Llama-3-70B-Instruct \
+  --dtype auto \
+  --api-key openai \
+  --tensor-parallel-size 2 &
+SERVER_PID=$!
+echo "⏳ Waiting for VLLM server (PID=$SERVER_PID) to come up…"
+
+# replace fixed sleep with a health-check loop
+export UV_VLLM_SERVER_URL="http://127.0.0.1:8000"  # tell `uv run` where to send requests
+for i in $(seq 1 600); do
+  if curl -fs "${UV_VLLM_SERVER_URL}/health"; then
+    echo "✅ VLLM up after $((i*5))s"
+    break
+  fi
+  echo "…still waiting ($i/600)…"
+  sleep 5
+done
+
+# helper to run one job
+run() { echo "➡️ $*"; eval "$*"; }  # "run" is an assumed name; the jobs below call `uv run aif generate` directly
+
+
+# 2) run all generation jobs sequentially
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_qna_direct/data.json" \
+  config/static_copy/education_qna_direct.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_qna_eli5/data.json" \
+  config/static_copy/education_qna_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_qna_expert/data.json" \
+  config/static_copy/education_qna_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_qna_hinted/data.json" \
+  config/static_copy/education_qna_hinted.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_summary_eli5/data.json" \
+  config/static_copy/education_summary_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/education_summary_expert/data.json" \
+  config/static_copy/education_summary_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_generate_formal/data.json" \
+  config/static_copy/politics_generate_formal.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_generate_rapper/data.json" \
+  config/static_copy/politics_generate_rapper.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_generate_shakespeare/data.json" \
+  config/static_copy/politics_generate_shakespeare.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_qna_eli5/data.json" \
+  config/static_copy/politics_qna_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_qna_expert/data.json" \
+  config/static_copy/politics_qna_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_summary_eli5/data.json" \
+  config/static_copy/politics_summary_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/politics_summary_expert/data.json" \
+  config/static_copy/politics_summary_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/tech_healthcare_qna_eli5/data.json" \
+  config/static_copy/tech_healthcare_qna_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/tech_healthcare_qna_expert/data.json" \
+  config/static_copy/tech_healthcare_qna_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/tech_physics_summary_eli5/data.json" \
+  config/static_copy/tech_physics_summary_eli5.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/tech_physics_summary_expert/data.json" \
+  config/static_copy/tech_physics_summary_expert.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+uv run aif generate \
+  --include-preference-axes \
+  --max_concurrency 256 \
+  --output_file "data/70B_generation/tech_physics_summary_highschool/data.json" \
+  config/static_copy/tech_physics_summary_highschool.yaml \
+  Meta-Llama-3.1-70B-Instruct
+
+# 3) shutdown the server when done
+echo "✅ All jobs finished. Shutting down VLLM server (PID=$SERVER_PID)…"
+kill $SERVER_PID
+wait $SERVER_PID 2>/dev/null || true
+echo "🛑 Server stopped."
diff --git a/jobs/generate_all_static.sh b/jobs/generate_all_static.sh
index f06e08a3..ad42056a 100644
--- a/jobs/generate_all_static.sh
+++ b/jobs/generate_all_static.sh
@@ -10,120 +10,145 @@
 
 source .env
 
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_qna_direct.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_qna_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_qna_expert.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_qna_hinted.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_summary_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/education_summary_expert.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_generate_rapper.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_generate_shakespeare.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_qna_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_qna_expert.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_summary_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/politics_summary_expert.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/tech_healthcare_qna_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/tech_healthcare_qna_expert.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/tech_physics_summary_eli5.yaml \
-# gpt-4o-mini \
-# && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/tech_physics_summary_expert.yaml \
-# gpt-4o-mini && \
-# uv run aif \
-# generate \
-# --max_concurrency 256 \
-# config/static/tech_physics_summary_highschool.yaml \
-# gpt-4o-mini
-
 uv run aif \
 generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_qna_direct.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_qna_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_qna_expert.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_qna_hinted.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_summary_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/education_summary_expert.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/politics_generate_formal.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
 --max_concurrency 256 \
 config/static/politics_generate_rapper.yaml \
 gpt-4o-mini \
 && \
 uv run aif \
 generate \
+--include-preference-axes \
+--temperature 1.1 \
 --max_concurrency 256 \
 config/static/politics_generate_shakespeare.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/politics_qna_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/politics_qna_expert.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/politics_summary_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/politics_summary_expert.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/tech_healthcare_qna_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/tech_healthcare_qna_expert.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/tech_physics_summary_eli5.yaml \
+gpt-4o-mini \
+&& \
+uv run aif \
+generate \
+--include-preference-axes \
+--temperature 1.1 \
+--max_concurrency 256 \
+config/static/tech_physics_summary_expert.yaml \
 gpt-4o-mini && \
 uv run aif \
 generate \
+--include-preference-axes \
+--temperature 1.1 \
 --max_concurrency 256 \
 config/static/tech_physics_summary_highschool.yaml \
 gpt-4o-mini
diff --git a/jobs/validate_all_static.sh b/jobs/validate_all_static.sh
index 06ff546a..3da9c095 100644
--- a/jobs/validate_all_static.sh
+++ b/jobs/validate_all_static.sh
@@ -5,135 +5,47 @@
 #SBATCH --time=24:00:00
 #SBATCH --output=slurm-%j.out
 #SBATCH --error=slurm-%j.err
-#SBATCH --mail-type=ALL
-#SBATCH --mail-user=
+# load your env vars
 source .env
 
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_qna_direct/*/data.json \
-data/70B_15_validation/70B/education_qna_direct/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_qna_eli5/*/data.json \
-data/70B_15_validation/70B/education_qna_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_qna_expert/*/data.json \
-data/70B_15_validation/70B/education_qna_expert/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_qna_hinted/*/data.json \
-data/70B_15_validation/70B/education_qna_hinted/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_summary_eli5/*/data.json \
-data/70B_15_validation/70B/education_summary_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/education_summary_expert/*/data.json \
-data/70B_15_validation/70B/education_summary_expert/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_generate_long/*/data.json \
-data/70B_15_validation/70B/politics_generate_long/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_generate_short/*/data.json \
-data/70B_15_validation/70B/politics_generate_short/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_qna_eli5/*/data.json \
-data/70B_15_validation/70B/politics_qna_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_qna_expert/*/data.json \
-data/70B_15_validation/70B/politics_qna_expert/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_summary_eli5/*/data.json \
-data/70B_15_validation/70B/politics_summary_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_summary_expert/*/data.json \
-data/70B_15_validation/70B/politics_summary_expert/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_generate_short/*/data.json \
-data/70B_15_validation/70B/politics_generate_short/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_qna_eli5/*/data.json \
-data/70B_15_validation/70B/politics_qna_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/politics_summary_eli5/*/data.json \
-data/70B_15_validation/70B/politics_summary_eli5/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct \
-&& \
-uv run aif \
-validate \
---max_concurrency 256 \
-data/70B_15_generation/tech_healthcare_summary_expert/*/data.json \
-data/70B_15_validation/70B/tech_healthcare_summary_expert/validate.json \
---no-validate-diversity \
---model Meta-Llama-3.1-70B-Instruct
+# map each LLM to its endpoint and key
+
+declare -A API_KEY=(
+  ["gpt-4o-mini"]="$OPENAI_API_KEY"
+)
+
+LLMS=(
+  "gpt-4o-mini"
+)
+
+FOLDERS=(
+  "4omini_generation_downsampled"
+  "70B_generation"
+)
+
+for llm in "${LLMS[@]}"; do
+  unset OPENAI_BASE_URL
+  export OPENAI_API_KEY="${API_KEY[$llm]}"
+
+  for gen in "${FOLDERS[@]}"; do
+    # derive validation folder name
+    val_folder="${gen/_generation/_validation}"
+    for sub in "data/$gen"/*; do
+      [ -d "$sub" ] || continue
+
+      infile="$sub/data.json"
+      outdir="data/$val_folder/$llm/$(basename "$sub")"
+      mkdir -p "$outdir"
+      outfile="$outdir/validate.json"
+
+      uv run aif validate \
+        --max_concurrency 256 \
+        "$infile" \
+        "$outfile" \
+        --no-validate-diversity \
+        --no-validate-embedding-diversity \
+        --model "$llm"
+    done
+  done
+done
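For reference, `val_folder="${gen/_generation/_validation}"` uses bash pattern substitution, replacing the first occurrence of `_generation` in `$gen` with `_validation`. A minimal sketch with the two FOLDERS entries from the script above:

gen="4omini_generation_downsampled"; echo "${gen/_generation/_validation}"  # 4omini_validation_downsampled
gen="70B_generation";                echo "${gen/_generation/_validation}"  # 70B_validation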