diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/datasets/agieval/accuracy_agieval.py b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/datasets/agieval/accuracy_agieval.py
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/datasets/agieval/accuracy_agieval.py
@@ -0,0 +1,11 @@
+from mmengine import read_base
+
+with read_base():
+    from .agieval_gen_0_shot_chat_prompt import agieval_datasets
+
+# Smoke test: keep only one subset and a small sample
+agieval_datasets = agieval_datasets[:1]
+agieval_datasets[0]['reader_cfg'] = dict(
+    agieval_datasets[0]['reader_cfg'],
+    test_range='[0:10]',
+)
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/models/vllm_api/accuracy_agieval.py b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/models/vllm_api/accuracy_agieval.py
new file mode 100644
index 00000000..5ea477da
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/ais_bench_configs/models/vllm_api/accuracy_agieval.py
@@ -0,0 +1,8 @@
+from mmengine import read_base
+
+with read_base():
+    from .vllm_api_general_chat import models
+
+models[0]['model'] = "qwen"
+models[0]['max_out_len'] = 64
+models[0]['batch_size'] = 16
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/case.yml b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/case.yml
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/case.yml
@@ -0,0 +1,13 @@
+case_type: benchmark
+case_group:
+  - run_server_accuracy
+  - llm_datasets_main
+  - short
+  - refactor_success_1202
+case_name: accuracy_agieval # must be unique across all cases in the project
+enable: y
+script:
+  start: run.sh
+  end: clean.sh
+timeout: 90
+rank_size: 3
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/clean.sh b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/clean.sh
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/clean.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+CUR_DIR=$(dirname "$(readlink -f "$0")")
+[ -f "${CUR_DIR}/tmplog.txt" ] && rm -f "${CUR_DIR}/tmplog.txt"
+exit 0
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/run.sh b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/run.sh
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_agieval/run.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+declare -i ret_ok=0
+declare -i ret_failed=1
+
+# Refuse to run with an incomplete environment: the paths below are built from
+# these variables, and one of them is the target of an `rm -rf`.
+for required_var in PROJECT_PATH PROJECT_OUTPUT_PATH; do
+    if [ -z "${!required_var}" ]; then
+        echo "Environment variable ${required_var} is not set"
+        exit $ret_failed
+    fi
+done
+
+CUR_DIR=$(dirname "$(readlink -f "$0")")
+CASE_NAME=$(basename "$CUR_DIR")
+LAST_3_DIRNAME=$(echo "$CUR_DIR" | rev | cut -d'/' -f1-3 | rev)
+CASE_OUTPUT_PATH=${PROJECT_OUTPUT_PATH}/${LAST_3_DIRNAME}
+AIS_BENCH_CODE_CONFIGS_DIR=${PROJECT_PATH}/../ais_bench/benchmark/configs
+CONFIG_DATASET_NAME="agieval"
+# The smoke test runs a single subset, so outputs are named after that subset's abbr (agieval-gaokao-chinese)
+OUTPUT_DATASET_NAME="agieval-gaokao-chinese"
+CURR_API="vllm-api-general-chat"
+
+mkdir -p "${CASE_OUTPUT_PATH}"
+rm -rf "${CASE_OUTPUT_PATH:?}"/*
+
+echo "Copying config files..."
+cp -r "${CUR_DIR}"/ais_bench_configs/* "${AIS_BENCH_CODE_CONFIGS_DIR}/"
+
+{
+    echo ""
+    echo "models[0]['host_ip'] = '${AISBENCH_SMOKE_SERVICE_IP}'"
+    echo "models[0]['host_port'] = ${AISBENCH_SMOKE_SERVICE_PORT}"
+    echo "models[0]['path'] = '${AISBENCH_SMOKE_MODEL_PATH}'"
+} >> "${AIS_BENCH_CODE_CONFIGS_DIR}/models/vllm_api/${CASE_NAME}.py"
+
+echo -e "\033[1;32m[1/1]\033[0m Test case - ${CASE_NAME}"
+
+set -o pipefail
+ais_bench --models "${CASE_NAME}" --datasets "${CASE_NAME}" --work-dir "${CASE_OUTPUT_PATH}" 2>&1 | tee "${CUR_DIR}/tmplog.txt"
+if [ $? -ne 0 ]
+then
+    echo "Run $CASE_NAME test: Failed"
+    exit $ret_failed
+fi
+echo "Run $CASE_NAME test: Success"
+
+# Use the last reported experiment folder; the text after its final '/' names the run directory.
+WORK_DIR_INFO=$(grep 'Current exp folder: ' "${CUR_DIR}/tmplog.txt" | tail -n 1)
+TIMESTAMP="${WORK_DIR_INFO##*/}"
+if [ -z "$TIMESTAMP" ]; then
+    echo "Can't find 'Current exp folder: ' in ${CUR_DIR}/tmplog.txt"
+    exit $ret_failed
+fi
+
+CURR_OUTPUT_PATH=${CASE_OUTPUT_PATH}/${TIMESTAMP}
+LOG_EVAL_OUTPUT_PATH=${CURR_OUTPUT_PATH}/logs/eval/${CURR_API}/${OUTPUT_DATASET_NAME}.out
+LOG_INFER_OUTPUT_PATH=${CURR_OUTPUT_PATH}/logs/infer/${CURR_API}/${OUTPUT_DATASET_NAME}.out
+PREDICTIONS_OUTPUT_PATH=${CURR_OUTPUT_PATH}/predictions/${CURR_API}/${OUTPUT_DATASET_NAME}.jsonl
+RESULTS_OUTPUT_PATH=${CURR_OUTPUT_PATH}/results/${CURR_API}/${OUTPUT_DATASET_NAME}.json
+SUMMARY_OUTPUT_PATH=${CURR_OUTPUT_PATH}/summary/summary_${TIMESTAMP}.csv
+
+# Every expected artifact must exist; report the first missing one and fail.
+for expected_file in \
+    "$LOG_EVAL_OUTPUT_PATH" \
+    "$LOG_INFER_OUTPUT_PATH" \
+    "$PREDICTIONS_OUTPUT_PATH" \
+    "$RESULTS_OUTPUT_PATH" \
+    "$SUMMARY_OUTPUT_PATH"
+do
+    if [ ! -f "$expected_file" ]; then
+        echo "Can't find $expected_file"
+        exit $ret_failed
+    fi
+done
+
+exit $ret_ok
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/datasets/math/accuracy_math.py b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/datasets/math/accuracy_math.py
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/datasets/math/accuracy_math.py
@@ -0,0 +1,10 @@
+from mmengine import read_base
+
+with read_base():
+    from .math_prm800k_500_0shot_cot_gen import math_datasets
+
+# Smoke test: small sample
+math_datasets[0]['reader_cfg'] = dict(
+    math_datasets[0]['reader_cfg'],
+    test_range='[0:10]',
+)
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/models/vllm_api/accuracy_math.py b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/models/vllm_api/accuracy_math.py
new file mode 100644
index 00000000..b35dbf04
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/ais_bench_configs/models/vllm_api/accuracy_math.py
@@ -0,0 +1,8 @@
+from mmengine import read_base
+
+with read_base():
+    from .vllm_api_general_chat import models
+
+models[0]['model'] = "qwen"
+models[0]['max_out_len'] = 512
+models[0]['batch_size'] = 8
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/case.yml b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/case.yml
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/case.yml
@@ -0,0 +1,13 @@
+case_type: benchmark
+case_group:
+  - run_server_accuracy
+  - llm_datasets_main
+  - short
+  - refactor_success_1202
+case_name: accuracy_math # must be unique across all cases in the project
+enable: y
+script:
+  start: run.sh
+  end: clean.sh
+timeout: 90
+rank_size: 3
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/clean.sh b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/clean.sh
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/clean.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+CUR_DIR=$(dirname "$(readlink -f "$0")")
+[ -f "${CUR_DIR}/tmplog.txt" ] && rm -f "${CUR_DIR}/tmplog.txt"
+exit 0
diff --git a/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/run.sh b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/run.sh
new file mode 100644
--- /dev/null
+++ b/smoke_tests/test-case/run_server_accuracy/llm_datasets_main/accuracy_math/run.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+declare -i ret_ok=0
+declare -i ret_failed=1
+
+# Refuse to run with an incomplete environment: the paths below are built from
+# these variables, and one of them is the target of an `rm -rf`.
+for required_var in PROJECT_PATH PROJECT_OUTPUT_PATH; do
+    if [ -z "${!required_var}" ]; then
+        echo "Environment variable ${required_var} is not set"
+        exit $ret_failed
+    fi
+done
+
+CUR_DIR=$(dirname "$(readlink -f "$0")")
+CASE_NAME=$(basename "$CUR_DIR")
+LAST_3_DIRNAME=$(echo "$CUR_DIR" | rev | cut -d'/' -f1-3 | rev)
+CASE_OUTPUT_PATH=${PROJECT_OUTPUT_PATH}/${LAST_3_DIRNAME}
+AIS_BENCH_CODE_CONFIGS_DIR=${PROJECT_PATH}/../ais_bench/benchmark/configs
+CONFIG_DATASET_NAME="math"
+OUTPUT_DATASET_NAME="math_prm800k_500"
+CURR_API="vllm-api-general-chat"
+
+mkdir -p "${CASE_OUTPUT_PATH}"
+rm -rf "${CASE_OUTPUT_PATH:?}"/*
+
+echo "Copying config files..."
+cp -r "${CUR_DIR}"/ais_bench_configs/* "${AIS_BENCH_CODE_CONFIGS_DIR}/"
+
+{
+    echo ""
+    echo "models[0]['host_ip'] = '${AISBENCH_SMOKE_SERVICE_IP}'"
+    echo "models[0]['host_port'] = ${AISBENCH_SMOKE_SERVICE_PORT}"
+    echo "models[0]['path'] = '${AISBENCH_SMOKE_MODEL_PATH}'"
+} >> "${AIS_BENCH_CODE_CONFIGS_DIR}/models/vllm_api/${CASE_NAME}.py"
+
+echo -e "\033[1;32m[1/1]\033[0m Test case - ${CASE_NAME}"
+
+set -o pipefail
+ais_bench --models "${CASE_NAME}" --datasets "${CASE_NAME}" --work-dir "${CASE_OUTPUT_PATH}" 2>&1 | tee "${CUR_DIR}/tmplog.txt"
+if [ $? -ne 0 ]
+then
+    echo "Run $CASE_NAME test: Failed"
+    exit $ret_failed
+fi
+echo "Run $CASE_NAME test: Success"
+
+# Use the last reported experiment folder; the text after its final '/' names the run directory.
+WORK_DIR_INFO=$(grep 'Current exp folder: ' "${CUR_DIR}/tmplog.txt" | tail -n 1)
+TIMESTAMP="${WORK_DIR_INFO##*/}"
+if [ -z "$TIMESTAMP" ]; then
+    echo "Can't find 'Current exp folder: ' in ${CUR_DIR}/tmplog.txt"
+    exit $ret_failed
+fi
+
+CURR_OUTPUT_PATH=${CASE_OUTPUT_PATH}/${TIMESTAMP}
+LOG_EVAL_OUTPUT_PATH=${CURR_OUTPUT_PATH}/logs/eval/${CURR_API}/${OUTPUT_DATASET_NAME}.out
+LOG_INFER_OUTPUT_PATH=${CURR_OUTPUT_PATH}/logs/infer/${CURR_API}/${OUTPUT_DATASET_NAME}.out
+PREDICTIONS_OUTPUT_PATH=${CURR_OUTPUT_PATH}/predictions/${CURR_API}/${OUTPUT_DATASET_NAME}.jsonl
+RESULTS_OUTPUT_PATH=${CURR_OUTPUT_PATH}/results/${CURR_API}/${OUTPUT_DATASET_NAME}.json
+SUMMARY_OUTPUT_PATH=${CURR_OUTPUT_PATH}/summary/summary_${TIMESTAMP}.csv
+
+# Every expected artifact must exist; report the first missing one and fail.
+for expected_file in \
+    "$LOG_EVAL_OUTPUT_PATH" \
+    "$LOG_INFER_OUTPUT_PATH" \
+    "$PREDICTIONS_OUTPUT_PATH" \
+    "$RESULTS_OUTPUT_PATH" \
+    "$SUMMARY_OUTPUT_PATH"
+do
+    if [ ! -f "$expected_file" ]; then
+        echo "Can't find $expected_file"
+        exit $ret_failed
+    fi
+done
+
+exit $ret_ok