#!/bin/bash
#
# Evaluate every saved checkpoint of one SFT run.
#
# For each `checkpoint-*` directory found under the run's `checkpoint`
# folder (ascending numeric order of the suffix after the first `-`), every
# script listed in `scripts` is launched from ./Evaluation, and afterwards
# ./Evaluation/create_table.py is launched once for that checkpoint.
#
# All script paths are relative (./Evaluation/...), so the script has to be
# started from the directory that contains ./Evaluation.
#
# Exit status:
#   0  every launched command succeeded
#   1  the checkpoint directory is missing or holds no checkpoint-* folders,
#      or at least one launched command failed (the run still continues past
#      individual failures; they are summarised at the end)

# No `-e` on purpose: one failing evaluation must not abort the remaining
# (long-running) evaluations. Failures are tracked explicitly in run_step.
set -uo pipefail

# Evaluation scripts (file names under ./Evaluation), run in this order for
# every checkpoint.
scripts=(
  "evaluate_musr_object_placements_raw_vs_finetuned.py"
  "evaluate_musr_murder_mystery_raw_vs_finetuned.py"
  "evaluate_musr_team_allocation_raw_vs_finetuned.py"
  "evaluate_medqa_raw_vs_finetuned.py"
  "evaluate_gsm8k_raw_vs_finetuned.py"
  "evaluate_aime_raw_vs_finetuned.py"
  "evaluate_aimo_raw_vs_finetuned.py"
  "evaluate_art_raw_vs_finetuned.py"
  "evaluate_copa_raw_vs_finetuned_guess_effect.py"
  "evaluate_goEmotion_raw_vs_finetuned.py"
)

# Passed to every evaluation script as --output_path.
OUTPUT_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/Evaluation/14B"

# Passed to create_table.py as --root.
# NOTE(review): this is relative while OUTPUT_DIR is absolute; the two only
# name the same directory when the script is started from
# /home/moein_salimi/users/Parsa/AbductiveReasoning - confirm this is intended.
ROOT_DIR="./SFT/Evaluation/14B"

# Directory that holds the per-run result folders.
BASE_RESULTS_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/results_sft_14b"

# Passed to every evaluation script as --raw_path.
RAW_MODEL_PATH="/home/moein_salimi/PLLMS/unsloth-Qwen2.5-14B-Instruct-bnb-4bit"

# Name of the run whose checkpoints are evaluated.
RUN_NAME="SFT_dt12.11.19:13_e6_unsloth_Qwen2.5_14B_Instruct_bnb_4bit_bnb_4bit_lr5e-06_t0.0_r64_b4_SFT_Implementation"

# The run folder is looked up under two names: with a "Training_" prefix
# first, then under the bare run name.
TRAINING_DIR="$BASE_RESULTS_DIR/Training_${RUN_NAME}"
FINAL_DIR="$BASE_RESULTS_DIR/${RUN_NAME}"

if [[ -d "$TRAINING_DIR/checkpoint" ]]; then
  TRAINING_BASE="$TRAINING_DIR"
elif [[ -d "$FINAL_DIR/checkpoint" ]]; then
  TRAINING_BASE="$FINAL_DIR"
else
  {
    echo "ERROR: Could not find checkpoint directory."
    echo "Tried:"
    echo " $TRAINING_DIR/checkpoint"
    echo " $FINAL_DIR/checkpoint"
  } >&2
  exit 1
fi
CHECKPOINT_DIR="$TRAINING_BASE/checkpoint"

echo "Using checkpoint directory: $CHECKPOINT_DIR"
echo

# Arguments shared by every evaluation script. Kept as an array so each
# element reaches python3 as exactly one argument.
COMMON_ARGS=(--cuda_device 0 --evaluate_checkpoints 1)

# Per-script --batch_size; scripts without an entry fall back to 256 below.
# The neulr entries have no counterpart in `scripts` at the moment.
declare -A BATCH_SIZES=(
  ["evaluate_neulr_deductive_raw_vs_finetuned.py"]=8
  ["evaluate_neulr_inductive_raw_vs_finetuned.py"]=8
  ["evaluate_neulr_abductive_raw_vs_finetuned.py"]=8
  ["evaluate_medqa_raw_vs_finetuned.py"]=16
  ["evaluate_musr_murder_mystery_raw_vs_finetuned.py"]=8
  ["evaluate_musr_object_placements_raw_vs_finetuned.py"]=2
  ["evaluate_musr_team_allocation_raw_vs_finetuned.py"]=16
  ["evaluate_gsm8k_raw_vs_finetuned.py"]=16
  ["evaluate_aime_raw_vs_finetuned.py"]=8
  ["evaluate_aimo_raw_vs_finetuned.py"]=8
  ["evaluate_art_raw_vs_finetuned.py"]=64
  ["evaluate_copa_raw_vs_finetuned_guess_effect.py"]=64
  ["evaluate_goEmotion_raw_vs_finetuned.py"]=16
)

# Made visible to the child python3 processes.
# NOTE(review): presumably read by the evaluation scripts - confirm.
export TRAINING_BASE

# Labels of every command that exited non-zero, reported at the end.
failures=()

#######################################
# Run a command, recording (not aborting on) a non-zero exit status.
# Globals:   failures (appended to on failure)
# Arguments: $1 - label used in diagnostics; $2.. - command and its arguments
# Outputs:   a FAILED line on stderr when the command fails
# Returns:   the command's exit status
#######################################
run_step() {
  local label=$1
  shift
  local status=0
  "$@" || status=$?
  if ((status != 0)); then
    echo "FAILED: $label (exit status $status)" >&2
    failures+=("$label (exit status $status)")
  fi
  return "$status"
}

# Collect checkpoint directories with a glob instead of parsing `ls`; the
# trailing slash restricts the match to directories.
shopt -s nullglob
ckpt_paths=("$CHECKPOINT_DIR"/checkpoint-*/)
shopt -u nullglob

if ((${#ckpt_paths[@]} == 0)); then
  echo "ERROR: No checkpoint-* directories found in $CHECKPOINT_DIR" >&2
  exit 1
fi

# Reduce to bare directory names, then order numerically by the part after
# the first '-', so that e.g. checkpoint-200 precedes checkpoint-1000.
ckpt_names=()
for ckpt_path in "${ckpt_paths[@]}"; do
  ckpt_path=${ckpt_path%/}
  ckpt_names+=("${ckpt_path##*/}")
done
mapfile -t ckpt_names < <(printf '%s\n' "${ckpt_names[@]}" | sort -t- -k2,2n)

for ckpt_name in "${ckpt_names[@]}"; do
  ckpt="$CHECKPOINT_DIR/$ckpt_name"
  [[ -d "$ckpt" ]] || continue

  echo "====================================="
  echo "Using checkpoint: $ckpt"
  echo "====================================="

  for script in "${scripts[@]}"; do
    batch_size="${BATCH_SIZES[$script]:-256}"

    echo "Running $script with checkpoint $ckpt (batch_size=$batch_size) ..."
    if run_step "$script @ $ckpt_name" \
      python3 ./Evaluation/"$script" \
      "${COMMON_ARGS[@]}" \
      --batch_size "$batch_size" \
      --checkpoint_path "$ckpt" \
      --run "$RUN_NAME" \
      --raw_path "$RAW_MODEL_PATH" \
      --output_path "$OUTPUT_DIR"; then
      echo "Finished $script"
    fi
    echo "-------------------------------------"
  done

  # Run the summary-table script once per checkpoint, after all of that
  # checkpoint's evaluations.
  # NOTE(review): --out_csv is given an .xlsx file name - confirm that
  # create_table.py expects that.
  run_step "create_table.py @ $ckpt_name" \
    python3 ./Evaluation/create_table.py \
    --root "$ROOT_DIR" \
    --out_csv "./SFT/Evaluation//metrics_summary.xlsx" \
    --run "$RUN_NAME" \
    --base_model_name "qwen2.5-14B" \
    --base_result_dir "$BASE_RESULTS_DIR" \
    --train_data "UniADILR"
done

# Surface any failures in the exit status so callers (schedulers, CI) notice.
if ((${#failures[@]} > 0)); then
  {
    echo "${#failures[@]} step(s) failed:"
    printf '  %s\n' "${failures[@]}"
  } >&2
  exit 1
fi