LlemmaFT / process_mse_data.sh
Zenos5's picture
Upload 24 files
766ea9e verified
#!/bin/bash
#SBATCH --time=1:00:00 # walltime. hours:minutes:seconds
#SBATCH --ntasks=8 # number of processor cores (i.e. tasks)
#SBATCH --nodes=1 # number of nodes
#SBATCH --gpus=1
#SBATCH --mem=80G # 80G memory per node
#SBATCH --mail-user=aw742@byu.edu # email address
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --qos=cs
#SBATCH --partition=cs
# Strict mode: abort on the first failing command (-e), on any use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE
# module load python/3.11

# Activate the project virtual environment.
# The activate script lives under Scripts/ when the venv was created on Windows
# and under bin/ when it was created on Linux/macOS. The originally configured
# path is tried first so existing setups behave exactly as before; the POSIX
# location is the fallback. Paths are relative to the current working directory.
venv_activate=""
for candidate in ./mse_env/Scripts/activate ./mse_env/bin/activate; do
  if [[ -f "$candidate" ]]; then
    venv_activate="$candidate"
    break
  fi
done
if [[ -z "$venv_activate" ]]; then
  printf 'error: no virtualenv activate script found (tried ./mse_env/Scripts/activate and ./mse_env/bin/activate)\n' >&2
  exit 1
fi
# Activate scripts are third-party code and are not guaranteed to be clean
# under `set -u`, so unset-variable checking is suspended only while sourcing.
set +u
# shellcheck source=/dev/null
source "$venv_activate"
set -u
unset venv_activate candidate
# json config = "max_samples": 500,
# python mse_text_img_process.py
# python convert_mse.py
# pip install jsonlines
# pip install deepeval
# Evaluation parameters. No enabled line in this section reads them; they are
# referenced by the commented-out mse_ollama_run.py invocations.
NUM_TEST_CASES=100 NUM_SHOT=0

# Disabled: faithfulness run on the original model.
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --shot 0 --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"

# Disabled: Windows cmd-style assignment kept for reference.
# set DEEPEVAL_RESULTS_FOLDER=.\data

# Run the timing script, then report completion on stdout.
python mse_ollama_timer.py
printf '%s\n' "Test time calculated"
# deepeval set-local-model --model-name Hudson/llemma:7b
# ollama pull Hudson/llemma:7b
# deepeval set-ollama Hudson/llemma:7b
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_0_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_0_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_0_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- 1-shot evaluation (all runs currently commented out) ---
# NUM_SHOT is referenced only by the disabled commands in this section.
# Each disabled run exports its own DEEPEVAL_RESULTS_FOLDER before invoking
# mse_ollama_run.py with a different --test value (ar, crec, cp).
NUM_SHOT=1
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_1_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_1_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_1_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- 5-shot evaluation (all runs currently commented out) ---
# NUM_SHOT is referenced only by the disabled commands in this section.
NUM_SHOT=5
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_5_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_5_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_5_shot_100_cp.txt
# echo "Test case contextual precision finished"
# Contextual-precision run split into four batches of 25 with --begin offsets
# 0/25/50/75, each writing to its own output file.
# NOTE(review): presumably --begin is the index of the first test case — confirm
# against mse_ollama_run.py.
# python mse_ollama_run.py --num 25 --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num 25 --begin 25 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b25_cp.txt
# echo "Test case contextual precision finished (start 25)"
# python mse_ollama_run.py --num 25 --begin 50 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b50_cp.txt
# echo "Test case contextual precision finished (start 50)"
# python mse_ollama_run.py --num 25 --begin 75 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b75_cp.txt
# echo "Test case contextual precision finished (start 75)"
# --- 10-shot evaluation (all runs currently commented out) ---
# NUM_SHOT is referenced only by the disabled commands in this section.
NUM_SHOT=10
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT --out_file metric_test_10_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT --out_file metric_test_10_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_10_shot_100_cp.txt
# echo "Test case contextual precision finished"
# --- Fine-tuned model, 0-shot evaluation (all runs currently commented out) ---
# NUM_SHOT is referenced only by the disabled commands in this section.
# NOTE(review): nothing visible here selects the fine-tuned model; presumably
# that is configured inside mse_ollama_run.py or via the ollama/deepeval
# settings — confirm before re-enabling.
NUM_SHOT=0
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #> metric_test_ft_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #> metric_test_ft_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT > metric_test_ft_100_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test crel --out_file metric_test_orig_100_crel.txt
# echo "Test case contextual relevancy finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"
# python mse_jsonl_resize.py
# python finetune.py
# echo "Original Llemma Model"
# echo "Processing 0 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 0 --dataset mse_llemma_orig_100_case_0_shot
# echo "Processing 1 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 1 --dataset mse_llemma_orig_100_case_1_shot
# echo "Processing 5 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 5 --dataset mse_llemma_orig_100_case_5_shot
# echo "Processing 10 shot 100 test cases"
# CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 10 --dataset mse_llemma_orig_100_case_10_shot