#!/bin/bash
# SLURM batch script: run DeepEval metrics (answer relevancy, contextual
# recall, contextual precision) for the fine-tuned MSE model via Ollama.
# The many commented-out lines below are the author's run history for the
# original (non-finetuned) model at 0/1/5/10-shot — kept for reference.
#SBATCH --time=1:00:00        # walltime. hours:minutes:seconds
#SBATCH --ntasks=8            # number of processor cores (i.e. tasks)
#SBATCH --nodes=1             # number of nodes
#SBATCH --gpus=1
#SBATCH --mem=80G             # total memory per node (--mem is per node, not per core)
#SBATCH --mail-user=aw742@byu.edu   # email address
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --qos=cs
#SBATCH --partition=cs

# some helpful debugging options
set -e           # abort on any unhandled non-zero exit status
set -u           # treat expansion of unset variables as an error
set -o pipefail  # a pipeline fails if any stage fails, not just the last

# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE
# module load python/3.11

# NOTE(review): "Scripts/activate" is the Windows venv layout; on a Linux
# cluster this is normally "bin/activate" — confirm the venv was built here.
source ./mse_env/Scripts/activate

# json config = "max_samples": 500,
# python mse_text_img_process.py
# python convert_mse.py
# pip install jsonlines
# pip install deepeval

NUM_TEST_CASES=100
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --shot 0 --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"

# NOTE(review): NUM_SHOT is only referenced by the commented-out runs below;
# the active commands at the bottom do not use it.
NUM_SHOT=0
# set DEEPEVAL_RESULTS_FOLDER=.\data
# deepeval set-local-model --model-name Hudson/llemma:7b
# ollama pull Hudson/llemma:7b
# deepeval set-ollama Hudson/llemma:7b
# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json > metric_test_0_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json #> metric_test_0_shot_100_crec.txt
# echo "Test case contexual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_0_shot.json > metric_test_0_shot_100_cp.txt
# echo "Test case contextual precision finished"

NUM_SHOT=1
# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json > metric_test_1_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json #> metric_test_1_shot_100_crec.txt
# echo "Test case contexual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_1_shot.json #> metric_test_1_shot_100_cp.txt
# echo "Test case contextual precision finished"

NUM_SHOT=5
# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json > metric_test_5_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json #> metric_test_5_shot_100_crec.txt
# echo "Test case contexual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_5_shot.json #> metric_test_5_shot_100_cp.txt
# echo "Test case contextual precision finished"
# # python mse_ollama_run.py --num 25 --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_cp.txt
# # echo "Test case contextual precision finished"
# # python mse_ollama_run.py --num 25 --begin 25 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b25_cp.txt
# # echo "Test case contextual precision finished (start 25)"
# # python mse_ollama_run.py --num 25 --begin 50 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b50_cp.txt
# # echo "Test case contextual precision finished (start 50)"
# # python mse_ollama_run.py --num 25 --begin 75 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b75_cp.txt
# # echo "Test case contextual precision finished (start 75)"
# NUM_SHOT=10
# python mse_ollama_run.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json > metric_test_10_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# python mse_ollama_run.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json #> metric_test_10_shot_100_crec.txt
# echo "Test case contexual recall finished"
# python mse_ollama_run.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_orig_100_case_10_shot.json #> metric_test_10_shot_100_cp.txt
# echo "Test case contextual precision finished"

# finetuned
NUM_SHOT=0
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT --out_file metric_test_ft_100_ar.docx
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT --out_file metric_test_ft_100_crec.docx
# echo "Test case contexual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_ft_100_cp.docx
# echo "Test case contextual precision finished"

# Active runs: fine-tuned model, 0-shot, all three metrics.
# NOTE(review): the 'ar' run's redirect is commented out (output goes to the
# SLURM log) while 'crec' and 'cp' redirect to files — confirm this asymmetry
# is intentional.
python mse_ollama_run_ft.py --test ar --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json #> metric_test_ft_100_ar.txt
echo "Test case answer relevancy finished"
python mse_ollama_run_ft.py --test crec --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json > metric_test_ft_100_crec.txt
echo "Test case contexual recall finished"
python mse_ollama_run_ft.py --test cp --dataset ./deepeval-test-dataset/mse_llemma_ft_100_case_0_shot.json > metric_test_ft_100_cp.txt
echo "Test case contextual precision finished"

# python mse_ollama_run.py --num $NUM_TEST_CASES --test crel --out_file metric_test_orig_100_crel.txt
# echo "Test case contextual relevancy finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"
# python mse_jsonl_resize.py
# python finetune.py