ready for few shots eval
- llm_toolkit/eval.py +0 -67
- llm_toolkit/eval_lf.py +0 -110
- llm_toolkit/eval_shots.py +166 -0
- llm_toolkit/logical_reasoning_utils.py +1 -1
- notebooks/05_Few-shot_Prompting_Anthropic.ipynb +0 -0
- notebooks/05b_Anthropic-Models_analysis.ipynb +0 -0
- scripts/eval-mgtv-shots.sh +24 -0
- scripts/eval-shots.sh +21 -0
llm_toolkit/eval.py
DELETED
@@ -1,67 +0,0 @@
-import os
-import sys
-import torch
-from dotenv import find_dotenv, load_dotenv
-
-found_dotenv = find_dotenv(".env")
-
-if len(found_dotenv) == 0:
-    found_dotenv = find_dotenv(".env.example")
-print(f"loading env vars from: {found_dotenv}")
-load_dotenv(found_dotenv, override=False)
-
-path = os.path.dirname(found_dotenv)
-print(f"Adding {path} to sys.path")
-sys.path.append(path)
-
-from llm_toolkit.translation_engine import *
-from llm_toolkit.translation_utils import *
-
-model_name = os.getenv("MODEL_NAME")
-adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
-load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
-data_path = os.getenv("DATA_PATH")
-results_path = os.getenv("RESULTS_PATH")
-
-print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-model, tokenizer = load_model(
-    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
-)
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-datasets = load_translation_dataset(data_path, tokenizer)
-
-print("Evaluating model: " + model_name)
-predictions = eval_model(model, tokenizer, datasets["test"])
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-if adapter_name_or_path is not None:
-    model_name += "_" + adapter_name_or_path.split("/")[-1]
-
-save_results(
-    model_name,
-    results_path,
-    datasets["test"],
-    predictions,
-    debug=True,
-)
-
-metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
-print(metrics)
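The three-line GPU report above is repeated before model loading, after loading, and after inference, both in the scripts deleted here and in the new eval_shots.py added below. A small hypothetical helper (not part of this commit) that captures the pattern in one place:

import torch

def report_gpu_stats(tag: str) -> None:
    """Print the GPU name, its total memory, and the memory currently reserved."""
    if not torch.cuda.is_available():
        return
    gpu_stats = torch.cuda.get_device_properties(0)
    reserved_gb = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
    total_gb = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
    print(f"({tag}) GPU = {gpu_stats.name}. Max memory = {total_gb} GB.")
    print(f"{reserved_gb} GB of memory reserved.")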
llm_toolkit/eval_lf.py
DELETED
@@ -1,110 +0,0 @@
-import os
-import sys
-import torch
-from dotenv import find_dotenv, load_dotenv
-from llamafactory.chat import ChatModel
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
-found_dotenv = find_dotenv(".env")
-
-if len(found_dotenv) == 0:
-    found_dotenv = find_dotenv(".env.example")
-print(f"loading env vars from: {found_dotenv}")
-load_dotenv(found_dotenv, override=False)
-
-path = os.path.dirname(found_dotenv)
-print(f"Adding {path} to sys.path")
-sys.path.append(path)
-
-from llm_toolkit.translation_utils import *
-
-model_name = os.getenv("MODEL_NAME")
-adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
-load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
-data_path = os.getenv("DATA_PATH")
-results_path = os.getenv("RESULTS_PATH")
-
-print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
-
-
-def load_model(
-    model_name,
-    max_seq_length=2048,
-    dtype=torch.bfloat16,
-    load_in_4bit=False,
-    adapter_name_or_path=None,
-):
-    print(f"loading model: {model_name}")
-
-    if adapter_name_or_path:
-        template = "llama3" if "llama-3" in model_name.lower() else "chatml"
-
-        args = dict(
-            model_name_or_path=model_name,
-            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
-            template=template,  # same to the one in training
-            finetuning_type="lora",  # same to the one in training
-            quantization_bit=4 if load_in_4bit else None,  # load 4-bit quantized model
-        )
-        chat_model = ChatModel(args)
-        return chat_model.engine.model, chat_model.engine.tokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=load_in_4bit,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_use_double_quant=False,
-        bnb_4bit_compute_dtype=dtype,
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        torch_dtype=dtype,
-        trust_remote_code=True,
-        device_map="auto",
-    )
-
-    return model, tokenizer
-
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-model, tokenizer = load_model(
-    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
-)
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-datasets = load_translation_dataset(data_path, tokenizer)
-
-print("Evaluating model: " + model_name)
-predictions = eval_model(model, tokenizer, datasets["test"])
-
-gpu_stats = torch.cuda.get_device_properties(0)
-start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
-max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
-print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
-print(f"{start_gpu_memory} GB of memory reserved.")
-
-if adapter_name_or_path is not None:
-    model_name += "_" + adapter_name_or_path.split("/")[-1]
-
-save_results(
-    model_name,
-    results_path,
-    datasets["test"],
-    predictions,
-    debug=True,
-)
-
-metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
-print(metrics)
llm_toolkit/eval_shots.py
ADDED
@@ -0,0 +1,166 @@
+import os
+import sys
+import torch
+from dotenv import find_dotenv, load_dotenv
+
+found_dotenv = find_dotenv(".env")
+
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+
+from llm_toolkit.llm_utils import *
+from llm_toolkit.logical_reasoning_utils import *
+
+device = check_gpu()
+is_cuda = torch.cuda.is_available()
+
+model_name = os.getenv("MODEL_NAME")
+adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
+batch_size = int(os.getenv("BATCH_SIZE", 1))
+using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
+max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
+start_num_shots = int(os.getenv("START_NUM_SHOTS", 0))
+
+print(
+    model_name,
+    adapter_name_or_path,
+    load_in_4bit,
+    data_path,
+    results_path,
+    max_new_tokens,
+    batch_size,
+)
+
+dtype = (
+    torch.float32
+    if os.getenv("USE_FLOAT32_FOR_INFERENCE") == "true"
+    else (
+        torch.bfloat16
+        if os.getenv("USE_BF16_FOR_INFERENCE") == "true"
+        else torch.float16
+    )
+)
+
+if is_cuda:
+    torch.cuda.empty_cache()
+    gpu_stats = torch.cuda.get_device_properties(0)
+    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"(0) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{start_gpu_memory} GB of memory reserved.")
+
+    torch.cuda.empty_cache()
+
+model, tokenizer = load_model(
+    model_name,
+    load_in_4bit=load_in_4bit,
+    adapter_name_or_path=adapter_name_or_path,
+    using_llama_factory=using_llama_factory,
+    dtype=dtype,
+)
+
+if is_cuda:
+    gpu_stats = torch.cuda.get_device_properties(0)
+    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{start_gpu_memory} GB of memory reserved.")
+
+
+def on_num_shots_step_completed(model_name, dataset, predictions):
+    save_results(
+        model_name,
+        results_path,
+        dataset,
+        predictions,
+    )
+
+    metrics = calc_metrics(dataset["label"], predictions, debug=True)
+    print(f"{model_name} metrics: {metrics}")
+
+
+if adapter_name_or_path is not None:
+    model_name += "/" + adapter_name_or_path.split("/")[-1]
+
+
+def evaluate_model_with_num_shots(
+    model,
+    tokenizer,
+    model_name,
+    data_path,
+    start_num_shots=0,
+    range_num_shots=[10],
+    batch_size=1,
+    max_new_tokens=2048,
+    device="cuda",
+):
+    print(f"Evaluating model: {model_name} on {device}")
+
+    for num_shots in range_num_shots:
+        if num_shots < start_num_shots:
+            continue
+
+        print(f"*** Evaluating with num_shots: {num_shots}")
+
+        datasets = load_logical_reasoning_dataset(
+            data_path,
+            tokenizer=tokenizer,
+            chinese_prompt=not use_english_datasets,
+            using_p1=False,
+        )
+        if len(sys.argv) > 1:
+            num = int(sys.argv[1])
+            if num > 0:
+                print(f"--- evaluating {num} entries")
+                datasets["test"] = datasets["test"].select(range(num))
+
+        print_row_details(datasets["test"].to_pandas(), indices=[0, -1])
+
+        predictions = eval_model(
+            model,
+            tokenizer,
+            datasets["test"],
+            device=device,
+            batch_size=batch_size,
+            max_new_tokens=max_new_tokens,
+        )
+
+        model_name_with_rp = f"{model_name}/shots-{num_shots:02d}"
+
+        try:
+            on_num_shots_step_completed(
+                model_name_with_rp,
+                datasets["test"],
+                predictions,
+            )
+        except Exception as e:
+            print(e)
+
+
+evaluate_model_with_num_shots(
+    model,
+    tokenizer,
+    model_name,
+    data_path,
+    batch_size=batch_size,
+    max_new_tokens=max_new_tokens,
+    device=device,
+    start_num_shots=start_num_shots,
+)
+
+if is_cuda:
+    gpu_stats = torch.cuda.get_device_properties(0)
+    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{start_gpu_memory} GB of memory reserved.")
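The new evaluator is configured entirely through environment variables (the same ones exported by scripts/eval-shots.sh below), and an optional positional argument caps how many test entries are evaluated. A minimal smoke-test sketch, assuming the datasets/mgtv layout used by the shell scripts and a model taken from scripts/eval-mgtv-shots.sh:

import os
import subprocess

# Assumed configuration; mirrors what scripts/eval-shots.sh exports.
os.environ.update(
    {
        "MODEL_NAME": "Qwen/Qwen2-7B-Instruct",
        "LOGICAL_REASONING_DATA_PATH": "datasets/mgtv",
        "LOGICAL_REASONING_RESULTS_PATH": "data/Qwen2-7B-Instruct_results.csv",
        "LOAD_IN_4BIT": "false",
        "USING_LLAMA_FACTORY": "true",
    }
)

# The optional argument ("10") limits evaluation to the first 10 test entries.
subprocess.run(["python", "llm_toolkit/eval_shots.py", "10"], check=True)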
llm_toolkit/logical_reasoning_utils.py
CHANGED
@@ -294,7 +294,7 @@ def load_logical_reasoning_dataset(
 
     model_name = os.getenv("MODEL_NAME")
 
-    if "
+    if "gemma" in model_name.lower():
         messages = messages[1:]
 
     texts = []
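The one-line change targets Gemma-family models: their chat templates do not accept a system role, so the leading system message is dropped before the prompt is rendered. A minimal sketch of the pattern (the message list and default model name here are illustrative, not the repo's exact code):

import os

model_name = os.getenv("MODEL_NAME", "google/gemma-2-9b-it")  # hypothetical default

messages = [
    {"role": "system", "content": "You are a careful logical-reasoning assistant."},
    {"role": "user", "content": "..."},
]

if "gemma" in model_name.lower():
    # Gemma chat templates reject the "system" role, so drop that message.
    messages = messages[1:]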
notebooks/05_Few-shot_Prompting_Anthropic.ipynb
CHANGED
The diff for this file is too large to render. See raw diff.
notebooks/05b_Anthropic-Models_analysis.ipynb
CHANGED
The diff for this file is too large to render. See raw diff.
scripts/eval-mgtv-shots.sh
ADDED
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+BASEDIR=`pwd`
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+$BASEDIR/scripts/eval-shots.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
+
+$BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
+
+$BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat-1m
+
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2-7B-Instruct
+
+$BASEDIR/scripts/eval-shots.sh shenzhi-wang Llama3.1-8B-Chinese-Chat
scripts/eval-shots.sh
ADDED
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+export RESIZE_TOKEN_EMBEDDINGS=true
+export USING_LLAMA_FACTORY=true
+export USING_P1_PROMPT_TEMPLATE=false
+export LOAD_IN_4BIT=false
+
+export ORG_NAME=$1
+export MODEL=$2
+export MODEL_NAME=$ORG_NAME/$MODEL
+
+export LOGICAL_REASONING_RESULTS_PATH=data/${MODEL}_results.csv
+
+echo Evaluating $MODEL_NAME with few-shot learning
+python llm_toolkit/eval_shots.py