ready for generic prompt
Files changed:
- .env.example +1 -0
- .gitignore +1 -0
- llm_toolkit/eval_rpp.py +3 -2
- llm_toolkit/translation_utils.py +3 -2
- requirements.txt +1 -1
- scripts/eval-4gpu.sh +6 -1
- scripts/eval-mac.sh +10 -5
.env.example
CHANGED
@@ -2,6 +2,7 @@ MODEL_NAME=Qwen/Qwen2-7B-Instruct
 
 BATCH_SIZE=2
 MAX_NEW_TOKENS=300
+USING_CHAT_TEMPLATE=true
 
 HF_TOKEN=
 
.gitignore
CHANGED
@@ -151,3 +151,4 @@ dmypy.json
 /llama.cpp
 /llama-factory/config/models
 /codedrive
+temp.csv
llm_toolkit/eval_rpp.py
CHANGED
@@ -27,6 +27,7 @@ data_path = os.getenv("DATA_PATH")
 results_path = os.getenv("RESULTS_PATH")
 batch_size = int(os.getenv("BATCH_SIZE", 1))
 use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
+using_chat_template = os.getenv("USING_CHAT_TEMPLATE") == "true"
 max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
 start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
 end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
@@ -63,7 +64,7 @@ if is_cuda:
     print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
     print(f"{start_gpu_memory} GB of memory reserved.")
 
-datasets = load_translation_dataset(data_path, tokenizer)
+datasets = load_translation_dataset(data_path, tokenizer, using_chat_template=using_chat_template)
 
 if len(sys.argv) > 1:
     num = int(sys.argv[1])
@@ -82,7 +83,7 @@ def on_repetition_penalty_step_completed(model_name, predictions):
         predictions,
     )
 
-    metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
+    metrics = calc_metrics(datasets["test"]["english"], predictions, datasets["test"]["chinese"], debug=True)
     print(f"{model_name} metrics: {metrics}")
 
 
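Note on the flag parsing above: the expression os.getenv("USING_CHAT_TEMPLATE") == "true" enables the flag only for the exact lowercase string "true"; any other value, including an unset variable, yields False. A minimal standalone sketch (not repo code) of that behavior:

import os

# Same parsing pattern as the diff: only the exact string "true" enables the flag.
for value in ("true", "True", "1", None):
    if value is None:
        os.environ.pop("USING_CHAT_TEMPLATE", None)  # simulate an unset variable
    else:
        os.environ["USING_CHAT_TEMPLATE"] = value
    using_chat_template = os.getenv("USING_CHAT_TEMPLATE") == "true"
    print(value, "->", using_chat_template)  # only "true" prints True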
llm_toolkit/translation_utils.py
CHANGED
@@ -118,7 +118,7 @@ def get_few_shot_prompt(dataset, num_shots=5):
     return translation_prompt
 
 
-def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False):
+def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False, using_chat_template=True):
     train_data_file = data_path.replace(".tsv", "-train.tsv")
     test_data_file = data_path.replace(".tsv", "-test.tsv")
 
@@ -187,7 +187,8 @@ def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=
         else:
             prompt = tokenizer.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
-            )
+            ) if using_chat_template else prompt
+
         prompts.append(prompt)
         texts.append(prompt + output + tokenizer.eos_token)
 
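The conditional expression added here keeps the previously built generic prompt when the flag is off, and replaces it with the chat-template rendering when it is on. A minimal self-contained sketch of the two paths; FakeTokenizer is a stand-in for a Hugging Face tokenizer (the real code calls transformers' apply_chat_template), not the repo's actual code:

class FakeTokenizer:
    eos_token = "</s>"

    def apply_chat_template(self, messages, tokenize=False, add_generation_prompt=True):
        # Crude stand-in for transformers' chat-template rendering.
        rendered = "".join(f"<|{m['role']}|>\n{m['content']}\n" for m in messages)
        return rendered + "<|assistant|>\n" if add_generation_prompt else rendered


def build_prompt(generic_prompt, messages, tokenizer, using_chat_template=True):
    prompt = generic_prompt
    # Mirrors the changed line: wrap with the chat template, or keep the
    # generic prompt untouched when USING_CHAT_TEMPLATE=false.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    ) if using_chat_template else prompt
    return prompt


tok = FakeTokenizer()
msgs = [{"role": "user", "content": "Translate to English: 你好"}]
print(build_prompt("Translate to English: 你好\n", msgs, tok, using_chat_template=False))
print(build_prompt("Translate to English: 你好\n", msgs, tok, using_chat_template=True))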
requirements.txt
CHANGED
@@ -13,7 +13,7 @@ packaging
 langchain_openai==0.1.13
 wandb==0.17.6
 transformers==4.43.3
-bitsandbytes
+bitsandbytes #==0.43.3
 sentencepiece==0.1.98
 einops==0.8.0
 accelerate==0.32.0
scripts/eval-4gpu.sh
CHANGED
@@ -31,7 +31,12 @@ export LOAD_IN_4BIT=true
 export MAX_NEW_TOKENS=2048
 export START_REPETITION_PENALTY=1.0
 export END_REPETITION_PENALTY=1.1
-
+
+export USING_CHAT_TEMPLATE=false
+export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
+
+# export USING_CHAT_TEMPLATE=true
+# export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
 ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-210
 
scripts/eval-mac.sh
CHANGED
@@ -43,14 +43,19 @@ export RESULTS_PATH=results/mac-results_fine_tuned.csv
 export MAX_NEW_TOKENS=2048
 export START_REPETITION_PENALTY=1.0
 export END_REPETITION_PENALTY=1.1
-export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
-
+export USING_CHAT_TEMPLATE=false
+export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
 
-#
+# export USING_CHAT_TEMPLATE=true
+# export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
-
+./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
 
-
+./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
+
+./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
+
+./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
 
 ./scripts/eval-rpp.sh microsoft Phi-3.5-mini-instruct checkpoint-210
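As committed, both scripts run the generic-prompt configuration (USING_CHAT_TEMPLATE=false, results written to mac-results_rpp_with_mnt_2048_generic_prompt.csv). To repeat the chat-template run, swap in the commented-out export pair so results land in mac-results_rpp_with_mnt_2048.csv instead.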