dh-mc committed on
Commit
de63924
1 Parent(s): 927a69c

ready for generic prompt

Browse files
.env.example CHANGED
@@ -2,6 +2,7 @@ MODEL_NAME=Qwen/Qwen2-7B-Instruct
2
 
3
  BATCH_SIZE=2
4
  MAX_NEW_TOKENS=300
 
5
 
6
  HF_TOKEN=
7
 
 
2
 
3
  BATCH_SIZE=2
4
  MAX_NEW_TOKENS=300
5
+ USING_CHAT_TEMPLATE=true
6
 
7
  HF_TOKEN=
8
 
.gitignore CHANGED
@@ -151,3 +151,4 @@ dmypy.json
151
  /llama.cpp
152
  /llama-factory/config/models
153
  /codedrive
 
 
151
  /llama.cpp
152
  /llama-factory/config/models
153
  /codedrive
154
+ temp.csv
llm_toolkit/eval_rpp.py CHANGED
@@ -27,6 +27,7 @@ data_path = os.getenv("DATA_PATH")
27
  results_path = os.getenv("RESULTS_PATH")
28
  batch_size = int(os.getenv("BATCH_SIZE", 1))
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
 
30
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
31
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
32
  end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
@@ -63,7 +64,7 @@ if is_cuda:
63
  print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
64
  print(f"{start_gpu_memory} GB of memory reserved.")
65
 
66
- datasets = load_translation_dataset(data_path, tokenizer)
67
 
68
  if len(sys.argv) > 1:
69
  num = int(sys.argv[1])
@@ -82,7 +83,7 @@ def on_repetition_penalty_step_completed(model_name, predictions):
82
  predictions,
83
  )
84
 
85
- metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
86
  print(f"{model_name} metrics: {metrics}")
87
 
88
 
 
27
  results_path = os.getenv("RESULTS_PATH")
28
  batch_size = int(os.getenv("BATCH_SIZE", 1))
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
30
+ using_chat_template = os.getenv("USING_CHAT_TEMPLATE") == "true"
31
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
32
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
33
  end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
 
64
  print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
65
  print(f"{start_gpu_memory} GB of memory reserved.")
66
 
67
+ datasets = load_translation_dataset(data_path, tokenizer, using_chat_template=using_chat_template)
68
 
69
  if len(sys.argv) > 1:
70
  num = int(sys.argv[1])
 
83
  predictions,
84
  )
85
 
86
+ metrics = calc_metrics(datasets["test"]["english"], predictions, datasets["test"]["chinese"], debug=True)
87
  print(f"{model_name} metrics: {metrics}")
88
 
89
 
llm_toolkit/translation_utils.py CHANGED
@@ -118,7 +118,7 @@ def get_few_shot_prompt(dataset, num_shots=5):
118
  return translation_prompt
119
 
120
 
121
- def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False):
122
  train_data_file = data_path.replace(".tsv", "-train.tsv")
123
  test_data_file = data_path.replace(".tsv", "-test.tsv")
124
 
@@ -187,7 +187,8 @@ def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=
187
  else:
188
  prompt = tokenizer.apply_chat_template(
189
  messages, tokenize=False, add_generation_prompt=True
190
- )
 
191
  prompts.append(prompt)
192
  texts.append(prompt + output + tokenizer.eos_token)
193
 
 
118
  return translation_prompt
119
 
120
 
121
+ def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False, using_chat_template=True):
122
  train_data_file = data_path.replace(".tsv", "-train.tsv")
123
  test_data_file = data_path.replace(".tsv", "-test.tsv")
124
 
 
187
  else:
188
  prompt = tokenizer.apply_chat_template(
189
  messages, tokenize=False, add_generation_prompt=True
190
+ ) if using_chat_template else prompt
191
+
192
  prompts.append(prompt)
193
  texts.append(prompt + output + tokenizer.eos_token)
194
 
requirements.txt CHANGED
@@ -13,7 +13,7 @@ packaging
13
  langchain_openai==0.1.13
14
  wandb==0.17.6
15
  transformers==4.43.3
16
- bitsandbytes==0.43.3
17
  sentencepiece==0.1.98
18
  einops==0.8.0
19
  accelerate==0.32.0
 
13
  langchain_openai==0.1.13
14
  wandb==0.17.6
15
  transformers==4.43.3
16
+ bitsandbytes #==0.43.3
17
  sentencepiece==0.1.98
18
  einops==0.8.0
19
  accelerate==0.32.0
scripts/eval-4gpu.sh CHANGED
@@ -31,7 +31,12 @@ export LOAD_IN_4BIT=true
31
  export MAX_NEW_TOKENS=2048
32
  export START_REPETITION_PENALTY=1.0
33
  export END_REPETITION_PENALTY=1.1
34
- export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
 
 
 
 
 
35
 
36
  ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-210
37
 
 
31
  export MAX_NEW_TOKENS=2048
32
  export START_REPETITION_PENALTY=1.0
33
  export END_REPETITION_PENALTY=1.1
34
+
35
+ export USING_CHAT_TEMPLATE=false
36
+ export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
37
+
38
+ # export USING_CHAT_TEMPLATE=true
39
+ # export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
40
 
41
  ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-210
42
 
scripts/eval-mac.sh CHANGED
@@ -43,14 +43,19 @@ export RESULTS_PATH=results/mac-results_fine_tuned.csv
43
  export MAX_NEW_TOKENS=2048
44
  export START_REPETITION_PENALTY=1.0
45
  export END_REPETITION_PENALTY=1.1
46
- export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
47
 
48
- # ./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
 
49
 
50
- # ./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
 
51
 
52
- # ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
53
 
54
- # ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
 
 
 
 
55
 
56
  ./scripts/eval-rpp.sh microsoft Phi-3.5-mini-instruct checkpoint-210
 
43
  export MAX_NEW_TOKENS=2048
44
  export START_REPETITION_PENALTY=1.0
45
  export END_REPETITION_PENALTY=1.1
 
46
 
47
+ export USING_CHAT_TEMPLATE=false
48
+ export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048_generic_prompt.csv
49
 
50
+ # export USING_CHAT_TEMPLATE=true
51
+ # export RESULTS_PATH=results/mac-results_rpp_with_mnt_2048.csv
52
 
53
+ ./scripts/eval-rpp.sh internlm internlm2_5-7b-chat checkpoint-140
54
 
55
+ ./scripts/eval-rpp.sh Qwen Qwen2-7B-Instruct checkpoint-105
56
+
57
+ ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
58
+
59
+ ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
60
 
61
  ./scripts/eval-rpp.sh microsoft Phi-3.5-mini-instruct checkpoint-210