dh-mc committed on
Commit
58a3992
·
1 Parent(s): 144336f
.gitattributes CHANGED
@@ -64,3 +64,6 @@ results/mgtv-results_merged_model.csv filter=lfs diff=lfs merge=lfs -text
64
  results/mgtv-results_p1_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
65
  results/mgtv-results_p2_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
66
  results/llama3-8b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
 
67
  filter=lfs diff=lfs merge=lfs -text
 
 
 
64
  results/mgtv-results_p1_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
65
  results/mgtv-results_p2_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
66
  results/llama3-8b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
67
+ llama-factory/data/Icon
68
  filter=lfs diff=lfs merge=lfs -text
69
+ llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
70
+ llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text
competition/00a_InternLM2.5_Llama3_GLM4_Results.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/00a_r2_InternLM2.5_Llama3_GLM4_Results.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
competition/03_EDA_en.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11b_Llama-3_8b_p1_r2_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
competition/11c_Llama-3_8b_p2_r2_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
3
+
4
+ ### method
5
+ stage: sft
6
+ do_train: true
7
+ finetuning_type: lora
8
+ lora_target: all
9
+ # quantization_bit: 4 # use 4-bit QLoRA
10
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
11
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
12
+ upcast_layernorm: true
13
+
14
+ ### dataset
15
+ dataset: alpaca_mgtv_p1_en
16
+ template: llama3
17
+ cutoff_len: 4096
18
+ max_samples: 25000
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 16
21
+
22
+ ### output
23
+ output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en
24
+ logging_steps: 10
25
+ save_steps: 175
26
+ plot_loss: true
27
+ # overwrite_output_dir: true
28
+
29
+ ### train
30
+ per_device_train_batch_size: 16
31
+ gradient_accumulation_steps: 8
32
+ learning_rate: 1.0e-4
33
+ num_train_epochs: 1.0
34
+ lr_scheduler_type: cosine
35
+ warmup_ratio: 0.1
36
+ bf16: true
37
+ ddp_timeout: 180000000
38
+
39
+ ### eval
40
+ val_size: 0.1
41
+ per_device_eval_batch_size: 1
42
+ eval_strategy: steps
43
+ eval_steps: 175
44
+
45
+ report_to: wandb
46
+ run_name: llama3_8b_p1_en # optional
llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
3
+
4
+ ### method
5
+ stage: sft
6
+ do_train: true
7
+ finetuning_type: lora
8
+ lora_target: all
9
+ # quantization_bit: 4 # use 4-bit QLoRA
10
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
11
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
12
+ upcast_layernorm: true
13
+
14
+ ### dataset
15
+ dataset: alpaca_mgtv_p2_en
16
+ template: llama3
17
+ cutoff_len: 4096
18
+ max_samples: 25000
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 16
21
+
22
+ ### output
23
+ output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en
24
+ logging_steps: 10
25
+ save_steps: 175
26
+ plot_loss: true
27
+ # overwrite_output_dir: true
28
+
29
+ ### train
30
+ per_device_train_batch_size: 16
31
+ gradient_accumulation_steps: 8
32
+ learning_rate: 1.0e-4
33
+ num_train_epochs: 1.0
34
+ lr_scheduler_type: cosine
35
+ warmup_ratio: 0.1
36
+ bf16: true
37
+ ddp_timeout: 180000000
38
+
39
+ ### eval
40
+ val_size: 0.1
41
+ per_device_eval_batch_size: 1
42
+ eval_strategy: steps
43
+ eval_steps: 175
44
+
45
+ report_to: wandb
46
+ run_name: llama3_8b_p2_en # optional
llama-factory/data/alpaca_mgtv_p1_en.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d88d2715925c53b13b1e393a1e5c1b0a62a98ddd273a4a6ae6dbbfd6ef96c32c
3
+ size 44594582
llama-factory/data/alpaca_mgtv_p2_en.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cd205feb846c67906926afdf1776ae05a210c6f1653a04e753f071f8506859
3
+ size 57644582
llama-factory/data/dataset_info.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a491ee69f583486d9649f5fb4ef2f06c72d5f4397d98afec654b1e917901e66a
3
- size 13750
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f4455a35b493baea9718535166c71778ba14994522057129a99b3614b659aa
3
+ size 13894
llm_toolkit/logical_reasoning_utils.py CHANGED
@@ -24,6 +24,22 @@ P1 = """你是一个逻辑游戏的主持人。游戏规则如下:
24
  参与者提出的问题: {}
25
  """
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下:
28
 
29
  1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。
@@ -45,6 +61,34 @@ P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下:
45
  **参与者提出的问题:** {}
46
  """
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def extract_answer(text, debug=False):
50
  if text:
@@ -136,30 +180,7 @@ def load_logical_reasoning_dataset(
136
  )
137
 
138
  if tokenizer:
139
- reasoning_prompt = (
140
- (P1 if using_p1 else P2)
141
- if chinese_prompt
142
- else """You are the host of a situational guessing game. The rules of the game are as follows:
143
-
144
- 1. Participants will receive a riddle that describes a simple yet difficult to understand event.
145
- 2. The host knows the answer, which is the solution to the riddle.
146
- 3. Participants can ask any closed-ended questions to uncover the truth of the event.
147
- 4. For each question, the host will respond with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning. The criteria for each response are as follows:
148
- - If the riddle and answer can provide an answer to the question, respond with: Yes or No
149
- - If the riddle and answer cannot directly or indirectly infer an answer to the question, respond with: Unimportant
150
- - If the participant's question is not a closed-ended question or is difficult to understand, respond with: Incorrect questioning
151
- - If the participant's question essentially reveals the truth of the answer, respond with: Correct answer
152
- 5. The response must not include any additional information, nor should any word be omitted from the options. For example, "No" cannot be abbreviated to "N".
153
-
154
- Please strictly follow these rules when answering the participant's questions.
155
-
156
- **Riddle:** {}
157
-
158
- **Answer:** {}
159
-
160
- **Participant's question:** {}
161
- """
162
- )
163
 
164
  def formatting_prompts_func(examples):
165
  inputs = examples["text"]
@@ -234,17 +255,22 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
234
  else "llama-factory/data/alpaca_mgtv_p2.json"
235
  )
236
 
 
 
 
237
  if os.path.exists(alpaca_data_path):
238
  print("loading existing data from:", alpaca_data_path)
239
  data = pd.read_json(alpaca_data_path, orient="records", lines=False)
240
  return data
241
 
242
  print("loading new data from:", alpaca_data_path)
 
243
  datasets = load_logical_reasoning_dataset(
244
- data_path, chinese_prompt=not use_english_datasets
245
  )
246
 
247
- prompt_template = P1 if using_p1 else P2
 
248
  df_train = datasets["train"].to_pandas()
249
  df_train["instruction"] = df_train.apply(
250
  lambda x: prompt_template.format(x["puzzle"], x["truth"], x["text"]), axis=1
 
24
  参与者提出的问题: {}
25
  """
26
 
27
+ P1_en = """You are the host of a logic game. The rules of the game are as follows:
28
+
29
+ 1. Participants will receive a puzzle.
30
+ 2. Participants can ask questions to obtain clues and try to solve the puzzle.
31
+ 3. For each question, the host will answer with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning.
32
+ 4. The answer cannot include any additional information, nor can any word in the options be omitted. For example, “No” cannot be shortened to “N”.
33
+ 5. Participants need to infer and ultimately find the correct answer to the puzzle based on the responses.
34
+
35
+ Please strictly adhere to these rules when answering participants’ questions.
36
+
37
+ Puzzle: {}
38
+
39
+ Actual situation: {}
40
+
41
+ Question from participants: {}"""
42
+
43
  P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下:
44
 
45
  1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。
 
61
  **参与者提出的问题:** {}
62
  """
63
 
64
+ P2_en = """You are the host of a situational guessing game. The rules of the game are as follows:
65
+
66
+ 1. Participants will receive a riddle that describes a simple yet difficult to understand event.
67
+ 2. The host knows the answer, which is the solution to the riddle.
68
+ 3. Participants can ask any closed-ended questions to uncover the truth of the event.
69
+ 4. For each question, the host will respond with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning. The criteria for each response are as follows:
70
+ - If the riddle and answer can provide an answer to the question, respond with: Yes or No
71
+ - If the riddle and answer cannot directly or indirectly infer an answer to the question, respond with: Unimportant
72
+ - If the participant's question is not a closed-ended question or is difficult to understand, respond with: Incorrect questioning
73
+ - If the participant's question essentially reveals the truth of the answer, respond with: Correct answer
74
+ 5. The response must not include any additional information, nor should any word be omitted from the options. For example, "No" cannot be abbreviated to "N".
75
+
76
+ Please strictly follow these rules when answering the participant's questions.
77
+
78
+ **Riddle:** {}
79
+
80
+ **Answer:** {}
81
+
82
+ **Participant's question:** {}
83
+ """
84
+
85
+
86
+ def get_prompt_template(using_p1=True, chinese_prompt=True):
87
+ if using_p1:
88
+ return P1 if chinese_prompt else P1_en
89
+ else:
90
+ return P2 if chinese_prompt else P2_en
91
+
92
 
93
  def extract_answer(text, debug=False):
94
  if text:
 
180
  )
181
 
182
  if tokenizer:
183
+ reasoning_prompt = get_prompt_template(using_p1, chinese_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  def formatting_prompts_func(examples):
186
  inputs = examples["text"]
 
255
  else "llama-factory/data/alpaca_mgtv_p2.json"
256
  )
257
 
258
+ if use_english_datasets:
259
+ alpaca_data_path = alpaca_data_path.replace(".json", "_en.json")
260
+
261
  if os.path.exists(alpaca_data_path):
262
  print("loading existing data from:", alpaca_data_path)
263
  data = pd.read_json(alpaca_data_path, orient="records", lines=False)
264
  return data
265
 
266
  print("loading new data from:", alpaca_data_path)
267
+ chinese_prompt = not use_english_datasets
268
  datasets = load_logical_reasoning_dataset(
269
+ data_path, using_p1=using_p1, chinese_prompt=chinese_prompt
270
  )
271
 
272
+ prompt_template = get_prompt_template(using_p1, chinese_prompt)
273
+
274
  df_train = datasets["train"].to_pandas()
275
  df_train["instruction"] = df_train.apply(
276
  lambda x: prompt_template.format(x["puzzle"], x["truth"], x["text"]), axis=1
results/mgtv-llama3_p1_r2_full_metrics.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ epoch,model,accuracy,precision,recall,f1
2
+ 0,shenzhi-wang/Llama3-8B-Chinese-Chat_torch.bfloat16,0.7836666666666666,0.7667122897184859,0.7929173693086004,0.7679400621793133
3
+ 1,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-175_torch.bfloat16,0.7706666666666667,0.8072750943858197,0.7706666666666667,0.7835719791561528
4
+ 2,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-350_torch.bfloat16,0.724,0.8118050163437011,0.724,0.7562266825513707
5
+ 3,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-525_torch.bfloat16,0.6756666666666666,0.7811762160181578,0.6756666666666666,0.7108457483297581
6
+ 4,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-700_torch.bfloat16,0.6496666666666666,0.779896556141616,0.6496666666666666,0.6931844557591907
results/mgtv-llama3_p2_r2_full_metrics.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ epoch,model,accuracy,precision,recall,f1
2
+ 0,shenzhi-wang/Llama3-8B-Chinese-Chat_torch.bfloat16,0.73,0.7709739363586101,0.73,0.7462914191370829
3
+ 1,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-175_torch.bfloat16,0.718,0.8113087212796575,0.718,0.75010620119596
4
+ 2,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-350_torch.bfloat16,0.7273333333333334,0.8025119533060184,0.7273333333333334,0.7549822040428019
5
+ 3,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-525_torch.bfloat16,0.6883333333333334,0.7816168321128566,0.6883333333333334,0.716763388345211
6
+ 4,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-700_torch.bfloat16,0.6406666666666667,0.7636301286950402,0.6406666666666667,0.6807929233309169
scripts/eval-mgtv-llama3_8b_en.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ BASEDIR=$(dirname "$0")
4
+ cd $BASEDIR/..
5
+ echo Current Directory:
6
+ pwd
7
+
8
+ BASEDIR=`pwd`
9
+
10
+ nvidia-smi
11
+ uname -a
12
+ cat /etc/os-release
13
+ lscpu
14
+ grep MemTotal /proc/meminfo
15
+
16
+ #pip install -r requirements.txt
17
+ #cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
18
+
19
+ export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
20
+ export RESIZE_TOKEN_EMBEDDINGS=true
21
+ export START_EPOCH=0
22
+ export USING_LLAMA_FACTORY=true
23
+
24
+ export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
25
+ export MODEL_PREFIX=llama3-8b_lora_sft_bf16
26
+
27
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en.csv
28
+ export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en
29
+ export USING_P1_PROMPT_TEMPLATE=true
30
+ echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
31
+ python llm_toolkit/eval_logical_reasoning_all_epochs.py
32
+
33
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en.csv
34
+ export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en
35
+ export USING_P1_PROMPT_TEMPLATE=false
36
+ echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
37
+ python llm_toolkit/eval_logical_reasoning_all_epochs.py
scripts/tune-mgtv-llama3_8b_en.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ BASEDIR=$(dirname "$0")
4
+ cd $BASEDIR/..
5
+ echo Current Directory:
6
+ pwd
7
+
8
+ BASEDIR=`pwd`
9
+
10
+ nvidia-smi
11
+ uname -a
12
+ cat /etc/os-release
13
+ lscpu
14
+ grep MemTotal /proc/meminfo
15
+
16
+ #pip install -r requirements.txt
17
+ #cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes] && cd $BASEDIR
18
+ #pip install transformers==4.41.2
19
+ #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
20
+
21
+ export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
22
+
23
+ export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
24
+
25
+ export MODEL_PREFIX=llama3-8b_lora_sft_bf16
26
+
27
+ export CONFIG_FILE=config/$MODEL_PREFIX-p1_en.yaml
28
+ echo "Tuning $MODEL_NAME with $CONFIG_FILE"
29
+ $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
30
+
31
+
32
+ export CONFIG_FILE=config/$MODEL_PREFIX-p2_en.yaml
33
+ echo "Tuning $MODEL_NAME with $CONFIG_FILE"
34
+ $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
35
+
36
+
37
+ $BASEDIR/scripts/eval-mgtv-llama3_8b_en.sh
scripts/tune-mgtv.sh CHANGED
@@ -1 +1 @@
1
- tune-mgtv-llama3_8b.sh
 
1
+ tune-mgtv-llama3_8b_en.sh