dh-mc committed
Commit 16974cb
1 Parent(s): c4388ab

llama3 epoch 6 shows promising results; adding 4 epochs

competition/00a_InternLM_2.5_Results.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11_Llama-3_8b_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11a_Llama-3_8b_p2_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/llama3-8b_lora_sft_bf16-p1.yaml CHANGED
@@ -25,13 +25,13 @@ output_dir: saves/llama3-8b/lora/sft_bf16_p1_full
 logging_steps: 10
 save_steps: 175
 plot_loss: true
-overwrite_output_dir: true
+# overwrite_output_dir: true
 
 ### train
 per_device_train_batch_size: 16
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
-num_train_epochs: 6.0
+num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
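
Both prompt variants get the same treatment: num_train_epochs goes from 6.0 to 10.0, and overwrite_output_dir is commented out so the existing checkpoints under saves/llama3-8b/lora/sft_bf16_p1_full are not wiped before the extra epochs run. Given save_steps: 175 and the checkpoint-175 … checkpoint-1050 names in the metrics files below, one epoch appears to correspond to 175 optimizer steps, so the four added epochs would end at checkpoint-1750. A minimal sketch of that mapping, assuming save_steps really does land on each epoch boundary:

```python
# Sketch: map epoch numbers to LLaMA-Factory checkpoint directories.
# Assumption: one epoch == 175 optimizer steps (save_steps: 175 appears to
# coincide with the epoch boundary, judging by the checkpoint names in the CSVs).
STEPS_PER_EPOCH = 175      # from save_steps in the config
NUM_TRAIN_EPOCHS = 10      # new value in this commit (was 6)

for epoch in range(1, NUM_TRAIN_EPOCHS + 1):
    step = epoch * STEPS_PER_EPOCH
    print(f"epoch {epoch:2d} -> saves/llama3-8b/lora/sft_bf16_p1_full/checkpoint-{step}")
```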
llama-factory/config/llama3-8b_lora_sft_bf16-p2.yaml CHANGED
@@ -31,7 +31,7 @@ plot_loss: true
 per_device_train_batch_size: 16
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
-num_train_epochs: 6.0
+num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
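
A sketch of how the two updated configs might be launched, assuming LLaMA-Factory's llamafactory-cli entry point is on PATH (the repo may instead drive training through its own wrapper scripts):

```python
import subprocess

# Launch the extended 10-epoch LoRA SFT runs for both prompt variants.
# Assumption: `llamafactory-cli train <config.yaml>` from LLaMA-Factory is available.
for cfg in (
    "llama-factory/config/llama3-8b_lora_sft_bf16-p1.yaml",
    "llama-factory/config/llama3-8b_lora_sft_bf16-p2.yaml",
):
    subprocess.run(["llamafactory-cli", "train", cfg], check=True)
```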
results/mgtv-llama3_p1_full_metrics.csv CHANGED
@@ -1,4 +1,8 @@
 epoch,model,accuracy,precision,recall,f1
 0,shenzhi-wang/Llama3-8B-Chinese-Chat,0.7836666666666666,0.7667122897184859,0.7929173693086004,0.7679400621793133
-1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,0.5686666666666667,0.8071228551961105,0.5686666666666667,0.625398807088777
-2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,0.7043333333333334,0.8108167278539298,0.7043333333333334,0.7421863499027709
+1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,0.292,0.7244497001864332,0.292,0.39603584197818525
+2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,0.314,0.8230975844189062,0.314,0.44460745309834776
+3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525,,,,
+4,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-700,0.0,0.0,0.0,0.0
+5,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-875,,,,
+6,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-1050,0.5506666666666666,0.7209974218469999,0.5506666666666666,0.6017622173499129
results/mgtv-llama3_p2_full_metrics.csv CHANGED
@@ -1,5 +1,8 @@
 epoch,model,accuracy,precision,recall,f1
 0,shenzhi-wang/Llama3-8B-Chinese-Chat,0.73,0.7709739363586101,0.73,0.7462914191370829
-1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,0.773,0.7739158621170704,0.773,0.7642801051494378
-2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,0.7046666666666667,0.814516278555831,0.7046666666666667,0.7453647242165446
-3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525,0.6793333333333333,0.8030704466494853,0.6793333333333333,0.7246368106499855
+1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,,,,
+2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,,,,
+3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525,,,,
+4,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-700,,,,
+5,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-875,,,,
+6,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-1050,0.6656666666666666,0.7924121951113166,0.6656666666666666,0.7124311215236507
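
The blank rows are checkpoints that have not been evaluated yet; with START_EPOCH bumped to 7 in the eval script below, the next run should fill in epochs 7–10. A small sketch for picking the best evaluated checkpoint in each metrics file, assuming pandas is available:

```python
import pandas as pd

def best_checkpoint(path: str) -> pd.Series:
    """Return the best-by-F1 row among the epochs that have been evaluated."""
    df = pd.read_csv(path).dropna(subset=["f1"])  # skip not-yet-evaluated epochs
    return df.loc[df["f1"].idxmax(), ["epoch", "model", "accuracy", "f1"]]

for path in (
    "results/mgtv-llama3_p1_full_metrics.csv",
    "results/mgtv-llama3_p2_full_metrics.csv",
):
    print(path)
    print(best_checkpoint(path).to_string(), "\n")
```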
scripts/eval-mgtv-llama3_8b.sh CHANGED
@@ -18,7 +18,7 @@ grep MemTotal /proc/meminfo
 
 export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
 export RESIZE_TOKEN_EMBEDDINGS=true
-export START_EPOCH=1
+export START_EPOCH=7
 
 #export MODEL_NAME=FlagAlpha/Llama3-Chinese-8B-Instruct
 export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
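
The body of scripts/eval-mgtv-llama3_8b.sh is not part of this diff, but raising START_EPOCH from 1 to 7 suggests the evaluation loop skips the epochs already recorded in the metrics CSVs and only scores the newly trained checkpoints. A hypothetical sketch of such a loop; the evaluate_checkpoint helper and the epoch range are assumptions, not the repo's actual code:

```python
import os

START_EPOCH = int(os.getenv("START_EPOCH", "1"))  # set to 7 by this commit
NUM_TRAIN_EPOCHS = 10                             # matches the updated configs
STEPS_PER_EPOCH = 175                             # matches save_steps: 175

for epoch in range(START_EPOCH, NUM_TRAIN_EPOCHS + 1):
    ckpt = f"saves/llama3-8b/lora/sft_bf16_p1_full/checkpoint-{epoch * STEPS_PER_EPOCH}"
    # evaluate_checkpoint(ckpt) would load the LoRA adapter, score the MGTV eval
    # set, and append a row to the metrics CSV -- hypothetical helper.
    print(f"epoch {epoch}: would evaluate {ckpt}")
```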