llama3 epoch 6 shows promising results; adding 4 epochs
Files changed:

- competition/00a_InternLM_2.5_Results.ipynb (+0 -0)
- competition/11_Llama-3_8b_analysis.ipynb (+0 -0)
- competition/11a_Llama-3_8b_p2_analysis.ipynb (+0 -0)
- llama-factory/config/llama3-8b_lora_sft_bf16-p1.yaml (+2 -2)
- llama-factory/config/llama3-8b_lora_sft_bf16-p2.yaml (+1 -1)
- results/mgtv-llama3_p1_full_metrics.csv (+6 -2)
- results/mgtv-llama3_p2_full_metrics.csv (+6 -3)
- scripts/eval-mgtv-llama3_8b.sh (+1 -1)
competition/00a_InternLM_2.5_Results.ipynb (CHANGED)

The diff for this notebook is too large to render; see the raw diff.

competition/11_Llama-3_8b_analysis.ipynb (CHANGED)

The diff for this notebook is too large to render; see the raw diff.

competition/11a_Llama-3_8b_p2_analysis.ipynb (CHANGED)

The diff for this notebook is too large to render; see the raw diff.
llama-factory/config/llama3-8b_lora_sft_bf16-p1.yaml (CHANGED)

@@ -25,13 +25,13 @@ output_dir: saves/llama3-8b/lora/sft_bf16_p1_full
 logging_steps: 10
 save_steps: 175
 plot_loss: true
-overwrite_output_dir: true
+# overwrite_output_dir: true

 ### train
 per_device_train_batch_size: 16
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
-num_train_epochs: 6.0
+num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
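A quick sanity check on the schedule this change implies, assuming the checkpoint-to-epoch mapping suggested by the metrics CSVs below (checkpoint-175 through checkpoint-1050 scored as epochs 1 through 6, i.e. save_steps lands on epoch boundaries). Commenting out overwrite_output_dir presumably lets the extended run keep the checkpoints already in output_dir rather than clobbering them. The numbers here are inferences, not values stated in the repo:

# Effective batch and epoch arithmetic implied by this config. The per-epoch
# sample count is an inference from save_steps lining up with epoch
# boundaries (and assumes a single GPU); it is not stated anywhere in the repo.
per_device_train_batch_size = 16
gradient_accumulation_steps = 8
save_steps = 175  # one checkpoint per epoch, per the metrics CSVs

effective_batch = per_device_train_batch_size * gradient_accumulation_steps
print(effective_batch)               # 128 samples per optimizer step
print(save_steps * effective_batch)  # ~22,400 training samples per epoch
print(10 * save_steps)               # the 10-epoch run should end near checkpoint-1750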
llama-factory/config/llama3-8b_lora_sft_bf16-p2.yaml (CHANGED)

@@ -31,7 +31,7 @@ plot_loss: true
 per_device_train_batch_size: 16
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
-num_train_epochs: 6.0
+num_train_epochs: 10.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
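The p2 config gets the same bump from 6 to 10 epochs. Both files are LLaMA-Factory training configs; assuming the repo uses that project's standard entry point, a run would be launched with something like `llamafactory-cli train llama-factory/config/llama3-8b_lora_sft_bf16-p2.yaml` (the launch command itself is not part of this diff).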
results/mgtv-llama3_p1_full_metrics.csv (CHANGED)

@@ -1,4 +1,8 @@
 epoch,model,accuracy,precision,recall,f1
 0,shenzhi-wang/Llama3-8B-Chinese-Chat,0.7836666666666666,0.7667122897184859,0.7929173693086004,0.7679400621793133
-1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,0.
-2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,0.
+1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,0.292,0.7244497001864332,0.292,0.39603584197818525
+2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,0.314,0.8230975844189062,0.314,0.44460745309834776
+3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525,,,,
+4,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-700,0.0,0.0,0.0,0.0
+5,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-875,,,,
+6,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-1050,0.5506666666666666,0.7209974218469999,0.5506666666666666,0.6017622173499129
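Two patterns in these rows are worth noting: accuracy equals recall in every populated row, which is the signature of weighted-average recall on a multiclass task, and the empty rows (epochs 3 and 5) along with the all-zero epoch 4 row look like evaluations that failed or had not completed by commit time. A minimal sketch of how such rows could be produced, assuming scikit-learn's weighted averaging; y_true and y_pred are hypothetical stand-ins:

# Minimal sketch of producing one metrics row, assuming scikit-learn with
# average="weighted". With weighted averaging, recall reduces to overall
# accuracy, matching the accuracy == recall pattern in every populated row.
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

y_true = ["A", "B", "B", "C"]  # hypothetical gold labels
y_pred = ["A", "B", "C", "C"]  # hypothetical model outputs

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="weighted", zero_division=0
)
print(f"{accuracy},{precision},{recall},{f1}")  # recall == accuracy, as in the CSV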
results/mgtv-llama3_p2_full_metrics.csv (CHANGED)

@@ -1,5 +1,8 @@
 epoch,model,accuracy,precision,recall,f1
 0,shenzhi-wang/Llama3-8B-Chinese-Chat,0.73,0.7709739363586101,0.73,0.7462914191370829
-1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175
-2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350
-3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525
+1,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-175,,,,
+2,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-350,,,,
+3,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-525,,,,
+4,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-700,,,,
+5,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-875,,,,
+6,shenzhi-wang/Llama3-8B-Chinese-Chat_checkpoint-1050,0.6656666666666666,0.7924121951113166,0.6656666666666666,0.7124311215236507
scripts/eval-mgtv-llama3_8b.sh (CHANGED)

@@ -18,7 +18,7 @@ grep MemTotal /proc/meminfo

 export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
 export RESIZE_TOKEN_EMBEDDINGS=true
-export START_EPOCH=
+export START_EPOCH=7

 #export MODEL_NAME=FlagAlpha/Llama3-Chinese-8B-Instruct
 export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
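START_EPOCH moves to 7 so evaluation resumes where the six scored epochs left off, once the extended training run produces checkpoints 1225 through 1750. A hypothetical reconstruction of how the eval loop might consume the variable; the epoch-to-step mapping (epoch * 175) follows the checkpoint names in the metrics CSVs, but the directory layout and loop structure are assumptions:

# Hypothetical sketch: resume checkpoint evaluation at START_EPOCH. The
# epoch-to-step mapping matches the checkpoint names in the metrics CSVs;
# output_dir comes from the p1 config's hunk header. The loop itself is an
# assumption, not code from the repo.
import os

start_epoch = int(os.environ.get("START_EPOCH", "1"))
num_train_epochs = 10
save_steps = 175
output_dir = "saves/llama3-8b/lora/sft_bf16_p1_full"

for epoch in range(start_epoch, num_train_epochs + 1):
    checkpoint = f"{output_dir}/checkpoint-{epoch * save_steps}"
    print(f"epoch {epoch}: evaluate adapter at {checkpoint}")
    # ... run inference, score, and append a row to the full_metrics CSV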