dh-mc committed on
Commit
c4a6675
·
1 Parent(s): 1b0a558

ready for internlm r3

Browse files
competition/00d_Llama3_Results.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11b_Llama-3_8b_p1_en_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11b_Llama-3_8b_p2_en_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11d_Llama-3_8b_p1_r3_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
competition/11d_Llama-3_8b_p2_r3_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/internlm2_5_7b_lora_sft_bf16_p2_full_r3.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: internlm/internlm2_5-7b-chat-1m
3
+
4
+ ### method
5
+ stage: sft
6
+ do_train: true
7
+ finetuning_type: lora
8
+ lora_target: all
9
+ # quantization_bit: 4 # use 4-bit QLoRA
10
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
11
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
12
+ upcast_layernorm: true
13
+
14
+ ### dataset
15
+ dataset: alpaca_mgtv_p2
16
+ template: intern2
17
+ cutoff_len: 8192
18
+ max_samples: 25000
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 16
21
+
22
+ ### output
23
+ output_dir: saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3
24
+ logging_steps: 10
25
+ save_steps: 35
26
+ plot_loss: true
27
+ # overwrite_output_dir: true
28
+
29
+ ### train
30
+ per_device_train_batch_size: 16
31
+ gradient_accumulation_steps: 8
32
+ learning_rate: 1.0e-4
33
+ num_train_epochs: 2.0
34
+ lr_scheduler_type: cosine
35
+ warmup_ratio: 0.1
36
+ bf16: true
37
+ ddp_timeout: 180000000
38
+
39
+ ### eval
40
+ val_size: 0.1
41
+ per_device_eval_batch_size: 1
42
+ eval_strategy: steps
43
+ eval_steps: 35
44
+
45
+ report_to: wandb
46
+ run_name: internlm2_5_7b_p2_l40_r3 # optional
results/mgtv-llama3_p1_en_full_metrics.csv CHANGED
@@ -1,5 +1,5 @@
1
  epoch,model,accuracy,precision,recall,f1
2
- 0.0,meta-llama/Meta-Llama-3-8B-Instruct_torch.bfloat16_lf,0.13333333333333333,0.5430486329272943,0.13333333333333333,0.17807889451865855
3
  0.3333333333333333,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-117_torch.bfloat16_lf,0.6486666666666666,0.6525934632970077,0.6486666666666666,0.6312721163517108
4
  0.6666666666666666,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-234_torch.bfloat16_lf,0.561,0.6897096276142071,0.561,0.6083393704375663
5
  1.0,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-351_torch.bfloat16_lf,0.621,0.686842945161901,0.621,0.6417441253605001
 
1
  epoch,model,accuracy,precision,recall,f1
2
+ 0.0,meta-llama/Meta-Llama-3-8B-Instruct_torch.bfloat16_lf,0.139,0.5741012854071863,0.13899999999999998,0.18649813199749535
3
  0.3333333333333333,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-117_torch.bfloat16_lf,0.6486666666666666,0.6525934632970077,0.6486666666666666,0.6312721163517108
4
  0.6666666666666666,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-234_torch.bfloat16_lf,0.561,0.6897096276142071,0.561,0.6083393704375663
5
  1.0,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-351_torch.bfloat16_lf,0.621,0.686842945161901,0.621,0.6417441253605001
results/mgtv-llama3_p1_r3_full_metrics.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ epoch,model,accuracy,precision,recall,f1
2
+ 0.0,hfl/llama-3-chinese-8b-instruct-v3_torch.bfloat16_lf,0.4563333333333333,0.6744501777722286,0.4563333333333333,0.5301217655617138
3
+ 0.2,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-35_torch.bfloat16_lf,0.6406666666666667,0.7652410757316818,0.6406666666666667,0.686506602559313
4
+ 0.4,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-70_torch.bfloat16_lf,0.7223333333333334,0.761495200766395,0.7223333333333334,0.7296692772947554
5
+ 0.6,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-105_torch.bfloat16_lf,0.6256666666666667,0.7694288569564225,0.6256666666666667,0.6747421032154606
6
+ 0.8,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-140_torch.bfloat16_lf,0.7173333333333334,0.7746925589996599,0.7173333333333334,0.7391047683054164
7
+ 1.0,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-175_torch.bfloat16_lf,0.688,0.7678475683985698,0.688,0.7181965107150645
results/mgtv-llama3_p2_en_full_metrics.csv CHANGED
@@ -1,5 +1,5 @@
1
  epoch,model,accuracy,precision,recall,f1
2
- 0.0,meta-llama/Meta-Llama-3-8B-Instruct_torch.bfloat16_lf,0.17533333333333334,0.5690657625063544,0.17533333333333334,0.21427445914454485
3
  0.3333333333333333,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-117_torch.bfloat16_lf,0.6203333333333333,0.663582082981778,0.6203333333333333,0.6363626392286635
4
  0.6666666666666666,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-234_torch.bfloat16_lf,0.5613333333333334,0.7000506187405509,0.5613333333333334,0.6113039056178092
5
  1.0,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-351_torch.bfloat16_lf,0.6203333333333333,0.6819200833733873,0.6203333333333333,0.6405153767205392
 
1
  epoch,model,accuracy,precision,recall,f1
2
+ 0.0,meta-llama/Meta-Llama-3-8B-Instruct_torch.bfloat16_lf,0.15466666666666667,0.5218515852800517,0.15466666666666667,0.17611762474007195
3
  0.3333333333333333,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-117_torch.bfloat16_lf,0.6203333333333333,0.663582082981778,0.6203333333333333,0.6363626392286635
4
  0.6666666666666666,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-234_torch.bfloat16_lf,0.5613333333333334,0.7000506187405509,0.5613333333333334,0.6113039056178092
5
  1.0,meta-llama/Meta-Llama-3-8B-Instruct/checkpoint-351_torch.bfloat16_lf,0.6203333333333333,0.6819200833733873,0.6203333333333333,0.6405153767205392
results/mgtv-llama3_p2_r3_full_metrics.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ epoch,model,accuracy,precision,recall,f1
2
+ 0.0,hfl/llama-3-chinese-8b-instruct-v3_torch.bfloat16_lf,0.25066666666666665,0.6852419041932336,0.25066666666666665,0.32636449818329016
3
+ 0.2,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-35_torch.bfloat16_lf,0.7283333333333334,0.7722393813259697,0.7283333333333334,0.7426450360790026
4
+ 0.4,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-70_torch.bfloat16_lf,0.741,0.7868300593752113,0.741,0.7514058688729928
scripts/eval-mgtv-internlm.sh CHANGED
@@ -20,8 +20,8 @@ pip install transformers==4.41.2
20
 
21
  export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
22
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
23
- export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full_r2.csv
24
- export ADAPTER_PATH_BASE=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r2
25
  export USING_LLAMA_FACTORY=true
26
  export START_EPOCH=4
27
 
 
20
 
21
  export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
22
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
23
+ export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full_r3.csv
24
+ export ADAPTER_PATH_BASE=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3
25
  export USING_LLAMA_FACTORY=true
26
  export START_EPOCH=4
27
 
scripts/tune-mgtv-internlm.sh CHANGED
@@ -25,8 +25,12 @@ export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p1_full.yaml
25
 
26
  #export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full.csv
27
  #export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full.yaml
28
- export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full_r2.csv
29
- export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full_r2.yaml
 
 
30
 
31
  echo "Tuning with $CONFIG_FILE"
32
  $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
 
 
 
25
 
26
  #export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full.csv
27
  #export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full.yaml
28
+ # export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full_r2.csv
29
+ # export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full_r2.yaml
30
+ export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full_r3.csv
31
+ export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full_r3.yaml
32
 
33
  echo "Tuning with $CONFIG_FILE"
34
  $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
35
+
36
+ $BASEDIR/scripts/eval-mgtv-internlm.sh
scripts/tune-mgtv.sh CHANGED
@@ -1 +1 @@
1
- tune-mgtv-llama3_8b.sh
 
1
+ tune-mgtv-internlm.sh