dh-mc committed
Commit dc1ac39 · 1 Parent(s): c817aef

updated logging/save/eval steps

llama-factory/config/qwen2_72b_lora_sft_4bit-p1.yaml CHANGED
@@ -21,15 +21,15 @@ preprocessing_num_workers: 16
 
 ### output
 output_dir: saves/qwen2-72b/lora/sft_4bit_p1_full
-logging_steps: 10
-save_steps: 88
+logging_steps: 100
+save_steps: 2109
 plot_loss: true
 overwrite_output_dir: true
 # resume_from_checkpoint: true
 
 ### train
-per_device_train_batch_size: 2
-gradient_accumulation_steps: 4
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
@@ -41,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps: 88
+eval_steps: 2109
 
 report_to: wandb
 run_name: qwen2_72b_4bit_p1_full # optional
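
For context: `save_steps` and `eval_steps` count optimizer steps, and one optimizer step consumes `per_device_train_batch_size × gradient_accumulation_steps × num_gpus` examples. In p1 the effective batch size is unchanged (2×4 = 1×8 per device), so the 88 → 2109 jump presumably re-aligns checkpointing and evaluation with the actual steps per epoch. Neither the dataset size nor the GPU count appears in this diff, so the sketch below uses placeholder values for both; only the batch/accumulation/epoch numbers come from the updated config.

```python
import math

# Placeholder values -- dataset size and GPU count are NOT in the diff.
dataset_size = 50_000          # training examples after the val split (assumption)
num_gpus = 8                   # parallel data-parallel workers (assumption)

# Values from the updated p1 config.
per_device_train_batch_size = 1
gradient_accumulation_steps = 8
num_train_epochs = 3.0

# Examples consumed per optimizer step, across all devices.
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus

steps_per_epoch = math.ceil(dataset_size / effective_batch_size)
total_steps = math.ceil(steps_per_epoch * num_train_epochs)

print(f"effective batch size:      {effective_batch_size}")
print(f"optimizer steps per epoch: {steps_per_epoch}")
print(f"total optimizer steps:     {total_steps}")
# Setting save_steps / eval_steps near steps_per_epoch gives roughly one
# checkpoint and one eval pass per epoch instead of dozens per run.
```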
llama-factory/config/qwen2_72b_lora_sft_4bit-p2.yaml CHANGED
@@ -21,14 +21,14 @@ preprocessing_num_workers: 16
 
 ### output
 output_dir: saves/qwen2-72b/lora/sft_4bit_p2_full
-logging_steps: 10
-save_steps: 88
+logging_steps: 100
+save_steps: 2109
 plot_loss: true
 overwrite_output_dir: true
 # resume_from_checkpoint: true
 
 ### train
-per_device_train_batch_size: 4
+per_device_train_batch_size: 1
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
 num_train_epochs: 3.0
@@ -41,7 +41,7 @@ ddp_timeout: 180000000
 val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps: 88
+eval_steps: 2109
 
 report_to: wandb
 run_name: qwen2_72b_4bit_p2_full # optional
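
Note that p2 additionally shrinks the effective per-device batch from 4×8 = 32 to 1×8 = 8, which by itself multiplies the steps per epoch by four. These configs would typically be launched with `llamafactory-cli train llama-factory/config/qwen2_72b_lora_sft_4bit-p1.yaml` (and likewise for p2); LLaMA-Factory passes `logging_steps`, `save_steps`, and `eval_steps` through to the underlying Hugging Face `TrainingArguments`.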