sravanthib committed on
Commit
6120e54
·
verified ·
1 Parent(s): 8e9a176

Training completed

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. all_results.json +3 -3
  3. train_results.json +3 -3
  4. trainer_state.json +6 -5
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
 
2
  license: apache-2.0
3
  base_model: Qwen/Qwen2.5-7B-Instruct
4
  tags:
5
  - generated_from_trainer
6
- library_name: peft
7
  model-index:
8
  - name: qwen_model_testing
9
  results: []
@@ -42,7 +42,7 @@ The following hyperparameters were used during training:
42
  - gradient_accumulation_steps: 10
43
  - total_train_batch_size: 160
44
  - total_eval_batch_size: 64
45
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
  - training_steps: 10
@@ -54,7 +54,7 @@ The following hyperparameters were used during training:
54
  ### Framework versions
55
 
56
  - PEFT 0.14.0
57
- - Transformers 4.43.3
58
  - Pytorch 2.3.0+cu121
59
  - Datasets 3.2.0
60
- - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: peft
3
  license: apache-2.0
4
  base_model: Qwen/Qwen2.5-7B-Instruct
5
  tags:
6
  - generated_from_trainer
 
7
  model-index:
8
  - name: qwen_model_testing
9
  results: []
 
42
  - gradient_accumulation_steps: 10
43
  - total_train_batch_size: 160
44
  - total_eval_batch_size: 64
45
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_ratio: 0.03
48
  - training_steps: 10
 
54
  ### Framework versions
55
 
56
  - PEFT 0.14.0
57
+ - Transformers 4.51.3
58
  - Pytorch 2.3.0+cu121
59
  - Datasets 3.2.0
60
+ - Tokenizers 0.21.2
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0182648401826484,
3
  "total_flos": 1.394108846267433e+17,
4
- "train_loss": 4.435050201416016,
5
- "train_runtime": 166.2301,
6
- "train_samples_per_second": 9.625,
7
  "train_steps_per_second": 0.06
8
  }
 
1
  {
2
  "epoch": 0.0182648401826484,
3
  "total_flos": 1.394108846267433e+17,
4
+ "train_loss": 4.495834732055664,
5
+ "train_runtime": 166.6924,
6
+ "train_samples_per_second": 9.599,
7
  "train_steps_per_second": 0.06
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0182648401826484,
3
  "total_flos": 1.394108846267433e+17,
4
- "train_loss": 4.435050201416016,
5
- "train_runtime": 166.2301,
6
- "train_samples_per_second": 9.625,
7
  "train_steps_per_second": 0.06
8
  }
 
1
  {
2
  "epoch": 0.0182648401826484,
3
  "total_flos": 1.394108846267433e+17,
4
+ "train_loss": 4.495834732055664,
5
+ "train_runtime": 166.6924,
6
+ "train_samples_per_second": 9.599,
7
  "train_steps_per_second": 0.06
8
  }
trainer_state.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 0.0182648401826484,
@@ -10,18 +11,18 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0182648401826484,
13
- "grad_norm": 0.1791902333498001,
14
  "learning_rate": 0.0001,
15
- "loss": 4.4351,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.0182648401826484,
20
  "step": 10,
21
  "total_flos": 1.394108846267433e+17,
22
- "train_loss": 4.435050201416016,
23
- "train_runtime": 166.2301,
24
- "train_samples_per_second": 9.625,
25
  "train_steps_per_second": 0.06
26
  }
27
  ],
 
1
  {
2
+ "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 0.0182648401826484,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.0182648401826484,
14
+ "grad_norm": 0.2188371866941452,
15
  "learning_rate": 0.0001,
16
+ "loss": 4.4958,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.0182648401826484,
21
  "step": 10,
22
  "total_flos": 1.394108846267433e+17,
23
+ "train_loss": 4.495834732055664,
24
+ "train_runtime": 166.6924,
25
+ "train_samples_per_second": 9.599,
26
  "train_steps_per_second": 0.06
27
  }
28
  ],