simonmok commited on
Commit
6b0a5f3
·
verified ·
1 Parent(s): a6856f1

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07161090aa695b836b3109e56422c48659d0ff109ddde1739e80c7dbac69dcee
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20677a90a3902cf3ed56f86f8b03f6bea9f7c430589a4a8bb04004321c23daab
3
  size 268290900
run-1/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8abcd494efe65f3a888ecb298174b1444ea6f4684c6a416e225e29774afed680
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a0357cd904f7ee6a0e5aa8bb877ddfb88ffd83a4e0a5a1cd26ac165c9fbae4e
3
  size 268290900
run-1/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:792be3d99b1fc211ae9907acfab2dbf1a7d48c9af7942cada974eddbddad0c93
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427929bd040feb5c383356aece48a2710aaba828ec3d98ccd9076d181c81f58f
3
  size 536643898
run-1/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed9da1cbe846856af516cf037dd225e7f48e2737ef84c42a9bee0753b8c140ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c794bc4c67ef18245dd516031ce405ab557e4d551d225d8dd1e1abc0f2be8e33
3
  size 1064
run-1/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c50be35dabf57488910ecef52dcf0c3f00eb115989d551ba66b159364e8ee11
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:235bcd30eb5caaf6d85f48a7cbef42afd59119224ef62ab684da9f5c869126f8
3
  size 5368
run-1/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfb49d2df99087024b83b1ae4cbe9065c78608d60387119470bddd60fcf6017a
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20677a90a3902cf3ed56f86f8b03f6bea9f7c430589a4a8bb04004321c23daab
3
  size 268290900
run-1/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb334a206d95bf76e0c57755b7faa7d832972346490f8b2ac27e616ba04082c
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a6e68fb838dcb38c784a301a0375db383fec934b2fc7eb29e19c8782ec724f2
3
  size 536643898
run-1/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad4d7d251acf36e559c362893a1fb310c9f46b20e8a330025a14b6829ce4ab07
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71453465aad25f4c5a0a948496c64b1f74df850abda497954afe3695c00756ee
3
  size 1064
run-1/checkpoint-1500/trainer_state.json CHANGED
@@ -10,66 +10,66 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6364516129032258,
14
- "eval_loss": 0.2942647337913513,
15
- "eval_runtime": 5.7495,
16
- "eval_samples_per_second": 539.181,
17
- "eval_steps_per_second": 11.305,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6808456778526306,
23
- "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.4631,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.837741935483871,
30
- "eval_loss": 0.12298235297203064,
31
- "eval_runtime": 5.982,
32
- "eval_samples_per_second": 518.218,
33
- "eval_steps_per_second": 10.866,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.886774193548387,
39
- "eval_loss": 0.07365494966506958,
40
- "eval_runtime": 5.4854,
41
- "eval_samples_per_second": 565.141,
42
- "eval_steps_per_second": 11.85,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.7211179137229919,
48
- "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.144,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9006451612903226,
55
- "eval_loss": 0.05463062971830368,
56
- "eval_runtime": 5.5595,
57
- "eval_samples_per_second": 557.599,
58
- "eval_steps_per_second": 11.692,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.4047314524650574,
64
- "learning_rate": 6.522911051212939e-06,
65
- "loss": 0.0842,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 2226,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 7,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
@@ -87,8 +87,8 @@
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.27523519283703446,
91
- "num_train_epochs": 7,
92
- "temperature": 3
93
  }
94
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6106451612903225,
14
+ "eval_loss": 0.2180573046207428,
15
+ "eval_runtime": 5.4534,
16
+ "eval_samples_per_second": 568.452,
17
+ "eval_steps_per_second": 11.919,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5000836253166199,
23
+ "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3508,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8367741935483871,
30
+ "eval_loss": 0.10006564110517502,
31
+ "eval_runtime": 5.8746,
32
+ "eval_samples_per_second": 527.697,
33
+ "eval_steps_per_second": 11.065,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8880645161290323,
39
+ "eval_loss": 0.06387896835803986,
40
+ "eval_runtime": 5.5911,
41
+ "eval_samples_per_second": 554.452,
42
+ "eval_steps_per_second": 11.626,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5663716197013855,
48
+ "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1169,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9,
55
+ "eval_loss": 0.04769841209053993,
56
+ "eval_runtime": 5.4183,
57
+ "eval_samples_per_second": 572.139,
58
+ "eval_steps_per_second": 11.996,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3216884136199951,
64
+ "learning_rate": 1.0566037735849058e-05,
65
+ "loss": 0.0714,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
+ "max_steps": 3180,
71
  "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 10,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
 
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.9382110737628452,
91
+ "num_train_epochs": 10,
92
+ "temperature": 6
93
  }
94
  }
run-1/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c50be35dabf57488910ecef52dcf0c3f00eb115989d551ba66b159364e8ee11
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:235bcd30eb5caaf6d85f48a7cbef42afd59119224ef62ab684da9f5c869126f8
3
  size 5368
run-1/checkpoint-2000/trainer_state.json CHANGED
@@ -10,91 +10,91 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6364516129032258,
14
- "eval_loss": 0.2942647337913513,
15
- "eval_runtime": 5.7495,
16
- "eval_samples_per_second": 539.181,
17
- "eval_steps_per_second": 11.305,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6808456778526306,
23
- "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.4631,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.837741935483871,
30
- "eval_loss": 0.12298235297203064,
31
- "eval_runtime": 5.982,
32
- "eval_samples_per_second": 518.218,
33
- "eval_steps_per_second": 10.866,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.886774193548387,
39
- "eval_loss": 0.07365494966506958,
40
- "eval_runtime": 5.4854,
41
- "eval_samples_per_second": 565.141,
42
- "eval_steps_per_second": 11.85,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.7211179137229919,
48
- "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.144,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9006451612903226,
55
- "eval_loss": 0.05463062971830368,
56
- "eval_runtime": 5.5595,
57
- "eval_samples_per_second": 557.599,
58
- "eval_steps_per_second": 11.692,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.4047314524650574,
64
- "learning_rate": 6.522911051212939e-06,
65
- "loss": 0.0842,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.917741935483871,
71
- "eval_loss": 0.044852741062641144,
72
- "eval_runtime": 5.832,
73
- "eval_samples_per_second": 531.549,
74
- "eval_steps_per_second": 11.145,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9193548387096774,
80
- "eval_loss": 0.04065420478582382,
81
- "eval_runtime": 5.4964,
82
- "eval_samples_per_second": 564.004,
83
- "eval_steps_per_second": 11.826,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.3561893105506897,
89
- "learning_rate": 2.0305480682839176e-06,
90
- "loss": 0.0669,
91
  "step": 2000
92
  }
93
  ],
94
  "logging_steps": 500,
95
- "max_steps": 2226,
96
  "num_input_tokens_seen": 0,
97
- "num_train_epochs": 7,
98
  "save_steps": 500,
99
  "stateful_callbacks": {
100
  "TrainerControl": {
@@ -112,8 +112,8 @@
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.27523519283703446,
116
- "num_train_epochs": 7,
117
- "temperature": 3
118
  }
119
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6106451612903225,
14
+ "eval_loss": 0.2180573046207428,
15
+ "eval_runtime": 5.4534,
16
+ "eval_samples_per_second": 568.452,
17
+ "eval_steps_per_second": 11.919,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5000836253166199,
23
+ "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3508,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8367741935483871,
30
+ "eval_loss": 0.10006564110517502,
31
+ "eval_runtime": 5.8746,
32
+ "eval_samples_per_second": 527.697,
33
+ "eval_steps_per_second": 11.065,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8880645161290323,
39
+ "eval_loss": 0.06387896835803986,
40
+ "eval_runtime": 5.5911,
41
+ "eval_samples_per_second": 554.452,
42
+ "eval_steps_per_second": 11.626,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5663716197013855,
48
+ "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1169,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9,
55
+ "eval_loss": 0.04769841209053993,
56
+ "eval_runtime": 5.4183,
57
+ "eval_samples_per_second": 572.139,
58
+ "eval_steps_per_second": 11.996,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3216884136199951,
64
+ "learning_rate": 1.0566037735849058e-05,
65
+ "loss": 0.0714,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9170967741935484,
71
+ "eval_loss": 0.0384916327893734,
72
+ "eval_runtime": 5.8913,
73
+ "eval_samples_per_second": 526.203,
74
+ "eval_steps_per_second": 11.033,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9183870967741935,
80
+ "eval_loss": 0.0333557203412056,
81
+ "eval_runtime": 5.8238,
82
+ "eval_samples_per_second": 532.3,
83
+ "eval_steps_per_second": 11.161,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.24820531904697418,
89
+ "learning_rate": 7.421383647798742e-06,
90
+ "loss": 0.055,
91
  "step": 2000
92
  }
93
  ],
94
  "logging_steps": 500,
95
+ "max_steps": 3180,
96
  "num_input_tokens_seen": 0,
97
+ "num_train_epochs": 10,
98
  "save_steps": 500,
99
  "stateful_callbacks": {
100
  "TrainerControl": {
 
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.9382110737628452,
116
+ "num_train_epochs": 10,
117
+ "temperature": 6
118
  }
119
  }
runs/Nov25_08-56-26_a78cb449300a/events.out.tfevents.1732528693.a78cb449300a.701.6 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93bec0e1d68ce0be5270f2972e875d4a42e30ff869a6d555bc000776fbd7d027
3
- size 13952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7906d6cf1de7de14c1aebacfcffb4f68767bccf50ff7cd1b992e35c178ecd5a8
3
+ size 15343