amjadfqs commited on
Commit
ea83fc8
1 Parent(s): 5bceb86

Model save

Browse files
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 9.64,
3
  "eval_accuracy": 0.9265375854214123,
4
  "eval_f1_score": 0.9252257576617917,
5
  "eval_loss": 0.19247999787330627,
@@ -7,9 +7,9 @@
7
  "eval_runtime": 96.8959,
8
  "eval_samples_per_second": 18.123,
9
  "eval_steps_per_second": 0.289,
10
- "total_flos": 3.9796565447839334e+18,
11
- "train_loss": 0.3648529052734375,
12
- "train_runtime": 9658.8821,
13
- "train_samples_per_second": 5.453,
14
- "train_steps_per_second": 0.021
15
  }
 
1
  {
2
+ "epoch": 19.88,
3
  "eval_accuracy": 0.9265375854214123,
4
  "eval_f1_score": 0.9252257576617917,
5
  "eval_loss": 0.19247999787330627,
 
7
  "eval_runtime": 96.8959,
8
  "eval_samples_per_second": 18.123,
9
  "eval_steps_per_second": 0.289,
10
+ "total_flos": 8.203992042011222e+18,
11
+ "train_loss": 0.18675074555524965,
12
+ "train_runtime": 20102.5048,
13
+ "train_samples_per_second": 5.24,
14
+ "train_steps_per_second": 0.041
15
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05c10ab6bacc7302c207ddab8363a19d4194948b674aca8d2916b2b7f4de1fc6
3
  size 347601681
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9069b6fc118f468b4306a25d0928f19afb8db914dbf5be8d6a0f4c0141cf69e4
3
  size 347601681
runs/May23_21-58-26_project/events.out.tfevents.1684879118.project.44437.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb2971ac55504cfa2fc4f7b304d69d55cc9173a1d168ec0ef112a26aa8f78142
3
- size 25993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d664e488502c6ead12600c83b5e7062b30d6c2137ca77b7e6f04d61d5e8579fd
3
+ size 26347
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.64,
3
- "total_flos": 3.9796565447839334e+18,
4
- "train_loss": 0.3648529052734375,
5
- "train_runtime": 9658.8821,
6
- "train_samples_per_second": 5.453,
7
- "train_steps_per_second": 0.021
8
  }
 
1
  {
2
+ "epoch": 19.88,
3
+ "total_flos": 8.203992042011222e+18,
4
+ "train_loss": 0.18675074555524965,
5
+ "train_runtime": 20102.5048,
6
+ "train_samples_per_second": 5.24,
7
+ "train_steps_per_second": 0.041
8
  }
trainer_state.json CHANGED
@@ -1,255 +1,737 @@
1
  {
2
- "best_metric": 0.9265375854214123,
3
- "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-finetuned-brain-tumor-final/checkpoint-186",
4
- "epoch": 9.638554216867469,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.48,
12
- "learning_rate": 5e-06,
13
- "loss": 1.3805,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.96,
18
- "learning_rate": 1e-05,
19
- "loss": 1.2212,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.96,
24
- "eval_accuracy": 0.6429384965831435,
25
- "eval_f1_score": 0.6225332030044981,
26
- "eval_loss": 1.140669822692871,
27
- "eval_precision": 0.6600729727071496,
28
- "eval_runtime": 97.1469,
29
- "eval_samples_per_second": 18.076,
30
- "eval_steps_per_second": 0.288,
31
  "step": 20
32
  },
33
  {
34
- "epoch": 1.45,
35
- "learning_rate": 9.444444444444445e-06,
36
- "loss": 0.8664,
37
  "step": 30
38
  },
39
  {
40
- "epoch": 1.93,
41
- "learning_rate": 8.888888888888888e-06,
42
- "loss": 0.565,
43
  "step": 40
44
  },
45
  {
46
- "epoch": 1.98,
47
- "eval_accuracy": 0.8325740318906606,
48
- "eval_f1_score": 0.8311373357165648,
49
- "eval_loss": 0.5161515474319458,
50
- "eval_precision": 0.8427982091540198,
51
- "eval_runtime": 98.3593,
52
- "eval_samples_per_second": 17.853,
53
- "eval_steps_per_second": 0.285,
54
  "step": 41
55
  },
56
  {
57
- "epoch": 2.41,
58
- "learning_rate": 8.333333333333334e-06,
59
- "loss": 0.4226,
60
  "step": 50
61
  },
62
  {
63
- "epoch": 2.89,
64
- "learning_rate": 7.77777777777778e-06,
65
- "loss": 0.3245,
66
  "step": 60
67
  },
68
  {
69
- "epoch": 2.99,
70
- "eval_accuracy": 0.8804100227790432,
71
- "eval_f1_score": 0.8784127627414283,
72
- "eval_loss": 0.3265204429626465,
73
- "eval_precision": 0.8843478014022281,
74
- "eval_runtime": 94.9107,
75
- "eval_samples_per_second": 18.502,
76
- "eval_steps_per_second": 0.295,
77
- "step": 62
78
- },
79
- {
80
- "epoch": 3.37,
81
- "learning_rate": 7.222222222222223e-06,
82
- "loss": 0.2761,
83
  "step": 70
84
  },
85
  {
86
- "epoch": 3.86,
87
- "learning_rate": 6.666666666666667e-06,
88
- "loss": 0.2618,
89
  "step": 80
90
  },
91
  {
92
- "epoch": 4.0,
93
- "eval_accuracy": 0.9066059225512528,
94
- "eval_f1_score": 0.9053659025451456,
95
- "eval_loss": 0.2712633013725281,
96
- "eval_precision": 0.9105145368026273,
97
- "eval_runtime": 95.6192,
98
- "eval_samples_per_second": 18.365,
99
- "eval_steps_per_second": 0.293,
100
- "step": 83
101
- },
102
- {
103
- "epoch": 4.34,
104
- "learning_rate": 6.111111111111112e-06,
105
- "loss": 0.225,
106
- "step": 90
107
  },
108
  {
109
- "epoch": 4.82,
110
- "learning_rate": 5.555555555555557e-06,
111
- "loss": 0.2164,
112
- "step": 100
113
  },
114
  {
115
- "epoch": 4.96,
116
- "eval_accuracy": 0.8946469248291572,
117
- "eval_f1_score": 0.8929116621992249,
118
- "eval_loss": 0.28123489022254944,
119
- "eval_precision": 0.899366339703112,
120
- "eval_runtime": 95.1864,
121
- "eval_samples_per_second": 18.448,
122
- "eval_steps_per_second": 0.294,
123
- "step": 103
124
  },
125
  {
126
- "epoch": 5.3,
127
- "learning_rate": 5e-06,
128
- "loss": 0.1898,
129
  "step": 110
130
  },
131
  {
132
- "epoch": 5.78,
133
- "learning_rate": 4.444444444444444e-06,
134
- "loss": 0.1814,
135
  "step": 120
136
  },
137
  {
138
- "epoch": 5.98,
139
- "eval_accuracy": 0.9060364464692483,
140
- "eval_f1_score": 0.9043444445100207,
141
- "eval_loss": 0.24107009172439575,
142
- "eval_precision": 0.9091049886409681,
143
- "eval_runtime": 96.0888,
144
- "eval_samples_per_second": 18.275,
145
- "eval_steps_per_second": 0.291,
146
- "step": 124
147
  },
148
  {
149
- "epoch": 6.27,
150
- "learning_rate": 3.88888888888889e-06,
151
- "loss": 0.1608,
152
  "step": 130
153
  },
154
  {
155
- "epoch": 6.75,
156
- "learning_rate": 3.3333333333333333e-06,
157
- "loss": 0.1481,
158
  "step": 140
159
  },
160
  {
161
- "epoch": 6.99,
162
- "eval_accuracy": 0.9100227790432802,
163
- "eval_f1_score": 0.9083931079494111,
164
- "eval_loss": 0.23449133336544037,
165
- "eval_precision": 0.9129716383829837,
166
- "eval_runtime": 94.2446,
167
- "eval_samples_per_second": 18.632,
168
- "eval_steps_per_second": 0.297,
169
- "step": 145
170
- },
171
- {
172
- "epoch": 7.23,
173
- "learning_rate": 2.7777777777777783e-06,
174
- "loss": 0.1487,
175
  "step": 150
176
  },
177
  {
178
- "epoch": 7.71,
179
- "learning_rate": 2.222222222222222e-06,
180
- "loss": 0.1468,
181
  "step": 160
182
  },
183
  {
184
- "epoch": 8.0,
185
- "eval_accuracy": 0.9071753986332574,
186
- "eval_f1_score": 0.9054886526694435,
187
- "eval_loss": 0.23397402465343475,
188
- "eval_precision": 0.9108051352873032,
189
- "eval_runtime": 94.4553,
190
- "eval_samples_per_second": 18.591,
191
- "eval_steps_per_second": 0.296,
192
- "step": 166
193
- },
194
- {
195
- "epoch": 8.19,
196
- "learning_rate": 1.6666666666666667e-06,
197
- "loss": 0.1592,
198
  "step": 170
199
  },
200
  {
201
- "epoch": 8.67,
202
- "learning_rate": 1.111111111111111e-06,
203
- "loss": 0.1336,
204
  "step": 180
205
  },
206
  {
207
- "epoch": 8.96,
208
- "eval_accuracy": 0.9265375854214123,
209
- "eval_f1_score": 0.9252257576617917,
210
- "eval_loss": 0.19247999787330627,
211
- "eval_precision": 0.9269521372101541,
212
- "eval_runtime": 95.6309,
213
- "eval_samples_per_second": 18.362,
214
- "eval_steps_per_second": 0.293,
215
- "step": 186
216
- },
217
- {
218
- "epoch": 9.16,
219
- "learning_rate": 5.555555555555555e-07,
220
- "loss": 0.1361,
221
  "step": 190
222
  },
223
  {
224
- "epoch": 9.64,
225
- "learning_rate": 0.0,
226
- "loss": 0.133,
227
  "step": 200
228
  },
229
  {
230
- "epoch": 9.64,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  "eval_accuracy": 0.9219817767653758,
232
- "eval_f1_score": 0.9207089542401595,
233
- "eval_loss": 0.20209115743637085,
234
- "eval_precision": 0.9235132299409764,
235
- "eval_runtime": 96.5665,
236
- "eval_samples_per_second": 18.184,
237
- "eval_steps_per_second": 0.29,
238
- "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  },
240
  {
241
- "epoch": 9.64,
242
- "step": 200,
243
- "total_flos": 3.9796565447839334e+18,
244
- "train_loss": 0.3648529052734375,
245
- "train_runtime": 9658.8821,
246
- "train_samples_per_second": 5.453,
247
- "train_steps_per_second": 0.021
248
  }
249
  ],
250
- "max_steps": 200,
251
- "num_train_epochs": 10,
252
- "total_flos": 3.9796565447839334e+18,
253
  "trial_name": null,
254
  "trial_params": null
255
  }
 
1
  {
2
+ "best_metric": 0.9396355353075171,
3
+ "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-finetuned-brain-tumor-final/checkpoint-495",
4
+ "epoch": 19.87878787878788,
5
+ "global_step": 820,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.24,
12
+ "learning_rate": 1.2195121951219514e-06,
13
+ "loss": 1.4212,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.48,
18
+ "learning_rate": 2.4390243902439027e-06,
19
+ "loss": 1.3801,
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.73,
24
+ "learning_rate": 3.6585365853658537e-06,
25
+ "loss": 1.2755,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.97,
30
+ "learning_rate": 4.8780487804878055e-06,
31
+ "loss": 1.1562,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 0.99,
36
+ "eval_accuracy": 0.6378132118451025,
37
+ "eval_f1_score": 0.6190569285283298,
38
+ "eval_loss": 1.1378333568572998,
39
+ "eval_precision": 0.6536626392431208,
40
+ "eval_runtime": 96.8359,
41
+ "eval_samples_per_second": 18.134,
42
+ "eval_steps_per_second": 0.568,
43
  "step": 41
44
  },
45
  {
46
+ "epoch": 1.21,
47
+ "learning_rate": 6.0975609756097564e-06,
48
+ "loss": 0.9839,
49
  "step": 50
50
  },
51
  {
52
+ "epoch": 1.45,
53
+ "learning_rate": 7.317073170731707e-06,
54
+ "loss": 0.7914,
55
  "step": 60
56
  },
57
  {
58
+ "epoch": 1.7,
59
+ "learning_rate": 8.536585365853658e-06,
60
+ "loss": 0.6245,
 
 
 
 
 
 
 
 
 
 
 
61
  "step": 70
62
  },
63
  {
64
+ "epoch": 1.94,
65
+ "learning_rate": 9.756097560975611e-06,
66
+ "loss": 0.4878,
67
  "step": 80
68
  },
69
  {
70
+ "epoch": 1.99,
71
+ "eval_accuracy": 0.7591116173120729,
72
+ "eval_f1_score": 0.7498580587869668,
73
+ "eval_loss": 0.6476529836654663,
74
+ "eval_precision": 0.7873955323748831,
75
+ "eval_runtime": 96.8155,
76
+ "eval_samples_per_second": 18.138,
77
+ "eval_steps_per_second": 0.568,
78
+ "step": 82
 
 
 
 
 
 
79
  },
80
  {
81
+ "epoch": 2.18,
82
+ "learning_rate": 9.89159891598916e-06,
83
+ "loss": 0.3834,
84
+ "step": 90
85
  },
86
  {
87
+ "epoch": 2.42,
88
+ "learning_rate": 9.756097560975611e-06,
89
+ "loss": 0.3288,
90
+ "step": 100
 
 
 
 
 
91
  },
92
  {
93
+ "epoch": 2.67,
94
+ "learning_rate": 9.620596205962061e-06,
95
+ "loss": 0.3111,
96
  "step": 110
97
  },
98
  {
99
+ "epoch": 2.91,
100
+ "learning_rate": 9.485094850948512e-06,
101
+ "loss": 0.2623,
102
  "step": 120
103
  },
104
  {
105
+ "epoch": 2.98,
106
+ "eval_accuracy": 0.8337129840546698,
107
+ "eval_f1_score": 0.8310652064326258,
108
+ "eval_loss": 0.44103795289993286,
109
+ "eval_precision": 0.8487545531816145,
110
+ "eval_runtime": 95.5234,
111
+ "eval_samples_per_second": 18.383,
112
+ "eval_steps_per_second": 0.576,
113
+ "step": 123
114
  },
115
  {
116
+ "epoch": 3.15,
117
+ "learning_rate": 9.34959349593496e-06,
118
+ "loss": 0.2449,
119
  "step": 130
120
  },
121
  {
122
+ "epoch": 3.39,
123
+ "learning_rate": 9.21409214092141e-06,
124
+ "loss": 0.2058,
125
  "step": 140
126
  },
127
  {
128
+ "epoch": 3.64,
129
+ "learning_rate": 9.07859078590786e-06,
130
+ "loss": 0.2426,
 
 
 
 
 
 
 
 
 
 
 
131
  "step": 150
132
  },
133
  {
134
+ "epoch": 3.88,
135
+ "learning_rate": 8.94308943089431e-06,
136
+ "loss": 0.1985,
137
  "step": 160
138
  },
139
  {
140
+ "epoch": 4.0,
141
+ "eval_accuracy": 0.8143507972665148,
142
+ "eval_f1_score": 0.8114988597034715,
143
+ "eval_loss": 0.465999573469162,
144
+ "eval_precision": 0.8455116716314306,
145
+ "eval_runtime": 95.3707,
146
+ "eval_samples_per_second": 18.412,
147
+ "eval_steps_per_second": 0.577,
148
+ "step": 165
149
+ },
150
+ {
151
+ "epoch": 4.12,
152
+ "learning_rate": 8.80758807588076e-06,
153
+ "loss": 0.1852,
154
  "step": 170
155
  },
156
  {
157
+ "epoch": 4.36,
158
+ "learning_rate": 8.67208672086721e-06,
159
+ "loss": 0.1738,
160
  "step": 180
161
  },
162
  {
163
+ "epoch": 4.61,
164
+ "learning_rate": 8.536585365853658e-06,
165
+ "loss": 0.1806,
 
 
 
 
 
 
 
 
 
 
 
166
  "step": 190
167
  },
168
  {
169
+ "epoch": 4.85,
170
+ "learning_rate": 8.401084010840109e-06,
171
+ "loss": 0.1736,
172
  "step": 200
173
  },
174
  {
175
+ "epoch": 4.99,
176
+ "eval_accuracy": 0.8775626423690205,
177
+ "eval_f1_score": 0.8759706154432891,
178
+ "eval_loss": 0.3229566812515259,
179
+ "eval_precision": 0.8894198873971897,
180
+ "eval_runtime": 96.4712,
181
+ "eval_samples_per_second": 18.202,
182
+ "eval_steps_per_second": 0.57,
183
+ "step": 206
184
+ },
185
+ {
186
+ "epoch": 5.09,
187
+ "learning_rate": 8.265582655826559e-06,
188
+ "loss": 0.1677,
189
+ "step": 210
190
+ },
191
+ {
192
+ "epoch": 5.33,
193
+ "learning_rate": 8.130081300813009e-06,
194
+ "loss": 0.1384,
195
+ "step": 220
196
+ },
197
+ {
198
+ "epoch": 5.58,
199
+ "learning_rate": 7.99457994579946e-06,
200
+ "loss": 0.1319,
201
+ "step": 230
202
+ },
203
+ {
204
+ "epoch": 5.82,
205
+ "learning_rate": 7.859078590785908e-06,
206
+ "loss": 0.124,
207
+ "step": 240
208
+ },
209
+ {
210
+ "epoch": 5.99,
211
+ "eval_accuracy": 0.9026195899772209,
212
+ "eval_f1_score": 0.9013627262353661,
213
+ "eval_loss": 0.2684251368045807,
214
+ "eval_precision": 0.9089527019800868,
215
+ "eval_runtime": 94.5477,
216
+ "eval_samples_per_second": 18.573,
217
+ "eval_steps_per_second": 0.582,
218
+ "step": 247
219
+ },
220
+ {
221
+ "epoch": 6.06,
222
+ "learning_rate": 7.723577235772358e-06,
223
+ "loss": 0.1508,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 6.3,
228
+ "learning_rate": 7.5880758807588085e-06,
229
+ "loss": 0.1168,
230
+ "step": 260
231
+ },
232
+ {
233
+ "epoch": 6.55,
234
+ "learning_rate": 7.452574525745257e-06,
235
+ "loss": 0.1043,
236
+ "step": 270
237
+ },
238
+ {
239
+ "epoch": 6.79,
240
+ "learning_rate": 7.317073170731707e-06,
241
+ "loss": 0.1278,
242
+ "step": 280
243
+ },
244
+ {
245
+ "epoch": 6.98,
246
+ "eval_accuracy": 0.9179954441913439,
247
+ "eval_f1_score": 0.9166179163753916,
248
+ "eval_loss": 0.22102278470993042,
249
+ "eval_precision": 0.9210303187193152,
250
+ "eval_runtime": 95.7688,
251
+ "eval_samples_per_second": 18.336,
252
+ "eval_steps_per_second": 0.574,
253
+ "step": 288
254
+ },
255
+ {
256
+ "epoch": 7.03,
257
+ "learning_rate": 7.181571815718158e-06,
258
+ "loss": 0.1122,
259
+ "step": 290
260
+ },
261
+ {
262
+ "epoch": 7.27,
263
+ "learning_rate": 7.046070460704607e-06,
264
+ "loss": 0.1052,
265
+ "step": 300
266
+ },
267
+ {
268
+ "epoch": 7.52,
269
+ "learning_rate": 6.910569105691057e-06,
270
+ "loss": 0.0965,
271
+ "step": 310
272
+ },
273
+ {
274
+ "epoch": 7.76,
275
+ "learning_rate": 6.775067750677508e-06,
276
+ "loss": 0.1072,
277
+ "step": 320
278
+ },
279
+ {
280
+ "epoch": 8.0,
281
+ "learning_rate": 6.639566395663957e-06,
282
+ "loss": 0.0959,
283
+ "step": 330
284
+ },
285
+ {
286
+ "epoch": 8.0,
287
+ "eval_accuracy": 0.9208428246013668,
288
+ "eval_f1_score": 0.9195432105337242,
289
+ "eval_loss": 0.21511134505271912,
290
+ "eval_precision": 0.9259948741044411,
291
+ "eval_runtime": 94.727,
292
+ "eval_samples_per_second": 18.537,
293
+ "eval_steps_per_second": 0.581,
294
+ "step": 330
295
+ },
296
+ {
297
+ "epoch": 8.24,
298
+ "learning_rate": 6.504065040650407e-06,
299
+ "loss": 0.087,
300
+ "step": 340
301
+ },
302
+ {
303
+ "epoch": 8.48,
304
+ "learning_rate": 6.368563685636857e-06,
305
+ "loss": 0.0788,
306
+ "step": 350
307
+ },
308
+ {
309
+ "epoch": 8.73,
310
+ "learning_rate": 6.233062330623306e-06,
311
+ "loss": 0.0952,
312
+ "step": 360
313
+ },
314
+ {
315
+ "epoch": 8.97,
316
+ "learning_rate": 6.0975609756097564e-06,
317
+ "loss": 0.0849,
318
+ "step": 370
319
+ },
320
+ {
321
+ "epoch": 8.99,
322
  "eval_accuracy": 0.9219817767653758,
323
+ "eval_f1_score": 0.9205449523633584,
324
+ "eval_loss": 0.21544188261032104,
325
+ "eval_precision": 0.9290889598643586,
326
+ "eval_runtime": 95.0804,
327
+ "eval_samples_per_second": 18.469,
328
+ "eval_steps_per_second": 0.578,
329
+ "step": 371
330
+ },
331
+ {
332
+ "epoch": 9.21,
333
+ "learning_rate": 5.962059620596207e-06,
334
+ "loss": 0.0954,
335
+ "step": 380
336
+ },
337
+ {
338
+ "epoch": 9.45,
339
+ "learning_rate": 5.826558265582656e-06,
340
+ "loss": 0.0808,
341
+ "step": 390
342
+ },
343
+ {
344
+ "epoch": 9.7,
345
+ "learning_rate": 5.691056910569106e-06,
346
+ "loss": 0.0703,
347
+ "step": 400
348
+ },
349
+ {
350
+ "epoch": 9.94,
351
+ "learning_rate": 5.555555555555557e-06,
352
+ "loss": 0.0805,
353
+ "step": 410
354
+ },
355
+ {
356
+ "epoch": 9.99,
357
+ "eval_accuracy": 0.9191343963553531,
358
+ "eval_f1_score": 0.9178865857712233,
359
+ "eval_loss": 0.21121914684772491,
360
+ "eval_precision": 0.9250862844253733,
361
+ "eval_runtime": 94.2987,
362
+ "eval_samples_per_second": 18.622,
363
+ "eval_steps_per_second": 0.583,
364
+ "step": 412
365
+ },
366
+ {
367
+ "epoch": 10.18,
368
+ "learning_rate": 5.420054200542005e-06,
369
+ "loss": 0.0521,
370
+ "step": 420
371
+ },
372
+ {
373
+ "epoch": 10.42,
374
+ "learning_rate": 5.2845528455284555e-06,
375
+ "loss": 0.0681,
376
+ "step": 430
377
+ },
378
+ {
379
+ "epoch": 10.67,
380
+ "learning_rate": 5.149051490514906e-06,
381
+ "loss": 0.0769,
382
+ "step": 440
383
+ },
384
+ {
385
+ "epoch": 10.91,
386
+ "learning_rate": 5.013550135501355e-06,
387
+ "loss": 0.0682,
388
+ "step": 450
389
+ },
390
+ {
391
+ "epoch": 10.98,
392
+ "eval_accuracy": 0.9384965831435079,
393
+ "eval_f1_score": 0.9369393287065592,
394
+ "eval_loss": 0.15628309547901154,
395
+ "eval_precision": 0.9402176891312171,
396
+ "eval_runtime": 96.028,
397
+ "eval_samples_per_second": 18.286,
398
+ "eval_steps_per_second": 0.573,
399
+ "step": 453
400
+ },
401
+ {
402
+ "epoch": 11.15,
403
+ "learning_rate": 4.8780487804878055e-06,
404
+ "loss": 0.0668,
405
+ "step": 460
406
+ },
407
+ {
408
+ "epoch": 11.39,
409
+ "learning_rate": 4.742547425474256e-06,
410
+ "loss": 0.0657,
411
+ "step": 470
412
+ },
413
+ {
414
+ "epoch": 11.64,
415
+ "learning_rate": 4.607046070460705e-06,
416
+ "loss": 0.0703,
417
+ "step": 480
418
+ },
419
+ {
420
+ "epoch": 11.88,
421
+ "learning_rate": 4.471544715447155e-06,
422
+ "loss": 0.0624,
423
+ "step": 490
424
+ },
425
+ {
426
+ "epoch": 12.0,
427
+ "eval_accuracy": 0.9396355353075171,
428
+ "eval_f1_score": 0.9384678994386635,
429
+ "eval_loss": 0.1576806604862213,
430
+ "eval_precision": 0.9408448811333167,
431
+ "eval_runtime": 95.9898,
432
+ "eval_samples_per_second": 18.294,
433
+ "eval_steps_per_second": 0.573,
434
+ "step": 495
435
+ },
436
+ {
437
+ "epoch": 12.12,
438
+ "learning_rate": 4.336043360433605e-06,
439
+ "loss": 0.0531,
440
+ "step": 500
441
+ },
442
+ {
443
+ "epoch": 12.36,
444
+ "learning_rate": 4.200542005420054e-06,
445
+ "loss": 0.0579,
446
+ "step": 510
447
+ },
448
+ {
449
+ "epoch": 12.61,
450
+ "learning_rate": 4.0650406504065046e-06,
451
+ "loss": 0.0657,
452
+ "step": 520
453
+ },
454
+ {
455
+ "epoch": 12.85,
456
+ "learning_rate": 3.929539295392954e-06,
457
+ "loss": 0.0415,
458
+ "step": 530
459
+ },
460
+ {
461
+ "epoch": 12.99,
462
+ "eval_accuracy": 0.9305239179954442,
463
+ "eval_f1_score": 0.9294348170687547,
464
+ "eval_loss": 0.1836157888174057,
465
+ "eval_precision": 0.9332309174336554,
466
+ "eval_runtime": 99.8745,
467
+ "eval_samples_per_second": 17.582,
468
+ "eval_steps_per_second": 0.551,
469
+ "step": 536
470
+ },
471
+ {
472
+ "epoch": 13.09,
473
+ "learning_rate": 3.7940379403794043e-06,
474
+ "loss": 0.0569,
475
+ "step": 540
476
+ },
477
+ {
478
+ "epoch": 13.33,
479
+ "learning_rate": 3.6585365853658537e-06,
480
+ "loss": 0.0707,
481
+ "step": 550
482
+ },
483
+ {
484
+ "epoch": 13.58,
485
+ "learning_rate": 3.5230352303523035e-06,
486
+ "loss": 0.0424,
487
+ "step": 560
488
+ },
489
+ {
490
+ "epoch": 13.82,
491
+ "learning_rate": 3.387533875338754e-06,
492
+ "loss": 0.0465,
493
+ "step": 570
494
+ },
495
+ {
496
+ "epoch": 13.99,
497
+ "eval_accuracy": 0.9202733485193622,
498
+ "eval_f1_score": 0.9191914508633344,
499
+ "eval_loss": 0.2144739031791687,
500
+ "eval_precision": 0.9251966092567651,
501
+ "eval_runtime": 96.9873,
502
+ "eval_samples_per_second": 18.105,
503
+ "eval_steps_per_second": 0.567,
504
+ "step": 577
505
+ },
506
+ {
507
+ "epoch": 14.06,
508
+ "learning_rate": 3.2520325203252037e-06,
509
+ "loss": 0.0483,
510
+ "step": 580
511
+ },
512
+ {
513
+ "epoch": 14.3,
514
+ "learning_rate": 3.116531165311653e-06,
515
+ "loss": 0.0432,
516
+ "step": 590
517
+ },
518
+ {
519
+ "epoch": 14.55,
520
+ "learning_rate": 2.9810298102981034e-06,
521
+ "loss": 0.046,
522
+ "step": 600
523
+ },
524
+ {
525
+ "epoch": 14.79,
526
+ "learning_rate": 2.845528455284553e-06,
527
+ "loss": 0.056,
528
+ "step": 610
529
+ },
530
+ {
531
+ "epoch": 14.98,
532
+ "eval_accuracy": 0.9339407744874715,
533
+ "eval_f1_score": 0.9324607544046006,
534
+ "eval_loss": 0.17098096013069153,
535
+ "eval_precision": 0.9369044224464911,
536
+ "eval_runtime": 96.6806,
537
+ "eval_samples_per_second": 18.163,
538
+ "eval_steps_per_second": 0.569,
539
+ "step": 618
540
+ },
541
+ {
542
+ "epoch": 15.03,
543
+ "learning_rate": 2.7100271002710026e-06,
544
+ "loss": 0.0447,
545
+ "step": 620
546
+ },
547
+ {
548
+ "epoch": 15.27,
549
+ "learning_rate": 2.574525745257453e-06,
550
+ "loss": 0.0441,
551
+ "step": 630
552
+ },
553
+ {
554
+ "epoch": 15.52,
555
+ "learning_rate": 2.4390243902439027e-06,
556
+ "loss": 0.0515,
557
+ "step": 640
558
+ },
559
+ {
560
+ "epoch": 15.76,
561
+ "learning_rate": 2.3035230352303526e-06,
562
+ "loss": 0.0435,
563
+ "step": 650
564
+ },
565
+ {
566
+ "epoch": 16.0,
567
+ "learning_rate": 2.1680216802168024e-06,
568
+ "loss": 0.0545,
569
+ "step": 660
570
+ },
571
+ {
572
+ "epoch": 16.0,
573
+ "eval_accuracy": 0.9248291571753986,
574
+ "eval_f1_score": 0.9235589091910614,
575
+ "eval_loss": 0.20944343507289886,
576
+ "eval_precision": 0.929753616084827,
577
+ "eval_runtime": 97.9348,
578
+ "eval_samples_per_second": 17.93,
579
+ "eval_steps_per_second": 0.562,
580
+ "step": 660
581
+ },
582
+ {
583
+ "epoch": 16.24,
584
+ "learning_rate": 2.0325203252032523e-06,
585
+ "loss": 0.0477,
586
+ "step": 670
587
+ },
588
+ {
589
+ "epoch": 16.48,
590
+ "learning_rate": 1.8970189701897021e-06,
591
+ "loss": 0.0424,
592
+ "step": 680
593
+ },
594
+ {
595
+ "epoch": 16.73,
596
+ "learning_rate": 1.7615176151761518e-06,
597
+ "loss": 0.0377,
598
+ "step": 690
599
+ },
600
+ {
601
+ "epoch": 16.97,
602
+ "learning_rate": 1.6260162601626018e-06,
603
+ "loss": 0.0591,
604
+ "step": 700
605
+ },
606
+ {
607
+ "epoch": 16.99,
608
+ "eval_accuracy": 0.9316628701594533,
609
+ "eval_f1_score": 0.930292082850388,
610
+ "eval_loss": 0.1752157062292099,
611
+ "eval_precision": 0.934052681687829,
612
+ "eval_runtime": 98.3671,
613
+ "eval_samples_per_second": 17.852,
614
+ "eval_steps_per_second": 0.559,
615
+ "step": 701
616
+ },
617
+ {
618
+ "epoch": 17.21,
619
+ "learning_rate": 1.4905149051490517e-06,
620
+ "loss": 0.0425,
621
+ "step": 710
622
+ },
623
+ {
624
+ "epoch": 17.45,
625
+ "learning_rate": 1.3550135501355013e-06,
626
+ "loss": 0.0394,
627
+ "step": 720
628
+ },
629
+ {
630
+ "epoch": 17.7,
631
+ "learning_rate": 1.2195121951219514e-06,
632
+ "loss": 0.0323,
633
+ "step": 730
634
+ },
635
+ {
636
+ "epoch": 17.94,
637
+ "learning_rate": 1.0840108401084012e-06,
638
+ "loss": 0.0512,
639
+ "step": 740
640
+ },
641
+ {
642
+ "epoch": 17.99,
643
+ "eval_accuracy": 0.9310933940774487,
644
+ "eval_f1_score": 0.9297072356118401,
645
+ "eval_loss": 0.17810992896556854,
646
+ "eval_precision": 0.9341920124235589,
647
+ "eval_runtime": 99.187,
648
+ "eval_samples_per_second": 17.704,
649
+ "eval_steps_per_second": 0.555,
650
+ "step": 742
651
+ },
652
+ {
653
+ "epoch": 18.18,
654
+ "learning_rate": 9.485094850948511e-07,
655
+ "loss": 0.0278,
656
+ "step": 750
657
+ },
658
+ {
659
+ "epoch": 18.42,
660
+ "learning_rate": 8.130081300813009e-07,
661
+ "loss": 0.0338,
662
+ "step": 760
663
+ },
664
+ {
665
+ "epoch": 18.67,
666
+ "learning_rate": 6.775067750677507e-07,
667
+ "loss": 0.0467,
668
+ "step": 770
669
+ },
670
+ {
671
+ "epoch": 18.91,
672
+ "learning_rate": 5.420054200542006e-07,
673
+ "loss": 0.0424,
674
+ "step": 780
675
+ },
676
+ {
677
+ "epoch": 18.98,
678
+ "eval_accuracy": 0.9305239179954442,
679
+ "eval_f1_score": 0.9292526869769537,
680
+ "eval_loss": 0.18729418516159058,
681
+ "eval_precision": 0.9338045676207426,
682
+ "eval_runtime": 97.3576,
683
+ "eval_samples_per_second": 18.037,
684
+ "eval_steps_per_second": 0.565,
685
+ "step": 783
686
+ },
687
+ {
688
+ "epoch": 19.15,
689
+ "learning_rate": 4.0650406504065046e-07,
690
+ "loss": 0.0333,
691
+ "step": 790
692
+ },
693
+ {
694
+ "epoch": 19.39,
695
+ "learning_rate": 2.710027100271003e-07,
696
+ "loss": 0.039,
697
+ "step": 800
698
+ },
699
+ {
700
+ "epoch": 19.64,
701
+ "learning_rate": 1.3550135501355015e-07,
702
+ "loss": 0.0344,
703
+ "step": 810
704
+ },
705
+ {
706
+ "epoch": 19.88,
707
+ "learning_rate": 0.0,
708
+ "loss": 0.0438,
709
+ "step": 820
710
+ },
711
+ {
712
+ "epoch": 19.88,
713
+ "eval_accuracy": 0.9265375854214123,
714
+ "eval_f1_score": 0.9252094972625854,
715
+ "eval_loss": 0.19554641842842102,
716
+ "eval_precision": 0.9307429809120586,
717
+ "eval_runtime": 100.1651,
718
+ "eval_samples_per_second": 17.531,
719
+ "eval_steps_per_second": 0.549,
720
+ "step": 820
721
  },
722
  {
723
+ "epoch": 19.88,
724
+ "step": 820,
725
+ "total_flos": 8.203992042011222e+18,
726
+ "train_loss": 0.18675074555524965,
727
+ "train_runtime": 20102.5048,
728
+ "train_samples_per_second": 5.24,
729
+ "train_steps_per_second": 0.041
730
  }
731
  ],
732
+ "max_steps": 820,
733
+ "num_train_epochs": 20,
734
+ "total_flos": 8.203992042011222e+18,
735
  "trial_name": null,
736
  "trial_params": null
737
  }