gokuls committed on
Commit
d3e8016
1 Parent(s): 2fc1c50

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -12,7 +14,7 @@ model-index:
12
  name: Text Classification
13
  type: text-classification
14
  dataset:
15
- name: glue
16
  type: glue
17
  config: stsb
18
  split: validation
@@ -20,7 +22,7 @@ model-index:
20
  metrics:
21
  - name: Spearmanr
22
  type: spearmanr
23
- value: 0.4507892146083376
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -28,12 +30,12 @@ should probably proofread and complete it, then remove this comment. -->
28
 
29
  # hBERTv1_new_pretrain_48_emb_com_stsb
30
 
31
- This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_emb_compress_48](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_emb_compress_48) on the glue dataset.
32
  It achieves the following results on the evaluation set:
33
- - Loss: 1.9571
34
- - Pearson: 0.4588
35
- - Spearmanr: 0.4508
36
- - Combined Score: 0.4548
37
 
38
  ## Model description
39
 
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
14
  name: Text Classification
15
  type: text-classification
16
  dataset:
17
+ name: GLUE STSB
18
  type: glue
19
  config: stsb
20
  split: validation
 
22
  metrics:
23
  - name: Spearmanr
24
  type: spearmanr
25
+ value: 0.45996385438365645
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # hBERTv1_new_pretrain_48_emb_com_stsb
32
 
33
+ This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_emb_compress_48](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_emb_compress_48) on the GLUE STSB dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 1.9214
36
+ - Pearson: 0.4648
37
+ - Spearmanr: 0.4600
38
+ - Combined Score: 0.4624
39
 
40
  ## Model description
41
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "eval_combined_score": 0.46235708950698207,
4
+ "eval_loss": 1.9214489459991455,
5
+ "eval_pearson": 0.46475032463030774,
6
+ "eval_runtime": 6.7246,
7
+ "eval_samples": 1500,
8
+ "eval_samples_per_second": 223.06,
9
+ "eval_spearmanr": 0.45996385438365645,
10
+ "eval_steps_per_second": 1.784,
11
+ "train_loss": 0.6121585881268536,
12
+ "train_runtime": 2633.1293,
13
+ "train_samples": 5749,
14
+ "train_samples_per_second": 109.167,
15
+ "train_steps_per_second": 0.854
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "eval_combined_score": 0.46235708950698207,
4
+ "eval_loss": 1.9214489459991455,
5
+ "eval_pearson": 0.46475032463030774,
6
+ "eval_runtime": 6.7246,
7
+ "eval_samples": 1500,
8
+ "eval_samples_per_second": 223.06,
9
+ "eval_spearmanr": 0.45996385438365645,
10
+ "eval_steps_per_second": 1.784
11
+ }
logs/events.out.tfevents.1686837072.garda.2601621.13 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0b7df4c808478a7923c0b7dc1b3b1109b2f0f997cb4eaf0714d4d31f0883d7e
3
+ size 473
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "train_loss": 0.6121585881268536,
4
+ "train_runtime": 2633.1293,
5
+ "train_samples": 5749,
6
+ "train_samples_per_second": 109.167,
7
+ "train_steps_per_second": 0.854
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.9214489459991455,
3
+ "best_model_checkpoint": "hBERTv1_new_pretrain_48_emb_com_stsb/checkpoint-990",
4
+ "epoch": 27.0,
5
+ "global_step": 1215,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 3.9200000000000004e-05,
13
+ "loss": 2.5817,
14
+ "step": 45
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_combined_score": 0.1961723851082083,
19
+ "eval_loss": 2.602813720703125,
20
+ "eval_pearson": 0.2027356377543126,
21
+ "eval_runtime": 6.748,
22
+ "eval_samples_per_second": 222.289,
23
+ "eval_spearmanr": 0.189609132462104,
24
+ "eval_steps_per_second": 1.778,
25
+ "step": 45
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "learning_rate": 3.8400000000000005e-05,
30
+ "loss": 2.1023,
31
+ "step": 90
32
+ },
33
+ {
34
+ "epoch": 2.0,
35
+ "eval_combined_score": 0.19863013345614586,
36
+ "eval_loss": 2.1595816612243652,
37
+ "eval_pearson": 0.20345395522854448,
38
+ "eval_runtime": 6.7492,
39
+ "eval_samples_per_second": 222.249,
40
+ "eval_spearmanr": 0.19380631168374723,
41
+ "eval_steps_per_second": 1.778,
42
+ "step": 90
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "learning_rate": 3.76e-05,
47
+ "loss": 1.9567,
48
+ "step": 135
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_combined_score": 0.18933959939433503,
53
+ "eval_loss": 2.3409199714660645,
54
+ "eval_pearson": 0.18554383257948298,
55
+ "eval_runtime": 6.7607,
56
+ "eval_samples_per_second": 221.871,
57
+ "eval_spearmanr": 0.19313536620918711,
58
+ "eval_steps_per_second": 1.775,
59
+ "step": 135
60
+ },
61
+ {
62
+ "epoch": 4.0,
63
+ "learning_rate": 3.680000000000001e-05,
64
+ "loss": 1.7201,
65
+ "step": 180
66
+ },
67
+ {
68
+ "epoch": 4.0,
69
+ "eval_combined_score": 0.2899464590935818,
70
+ "eval_loss": 2.1790404319763184,
71
+ "eval_pearson": 0.28650208786193304,
72
+ "eval_runtime": 6.747,
73
+ "eval_samples_per_second": 222.32,
74
+ "eval_spearmanr": 0.29339083032523056,
75
+ "eval_steps_per_second": 1.779,
76
+ "step": 180
77
+ },
78
+ {
79
+ "epoch": 5.0,
80
+ "learning_rate": 3.6e-05,
81
+ "loss": 1.5153,
82
+ "step": 225
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "eval_combined_score": 0.3366713577400152,
87
+ "eval_loss": 2.1208479404449463,
88
+ "eval_pearson": 0.3381105093417233,
89
+ "eval_runtime": 6.7661,
90
+ "eval_samples_per_second": 221.694,
91
+ "eval_spearmanr": 0.3352322061383072,
92
+ "eval_steps_per_second": 1.774,
93
+ "step": 225
94
+ },
95
+ {
96
+ "epoch": 6.0,
97
+ "learning_rate": 3.52e-05,
98
+ "loss": 1.2674,
99
+ "step": 270
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "eval_combined_score": 0.388996037809745,
104
+ "eval_loss": 2.122382879257202,
105
+ "eval_pearson": 0.3881787749232017,
106
+ "eval_runtime": 6.7466,
107
+ "eval_samples_per_second": 222.334,
108
+ "eval_spearmanr": 0.3898133006962884,
109
+ "eval_steps_per_second": 1.779,
110
+ "step": 270
111
+ },
112
+ {
113
+ "epoch": 7.0,
114
+ "learning_rate": 3.44e-05,
115
+ "loss": 1.0115,
116
+ "step": 315
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_combined_score": 0.42925164874364347,
121
+ "eval_loss": 2.22530198097229,
122
+ "eval_pearson": 0.43036479777857173,
123
+ "eval_runtime": 6.7518,
124
+ "eval_samples_per_second": 222.163,
125
+ "eval_spearmanr": 0.4281384997087152,
126
+ "eval_steps_per_second": 1.777,
127
+ "step": 315
128
+ },
129
+ {
130
+ "epoch": 8.0,
131
+ "learning_rate": 3.3600000000000004e-05,
132
+ "loss": 0.7449,
133
+ "step": 360
134
+ },
135
+ {
136
+ "epoch": 8.0,
137
+ "eval_combined_score": 0.4279396504678673,
138
+ "eval_loss": 2.323458433151245,
139
+ "eval_pearson": 0.42358611153077663,
140
+ "eval_runtime": 6.7608,
141
+ "eval_samples_per_second": 221.867,
142
+ "eval_spearmanr": 0.432293189404958,
143
+ "eval_steps_per_second": 1.775,
144
+ "step": 360
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "learning_rate": 3.28e-05,
149
+ "loss": 0.66,
150
+ "step": 405
151
+ },
152
+ {
153
+ "epoch": 9.0,
154
+ "eval_combined_score": 0.4345560407048856,
155
+ "eval_loss": 2.361697196960449,
156
+ "eval_pearson": 0.4340089724216108,
157
+ "eval_runtime": 6.77,
158
+ "eval_samples_per_second": 221.566,
159
+ "eval_spearmanr": 0.4351031089881605,
160
+ "eval_steps_per_second": 1.773,
161
+ "step": 405
162
+ },
163
+ {
164
+ "epoch": 10.0,
165
+ "learning_rate": 3.2000000000000005e-05,
166
+ "loss": 0.4678,
167
+ "step": 450
168
+ },
169
+ {
170
+ "epoch": 10.0,
171
+ "eval_combined_score": 0.4278924878236148,
172
+ "eval_loss": 2.0740506649017334,
173
+ "eval_pearson": 0.42995071418144987,
174
+ "eval_runtime": 6.7876,
175
+ "eval_samples_per_second": 220.99,
176
+ "eval_spearmanr": 0.4258342614657797,
177
+ "eval_steps_per_second": 1.768,
178
+ "step": 450
179
+ },
180
+ {
181
+ "epoch": 11.0,
182
+ "learning_rate": 3.1200000000000006e-05,
183
+ "loss": 0.4438,
184
+ "step": 495
185
+ },
186
+ {
187
+ "epoch": 11.0,
188
+ "eval_combined_score": 0.4289254482191033,
189
+ "eval_loss": 2.3816161155700684,
190
+ "eval_pearson": 0.4284999172461374,
191
+ "eval_runtime": 6.7943,
192
+ "eval_samples_per_second": 220.772,
193
+ "eval_spearmanr": 0.4293509791920692,
194
+ "eval_steps_per_second": 1.766,
195
+ "step": 495
196
+ },
197
+ {
198
+ "epoch": 12.0,
199
+ "learning_rate": 3.0400000000000004e-05,
200
+ "loss": 0.3192,
201
+ "step": 540
202
+ },
203
+ {
204
+ "epoch": 12.0,
205
+ "eval_combined_score": 0.4590614019452154,
206
+ "eval_loss": 2.1672608852386475,
207
+ "eval_pearson": 0.45795017178280906,
208
+ "eval_runtime": 6.8031,
209
+ "eval_samples_per_second": 220.489,
210
+ "eval_spearmanr": 0.4601726321076217,
211
+ "eval_steps_per_second": 1.764,
212
+ "step": 540
213
+ },
214
+ {
215
+ "epoch": 13.0,
216
+ "learning_rate": 2.96e-05,
217
+ "loss": 0.2481,
218
+ "step": 585
219
+ },
220
+ {
221
+ "epoch": 13.0,
222
+ "eval_combined_score": 0.43744904640918025,
223
+ "eval_loss": 2.1544361114501953,
224
+ "eval_pearson": 0.4392148992975784,
225
+ "eval_runtime": 6.7836,
226
+ "eval_samples_per_second": 221.122,
227
+ "eval_spearmanr": 0.43568319352078216,
228
+ "eval_steps_per_second": 1.769,
229
+ "step": 585
230
+ },
231
+ {
232
+ "epoch": 14.0,
233
+ "learning_rate": 2.8800000000000002e-05,
234
+ "loss": 0.2296,
235
+ "step": 630
236
+ },
237
+ {
238
+ "epoch": 14.0,
239
+ "eval_combined_score": 0.45925148211887024,
240
+ "eval_loss": 2.007450580596924,
241
+ "eval_pearson": 0.46026152385259717,
242
+ "eval_runtime": 6.8038,
243
+ "eval_samples_per_second": 220.465,
244
+ "eval_spearmanr": 0.4582414403851433,
245
+ "eval_steps_per_second": 1.764,
246
+ "step": 630
247
+ },
248
+ {
249
+ "epoch": 15.0,
250
+ "learning_rate": 2.8e-05,
251
+ "loss": 0.1765,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 15.0,
256
+ "eval_combined_score": 0.4620511023513117,
257
+ "eval_loss": 2.1394569873809814,
258
+ "eval_pearson": 0.46235490780864075,
259
+ "eval_runtime": 6.7896,
260
+ "eval_samples_per_second": 220.927,
261
+ "eval_spearmanr": 0.46174729689398264,
262
+ "eval_steps_per_second": 1.767,
263
+ "step": 675
264
+ },
265
+ {
266
+ "epoch": 16.0,
267
+ "learning_rate": 2.7200000000000004e-05,
268
+ "loss": 0.1533,
269
+ "step": 720
270
+ },
271
+ {
272
+ "epoch": 16.0,
273
+ "eval_combined_score": 0.4469345688037569,
274
+ "eval_loss": 2.2714791297912598,
275
+ "eval_pearson": 0.45120593215851773,
276
+ "eval_runtime": 6.8234,
277
+ "eval_samples_per_second": 219.832,
278
+ "eval_spearmanr": 0.4426632054489961,
279
+ "eval_steps_per_second": 1.759,
280
+ "step": 720
281
+ },
282
+ {
283
+ "epoch": 17.0,
284
+ "learning_rate": 2.6400000000000005e-05,
285
+ "loss": 0.1343,
286
+ "step": 765
287
+ },
288
+ {
289
+ "epoch": 17.0,
290
+ "eval_combined_score": 0.4428810611082898,
291
+ "eval_loss": 2.172586679458618,
292
+ "eval_pearson": 0.44408986762850433,
293
+ "eval_runtime": 6.787,
294
+ "eval_samples_per_second": 221.01,
295
+ "eval_spearmanr": 0.44167225458807524,
296
+ "eval_steps_per_second": 1.768,
297
+ "step": 765
298
+ },
299
+ {
300
+ "epoch": 18.0,
301
+ "learning_rate": 2.5600000000000002e-05,
302
+ "loss": 0.1373,
303
+ "step": 810
304
+ },
305
+ {
306
+ "epoch": 18.0,
307
+ "eval_combined_score": 0.4478234688937688,
308
+ "eval_loss": 2.02228045463562,
309
+ "eval_pearson": 0.4532121986881763,
310
+ "eval_runtime": 6.7875,
311
+ "eval_samples_per_second": 220.996,
312
+ "eval_spearmanr": 0.4424347390993613,
313
+ "eval_steps_per_second": 1.768,
314
+ "step": 810
315
+ },
316
+ {
317
+ "epoch": 19.0,
318
+ "learning_rate": 2.4800000000000003e-05,
319
+ "loss": 0.1277,
320
+ "step": 855
321
+ },
322
+ {
323
+ "epoch": 19.0,
324
+ "eval_combined_score": 0.4346959695013698,
325
+ "eval_loss": 1.9992105960845947,
326
+ "eval_pearson": 0.4395412779716168,
327
+ "eval_runtime": 6.7974,
328
+ "eval_samples_per_second": 220.673,
329
+ "eval_spearmanr": 0.42985066103112274,
330
+ "eval_steps_per_second": 1.765,
331
+ "step": 855
332
+ },
333
+ {
334
+ "epoch": 20.0,
335
+ "learning_rate": 2.4e-05,
336
+ "loss": 0.0968,
337
+ "step": 900
338
+ },
339
+ {
340
+ "epoch": 20.0,
341
+ "eval_combined_score": 0.46103377995091277,
342
+ "eval_loss": 2.1078310012817383,
343
+ "eval_pearson": 0.4620119136399576,
344
+ "eval_runtime": 6.7915,
345
+ "eval_samples_per_second": 220.864,
346
+ "eval_spearmanr": 0.460055646261868,
347
+ "eval_steps_per_second": 1.767,
348
+ "step": 900
349
+ },
350
+ {
351
+ "epoch": 21.0,
352
+ "learning_rate": 2.32e-05,
353
+ "loss": 0.084,
354
+ "step": 945
355
+ },
356
+ {
357
+ "epoch": 21.0,
358
+ "eval_combined_score": 0.460210810698569,
359
+ "eval_loss": 2.068389415740967,
360
+ "eval_pearson": 0.46274616538784863,
361
+ "eval_runtime": 6.7629,
362
+ "eval_samples_per_second": 221.8,
363
+ "eval_spearmanr": 0.4576754560092894,
364
+ "eval_steps_per_second": 1.774,
365
+ "step": 945
366
+ },
367
+ {
368
+ "epoch": 22.0,
369
+ "learning_rate": 2.2400000000000002e-05,
370
+ "loss": 0.0777,
371
+ "step": 990
372
+ },
373
+ {
374
+ "epoch": 22.0,
375
+ "eval_combined_score": 0.46235708950698207,
376
+ "eval_loss": 1.9214489459991455,
377
+ "eval_pearson": 0.46475032463030774,
378
+ "eval_runtime": 6.7758,
379
+ "eval_samples_per_second": 221.376,
380
+ "eval_spearmanr": 0.45996385438365645,
381
+ "eval_steps_per_second": 1.771,
382
+ "step": 990
383
+ },
384
+ {
385
+ "epoch": 23.0,
386
+ "learning_rate": 2.1600000000000003e-05,
387
+ "loss": 0.0572,
388
+ "step": 1035
389
+ },
390
+ {
391
+ "epoch": 23.0,
392
+ "eval_combined_score": 0.44643779764240843,
393
+ "eval_loss": 2.0636143684387207,
394
+ "eval_pearson": 0.45063900222949543,
395
+ "eval_runtime": 6.7543,
396
+ "eval_samples_per_second": 222.082,
397
+ "eval_spearmanr": 0.4422365930553215,
398
+ "eval_steps_per_second": 1.777,
399
+ "step": 1035
400
+ },
401
+ {
402
+ "epoch": 24.0,
403
+ "learning_rate": 2.08e-05,
404
+ "loss": 0.0615,
405
+ "step": 1080
406
+ },
407
+ {
408
+ "epoch": 24.0,
409
+ "eval_combined_score": 0.44382684994837796,
410
+ "eval_loss": 2.0404300689697266,
411
+ "eval_pearson": 0.4488704472998438,
412
+ "eval_runtime": 6.7964,
413
+ "eval_samples_per_second": 220.705,
414
+ "eval_spearmanr": 0.43878325259691214,
415
+ "eval_steps_per_second": 1.766,
416
+ "step": 1080
417
+ },
418
+ {
419
+ "epoch": 25.0,
420
+ "learning_rate": 2e-05,
421
+ "loss": 0.0516,
422
+ "step": 1125
423
+ },
424
+ {
425
+ "epoch": 25.0,
426
+ "eval_combined_score": 0.4475345905545639,
427
+ "eval_loss": 2.059901714324951,
428
+ "eval_pearson": 0.4515878825695937,
429
+ "eval_runtime": 6.7915,
430
+ "eval_samples_per_second": 220.866,
431
+ "eval_spearmanr": 0.44348129853953405,
432
+ "eval_steps_per_second": 1.767,
433
+ "step": 1125
434
+ },
435
+ {
436
+ "epoch": 26.0,
437
+ "learning_rate": 1.9200000000000003e-05,
438
+ "loss": 0.0501,
439
+ "step": 1170
440
+ },
441
+ {
442
+ "epoch": 26.0,
443
+ "eval_combined_score": 0.4509567766877898,
444
+ "eval_loss": 2.0359442234039307,
445
+ "eval_pearson": 0.4530378970256098,
446
+ "eval_runtime": 6.8254,
447
+ "eval_samples_per_second": 219.768,
448
+ "eval_spearmanr": 0.44887565634996973,
449
+ "eval_steps_per_second": 1.758,
450
+ "step": 1170
451
+ },
452
+ {
453
+ "epoch": 27.0,
454
+ "learning_rate": 1.8400000000000003e-05,
455
+ "loss": 0.0515,
456
+ "step": 1215
457
+ },
458
+ {
459
+ "epoch": 27.0,
460
+ "eval_combined_score": 0.45481392235284457,
461
+ "eval_loss": 1.9571282863616943,
462
+ "eval_pearson": 0.4588386300973516,
463
+ "eval_runtime": 6.8054,
464
+ "eval_samples_per_second": 220.412,
465
+ "eval_spearmanr": 0.4507892146083376,
466
+ "eval_steps_per_second": 1.763,
467
+ "step": 1215
468
+ },
469
+ {
470
+ "epoch": 27.0,
471
+ "step": 1215,
472
+ "total_flos": 2.301115836137472e+16,
473
+ "train_loss": 0.6121585881268536,
474
+ "train_runtime": 2633.1293,
475
+ "train_samples_per_second": 109.167,
476
+ "train_steps_per_second": 0.854
477
+ }
478
+ ],
479
+ "max_steps": 2250,
480
+ "num_train_epochs": 50,
481
+ "total_flos": 2.301115836137472e+16,
482
+ "trial_name": null,
483
+ "trial_params": null
484
+ }