pszemraj commited on
Commit
4426782
1 Parent(s): 96f3aa9

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8.0,
3
+ "eval_loss": 0.3541961908340454,
4
+ "eval_matthews_correlation": 0.6579677841732349,
5
+ "eval_runtime": 1.9552,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 533.451,
8
+ "eval_steps_per_second": 33.756,
9
+ "train_loss": 0.148403340177402,
10
+ "train_runtime": 707.366,
11
+ "train_samples": 8551,
12
+ "train_samples_per_second": 96.708,
13
+ "train_steps_per_second": 0.758
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8.0,
3
+ "eval_loss": 0.3541961908340454,
4
+ "eval_matthews_correlation": 0.6579677841732349,
5
+ "eval_runtime": 1.9552,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 533.451,
8
+ "eval_steps_per_second": 33.756
9
+ }
predict_results_cola.txt ADDED
@@ -0,0 +1,1064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 acceptable
3
+ 1 unacceptable
4
+ 2 acceptable
5
+ 3 acceptable
6
+ 4 unacceptable
7
+ 5 acceptable
8
+ 6 acceptable
9
+ 7 acceptable
10
+ 8 acceptable
11
+ 9 acceptable
12
+ 10 unacceptable
13
+ 11 acceptable
14
+ 12 unacceptable
15
+ 13 acceptable
16
+ 14 acceptable
17
+ 15 acceptable
18
+ 16 unacceptable
19
+ 17 acceptable
20
+ 18 unacceptable
21
+ 19 unacceptable
22
+ 20 acceptable
23
+ 21 acceptable
24
+ 22 unacceptable
25
+ 23 acceptable
26
+ 24 acceptable
27
+ 25 unacceptable
28
+ 26 acceptable
29
+ 27 unacceptable
30
+ 28 acceptable
31
+ 29 acceptable
32
+ 30 acceptable
33
+ 31 acceptable
34
+ 32 acceptable
35
+ 33 acceptable
36
+ 34 unacceptable
37
+ 35 unacceptable
38
+ 36 acceptable
39
+ 37 acceptable
40
+ 38 acceptable
41
+ 39 acceptable
42
+ 40 unacceptable
43
+ 41 unacceptable
44
+ 42 unacceptable
45
+ 43 acceptable
46
+ 44 unacceptable
47
+ 45 acceptable
48
+ 46 unacceptable
49
+ 47 acceptable
50
+ 48 acceptable
51
+ 49 acceptable
52
+ 50 unacceptable
53
+ 51 acceptable
54
+ 52 acceptable
55
+ 53 acceptable
56
+ 54 unacceptable
57
+ 55 unacceptable
58
+ 56 acceptable
59
+ 57 unacceptable
60
+ 58 unacceptable
61
+ 59 acceptable
62
+ 60 unacceptable
63
+ 61 acceptable
64
+ 62 acceptable
65
+ 63 acceptable
66
+ 64 acceptable
67
+ 65 acceptable
68
+ 66 acceptable
69
+ 67 acceptable
70
+ 68 acceptable
71
+ 69 unacceptable
72
+ 70 acceptable
73
+ 71 acceptable
74
+ 72 unacceptable
75
+ 73 acceptable
76
+ 74 acceptable
77
+ 75 acceptable
78
+ 76 acceptable
79
+ 77 acceptable
80
+ 78 acceptable
81
+ 79 acceptable
82
+ 80 acceptable
83
+ 81 unacceptable
84
+ 82 acceptable
85
+ 83 acceptable
86
+ 84 acceptable
87
+ 85 acceptable
88
+ 86 acceptable
89
+ 87 acceptable
90
+ 88 acceptable
91
+ 89 acceptable
92
+ 90 acceptable
93
+ 91 acceptable
94
+ 92 unacceptable
95
+ 93 acceptable
96
+ 94 acceptable
97
+ 95 acceptable
98
+ 96 acceptable
99
+ 97 acceptable
100
+ 98 acceptable
101
+ 99 acceptable
102
+ 100 unacceptable
103
+ 101 unacceptable
104
+ 102 acceptable
105
+ 103 unacceptable
106
+ 104 acceptable
107
+ 105 acceptable
108
+ 106 acceptable
109
+ 107 acceptable
110
+ 108 acceptable
111
+ 109 acceptable
112
+ 110 acceptable
113
+ 111 acceptable
114
+ 112 acceptable
115
+ 113 acceptable
116
+ 114 acceptable
117
+ 115 unacceptable
118
+ 116 unacceptable
119
+ 117 unacceptable
120
+ 118 unacceptable
121
+ 119 acceptable
122
+ 120 unacceptable
123
+ 121 unacceptable
124
+ 122 acceptable
125
+ 123 unacceptable
126
+ 124 acceptable
127
+ 125 acceptable
128
+ 126 unacceptable
129
+ 127 acceptable
130
+ 128 acceptable
131
+ 129 acceptable
132
+ 130 unacceptable
133
+ 131 acceptable
134
+ 132 acceptable
135
+ 133 acceptable
136
+ 134 acceptable
137
+ 135 acceptable
138
+ 136 acceptable
139
+ 137 acceptable
140
+ 138 unacceptable
141
+ 139 unacceptable
142
+ 140 acceptable
143
+ 141 acceptable
144
+ 142 acceptable
145
+ 143 acceptable
146
+ 144 acceptable
147
+ 145 acceptable
148
+ 146 acceptable
149
+ 147 acceptable
150
+ 148 acceptable
151
+ 149 unacceptable
152
+ 150 unacceptable
153
+ 151 acceptable
154
+ 152 acceptable
155
+ 153 acceptable
156
+ 154 acceptable
157
+ 155 acceptable
158
+ 156 unacceptable
159
+ 157 unacceptable
160
+ 158 unacceptable
161
+ 159 acceptable
162
+ 160 unacceptable
163
+ 161 acceptable
164
+ 162 unacceptable
165
+ 163 acceptable
166
+ 164 acceptable
167
+ 165 unacceptable
168
+ 166 unacceptable
169
+ 167 unacceptable
170
+ 168 acceptable
171
+ 169 acceptable
172
+ 170 acceptable
173
+ 171 acceptable
174
+ 172 acceptable
175
+ 173 unacceptable
176
+ 174 acceptable
177
+ 175 acceptable
178
+ 176 acceptable
179
+ 177 acceptable
180
+ 178 acceptable
181
+ 179 acceptable
182
+ 180 acceptable
183
+ 181 acceptable
184
+ 182 unacceptable
185
+ 183 acceptable
186
+ 184 unacceptable
187
+ 185 acceptable
188
+ 186 acceptable
189
+ 187 unacceptable
190
+ 188 unacceptable
191
+ 189 acceptable
192
+ 190 acceptable
193
+ 191 acceptable
194
+ 192 unacceptable
195
+ 193 acceptable
196
+ 194 unacceptable
197
+ 195 acceptable
198
+ 196 unacceptable
199
+ 197 acceptable
200
+ 198 acceptable
201
+ 199 acceptable
202
+ 200 acceptable
203
+ 201 unacceptable
204
+ 202 unacceptable
205
+ 203 acceptable
206
+ 204 acceptable
207
+ 205 acceptable
208
+ 206 acceptable
209
+ 207 acceptable
210
+ 208 acceptable
211
+ 209 acceptable
212
+ 210 acceptable
213
+ 211 acceptable
214
+ 212 acceptable
215
+ 213 unacceptable
216
+ 214 acceptable
217
+ 215 acceptable
218
+ 216 acceptable
219
+ 217 unacceptable
220
+ 218 acceptable
221
+ 219 acceptable
222
+ 220 acceptable
223
+ 221 acceptable
224
+ 222 acceptable
225
+ 223 acceptable
226
+ 224 unacceptable
227
+ 225 acceptable
228
+ 226 unacceptable
229
+ 227 acceptable
230
+ 228 unacceptable
231
+ 229 acceptable
232
+ 230 acceptable
233
+ 231 acceptable
234
+ 232 acceptable
235
+ 233 acceptable
236
+ 234 acceptable
237
+ 235 acceptable
238
+ 236 unacceptable
239
+ 237 acceptable
240
+ 238 acceptable
241
+ 239 unacceptable
242
+ 240 acceptable
243
+ 241 acceptable
244
+ 242 acceptable
245
+ 243 acceptable
246
+ 244 acceptable
247
+ 245 acceptable
248
+ 246 acceptable
249
+ 247 acceptable
250
+ 248 acceptable
251
+ 249 acceptable
252
+ 250 acceptable
253
+ 251 acceptable
254
+ 252 acceptable
255
+ 253 acceptable
256
+ 254 acceptable
257
+ 255 acceptable
258
+ 256 acceptable
259
+ 257 acceptable
260
+ 258 acceptable
261
+ 259 acceptable
262
+ 260 unacceptable
263
+ 261 acceptable
264
+ 262 acceptable
265
+ 263 unacceptable
266
+ 264 unacceptable
267
+ 265 acceptable
268
+ 266 acceptable
269
+ 267 acceptable
270
+ 268 acceptable
271
+ 269 acceptable
272
+ 270 acceptable
273
+ 271 acceptable
274
+ 272 unacceptable
275
+ 273 acceptable
276
+ 274 unacceptable
277
+ 275 acceptable
278
+ 276 acceptable
279
+ 277 acceptable
280
+ 278 acceptable
281
+ 279 unacceptable
282
+ 280 acceptable
283
+ 281 unacceptable
284
+ 282 acceptable
285
+ 283 acceptable
286
+ 284 acceptable
287
+ 285 unacceptable
288
+ 286 acceptable
289
+ 287 acceptable
290
+ 288 acceptable
291
+ 289 acceptable
292
+ 290 acceptable
293
+ 291 unacceptable
294
+ 292 acceptable
295
+ 293 unacceptable
296
+ 294 unacceptable
297
+ 295 acceptable
298
+ 296 acceptable
299
+ 297 acceptable
300
+ 298 acceptable
301
+ 299 acceptable
302
+ 300 acceptable
303
+ 301 unacceptable
304
+ 302 acceptable
305
+ 303 acceptable
306
+ 304 acceptable
307
+ 305 acceptable
308
+ 306 unacceptable
309
+ 307 unacceptable
310
+ 308 acceptable
311
+ 309 acceptable
312
+ 310 acceptable
313
+ 311 acceptable
314
+ 312 acceptable
315
+ 313 unacceptable
316
+ 314 unacceptable
317
+ 315 unacceptable
318
+ 316 unacceptable
319
+ 317 acceptable
320
+ 318 acceptable
321
+ 319 unacceptable
322
+ 320 unacceptable
323
+ 321 acceptable
324
+ 322 unacceptable
325
+ 323 acceptable
326
+ 324 acceptable
327
+ 325 unacceptable
328
+ 326 acceptable
329
+ 327 acceptable
330
+ 328 acceptable
331
+ 329 acceptable
332
+ 330 acceptable
333
+ 331 acceptable
334
+ 332 unacceptable
335
+ 333 acceptable
336
+ 334 acceptable
337
+ 335 acceptable
338
+ 336 unacceptable
339
+ 337 acceptable
340
+ 338 acceptable
341
+ 339 acceptable
342
+ 340 acceptable
343
+ 341 acceptable
344
+ 342 acceptable
345
+ 343 acceptable
346
+ 344 unacceptable
347
+ 345 acceptable
348
+ 346 acceptable
349
+ 347 acceptable
350
+ 348 acceptable
351
+ 349 acceptable
352
+ 350 acceptable
353
+ 351 unacceptable
354
+ 352 acceptable
355
+ 353 acceptable
356
+ 354 acceptable
357
+ 355 acceptable
358
+ 356 unacceptable
359
+ 357 acceptable
360
+ 358 acceptable
361
+ 359 acceptable
362
+ 360 acceptable
363
+ 361 unacceptable
364
+ 362 acceptable
365
+ 363 unacceptable
366
+ 364 acceptable
367
+ 365 acceptable
368
+ 366 acceptable
369
+ 367 acceptable
370
+ 368 acceptable
371
+ 369 acceptable
372
+ 370 unacceptable
373
+ 371 acceptable
374
+ 372 acceptable
375
+ 373 acceptable
376
+ 374 acceptable
377
+ 375 acceptable
378
+ 376 acceptable
379
+ 377 acceptable
380
+ 378 acceptable
381
+ 379 unacceptable
382
+ 380 acceptable
383
+ 381 acceptable
384
+ 382 acceptable
385
+ 383 acceptable
386
+ 384 acceptable
387
+ 385 acceptable
388
+ 386 acceptable
389
+ 387 acceptable
390
+ 388 acceptable
391
+ 389 unacceptable
392
+ 390 acceptable
393
+ 391 unacceptable
394
+ 392 unacceptable
395
+ 393 acceptable
396
+ 394 acceptable
397
+ 395 acceptable
398
+ 396 acceptable
399
+ 397 acceptable
400
+ 398 acceptable
401
+ 399 acceptable
402
+ 400 acceptable
403
+ 401 acceptable
404
+ 402 unacceptable
405
+ 403 acceptable
406
+ 404 acceptable
407
+ 405 acceptable
408
+ 406 acceptable
409
+ 407 acceptable
410
+ 408 acceptable
411
+ 409 acceptable
412
+ 410 acceptable
413
+ 411 unacceptable
414
+ 412 acceptable
415
+ 413 acceptable
416
+ 414 acceptable
417
+ 415 unacceptable
418
+ 416 acceptable
419
+ 417 acceptable
420
+ 418 unacceptable
421
+ 419 acceptable
422
+ 420 acceptable
423
+ 421 acceptable
424
+ 422 acceptable
425
+ 423 acceptable
426
+ 424 acceptable
427
+ 425 acceptable
428
+ 426 acceptable
429
+ 427 acceptable
430
+ 428 acceptable
431
+ 429 unacceptable
432
+ 430 acceptable
433
+ 431 acceptable
434
+ 432 acceptable
435
+ 433 unacceptable
436
+ 434 acceptable
437
+ 435 unacceptable
438
+ 436 acceptable
439
+ 437 acceptable
440
+ 438 acceptable
441
+ 439 acceptable
442
+ 440 acceptable
443
+ 441 acceptable
444
+ 442 acceptable
445
+ 443 acceptable
446
+ 444 unacceptable
447
+ 445 acceptable
448
+ 446 acceptable
449
+ 447 acceptable
450
+ 448 unacceptable
451
+ 449 unacceptable
452
+ 450 unacceptable
453
+ 451 acceptable
454
+ 452 acceptable
455
+ 453 acceptable
456
+ 454 unacceptable
457
+ 455 acceptable
458
+ 456 acceptable
459
+ 457 acceptable
460
+ 458 acceptable
461
+ 459 unacceptable
462
+ 460 acceptable
463
+ 461 acceptable
464
+ 462 unacceptable
465
+ 463 acceptable
466
+ 464 acceptable
467
+ 465 acceptable
468
+ 466 acceptable
469
+ 467 acceptable
470
+ 468 acceptable
471
+ 469 acceptable
472
+ 470 acceptable
473
+ 471 acceptable
474
+ 472 acceptable
475
+ 473 unacceptable
476
+ 474 acceptable
477
+ 475 unacceptable
478
+ 476 unacceptable
479
+ 477 unacceptable
480
+ 478 acceptable
481
+ 479 unacceptable
482
+ 480 acceptable
483
+ 481 acceptable
484
+ 482 acceptable
485
+ 483 unacceptable
486
+ 484 acceptable
487
+ 485 acceptable
488
+ 486 acceptable
489
+ 487 acceptable
490
+ 488 unacceptable
491
+ 489 unacceptable
492
+ 490 acceptable
493
+ 491 unacceptable
494
+ 492 acceptable
495
+ 493 acceptable
496
+ 494 acceptable
497
+ 495 unacceptable
498
+ 496 unacceptable
499
+ 497 acceptable
500
+ 498 acceptable
501
+ 499 acceptable
502
+ 500 acceptable
503
+ 501 acceptable
504
+ 502 acceptable
505
+ 503 acceptable
506
+ 504 unacceptable
507
+ 505 acceptable
508
+ 506 acceptable
509
+ 507 acceptable
510
+ 508 unacceptable
511
+ 509 acceptable
512
+ 510 acceptable
513
+ 511 unacceptable
514
+ 512 acceptable
515
+ 513 acceptable
516
+ 514 acceptable
517
+ 515 acceptable
518
+ 516 acceptable
519
+ 517 unacceptable
520
+ 518 acceptable
521
+ 519 acceptable
522
+ 520 acceptable
523
+ 521 unacceptable
524
+ 522 acceptable
525
+ 523 acceptable
526
+ 524 unacceptable
527
+ 525 acceptable
528
+ 526 acceptable
529
+ 527 acceptable
530
+ 528 acceptable
531
+ 529 acceptable
532
+ 530 acceptable
533
+ 531 acceptable
534
+ 532 acceptable
535
+ 533 acceptable
536
+ 534 acceptable
537
+ 535 acceptable
538
+ 536 acceptable
539
+ 537 acceptable
540
+ 538 acceptable
541
+ 539 acceptable
542
+ 540 acceptable
543
+ 541 acceptable
544
+ 542 acceptable
545
+ 543 acceptable
546
+ 544 unacceptable
547
+ 545 unacceptable
548
+ 546 acceptable
549
+ 547 acceptable
550
+ 548 acceptable
551
+ 549 acceptable
552
+ 550 acceptable
553
+ 551 acceptable
554
+ 552 acceptable
555
+ 553 acceptable
556
+ 554 acceptable
557
+ 555 acceptable
558
+ 556 acceptable
559
+ 557 acceptable
560
+ 558 acceptable
561
+ 559 acceptable
562
+ 560 acceptable
563
+ 561 acceptable
564
+ 562 acceptable
565
+ 563 acceptable
566
+ 564 acceptable
567
+ 565 acceptable
568
+ 566 acceptable
569
+ 567 acceptable
570
+ 568 acceptable
571
+ 569 acceptable
572
+ 570 unacceptable
573
+ 571 acceptable
574
+ 572 acceptable
575
+ 573 acceptable
576
+ 574 acceptable
577
+ 575 unacceptable
578
+ 576 acceptable
579
+ 577 acceptable
580
+ 578 acceptable
581
+ 579 acceptable
582
+ 580 acceptable
583
+ 581 acceptable
584
+ 582 acceptable
585
+ 583 acceptable
586
+ 584 acceptable
587
+ 585 acceptable
588
+ 586 acceptable
589
+ 587 acceptable
590
+ 588 acceptable
591
+ 589 acceptable
592
+ 590 acceptable
593
+ 591 acceptable
594
+ 592 acceptable
595
+ 593 acceptable
596
+ 594 acceptable
597
+ 595 acceptable
598
+ 596 acceptable
599
+ 597 acceptable
600
+ 598 acceptable
601
+ 599 acceptable
602
+ 600 acceptable
603
+ 601 acceptable
604
+ 602 acceptable
605
+ 603 acceptable
606
+ 604 unacceptable
607
+ 605 acceptable
608
+ 606 acceptable
609
+ 607 acceptable
610
+ 608 acceptable
611
+ 609 acceptable
612
+ 610 unacceptable
613
+ 611 acceptable
614
+ 612 acceptable
615
+ 613 acceptable
616
+ 614 acceptable
617
+ 615 unacceptable
618
+ 616 acceptable
619
+ 617 unacceptable
620
+ 618 unacceptable
621
+ 619 acceptable
622
+ 620 acceptable
623
+ 621 acceptable
624
+ 622 acceptable
625
+ 623 acceptable
626
+ 624 unacceptable
627
+ 625 acceptable
628
+ 626 acceptable
629
+ 627 acceptable
630
+ 628 acceptable
631
+ 629 acceptable
632
+ 630 unacceptable
633
+ 631 acceptable
634
+ 632 acceptable
635
+ 633 acceptable
636
+ 634 acceptable
637
+ 635 unacceptable
638
+ 636 unacceptable
639
+ 637 acceptable
640
+ 638 acceptable
641
+ 639 acceptable
642
+ 640 acceptable
643
+ 641 acceptable
644
+ 642 unacceptable
645
+ 643 acceptable
646
+ 644 acceptable
647
+ 645 unacceptable
648
+ 646 acceptable
649
+ 647 acceptable
650
+ 648 unacceptable
651
+ 649 acceptable
652
+ 650 unacceptable
653
+ 651 acceptable
654
+ 652 acceptable
655
+ 653 acceptable
656
+ 654 acceptable
657
+ 655 unacceptable
658
+ 656 unacceptable
659
+ 657 acceptable
660
+ 658 acceptable
661
+ 659 acceptable
662
+ 660 acceptable
663
+ 661 acceptable
664
+ 662 acceptable
665
+ 663 acceptable
666
+ 664 acceptable
667
+ 665 acceptable
668
+ 666 acceptable
669
+ 667 acceptable
670
+ 668 acceptable
671
+ 669 acceptable
672
+ 670 acceptable
673
+ 671 acceptable
674
+ 672 acceptable
675
+ 673 acceptable
676
+ 674 unacceptable
677
+ 675 acceptable
678
+ 676 acceptable
679
+ 677 acceptable
680
+ 678 acceptable
681
+ 679 unacceptable
682
+ 680 acceptable
683
+ 681 acceptable
684
+ 682 acceptable
685
+ 683 acceptable
686
+ 684 acceptable
687
+ 685 acceptable
688
+ 686 acceptable
689
+ 687 acceptable
690
+ 688 unacceptable
691
+ 689 unacceptable
692
+ 690 unacceptable
693
+ 691 acceptable
694
+ 692 unacceptable
695
+ 693 acceptable
696
+ 694 unacceptable
697
+ 695 unacceptable
698
+ 696 acceptable
699
+ 697 acceptable
700
+ 698 acceptable
701
+ 699 acceptable
702
+ 700 acceptable
703
+ 701 unacceptable
704
+ 702 unacceptable
705
+ 703 unacceptable
706
+ 704 acceptable
707
+ 705 unacceptable
708
+ 706 acceptable
709
+ 707 unacceptable
710
+ 708 acceptable
711
+ 709 acceptable
712
+ 710 acceptable
713
+ 711 acceptable
714
+ 712 acceptable
715
+ 713 acceptable
716
+ 714 acceptable
717
+ 715 unacceptable
718
+ 716 unacceptable
719
+ 717 unacceptable
720
+ 718 unacceptable
721
+ 719 unacceptable
722
+ 720 acceptable
723
+ 721 unacceptable
724
+ 722 unacceptable
725
+ 723 unacceptable
726
+ 724 unacceptable
727
+ 725 unacceptable
728
+ 726 unacceptable
729
+ 727 unacceptable
730
+ 728 unacceptable
731
+ 729 acceptable
732
+ 730 unacceptable
733
+ 731 unacceptable
734
+ 732 unacceptable
735
+ 733 unacceptable
736
+ 734 unacceptable
737
+ 735 unacceptable
738
+ 736 unacceptable
739
+ 737 acceptable
740
+ 738 acceptable
741
+ 739 acceptable
742
+ 740 acceptable
743
+ 741 unacceptable
744
+ 742 acceptable
745
+ 743 acceptable
746
+ 744 unacceptable
747
+ 745 unacceptable
748
+ 746 unacceptable
749
+ 747 unacceptable
750
+ 748 acceptable
751
+ 749 acceptable
752
+ 750 acceptable
753
+ 751 acceptable
754
+ 752 acceptable
755
+ 753 acceptable
756
+ 754 acceptable
757
+ 755 acceptable
758
+ 756 acceptable
759
+ 757 acceptable
760
+ 758 acceptable
761
+ 759 acceptable
762
+ 760 acceptable
763
+ 761 acceptable
764
+ 762 acceptable
765
+ 763 acceptable
766
+ 764 acceptable
767
+ 765 acceptable
768
+ 766 acceptable
769
+ 767 acceptable
770
+ 768 acceptable
771
+ 769 acceptable
772
+ 770 acceptable
773
+ 771 acceptable
774
+ 772 acceptable
775
+ 773 acceptable
776
+ 774 acceptable
777
+ 775 unacceptable
778
+ 776 unacceptable
779
+ 777 acceptable
780
+ 778 acceptable
781
+ 779 acceptable
782
+ 780 acceptable
783
+ 781 acceptable
784
+ 782 acceptable
785
+ 783 acceptable
786
+ 784 acceptable
787
+ 785 acceptable
788
+ 786 acceptable
789
+ 787 acceptable
790
+ 788 acceptable
791
+ 789 acceptable
792
+ 790 acceptable
793
+ 791 acceptable
794
+ 792 acceptable
795
+ 793 acceptable
796
+ 794 acceptable
797
+ 795 acceptable
798
+ 796 unacceptable
799
+ 797 acceptable
800
+ 798 unacceptable
801
+ 799 unacceptable
802
+ 800 acceptable
803
+ 801 acceptable
804
+ 802 acceptable
805
+ 803 acceptable
806
+ 804 unacceptable
807
+ 805 acceptable
808
+ 806 acceptable
809
+ 807 acceptable
810
+ 808 unacceptable
811
+ 809 acceptable
812
+ 810 acceptable
813
+ 811 acceptable
814
+ 812 acceptable
815
+ 813 acceptable
816
+ 814 acceptable
817
+ 815 unacceptable
818
+ 816 acceptable
819
+ 817 acceptable
820
+ 818 acceptable
821
+ 819 acceptable
822
+ 820 unacceptable
823
+ 821 acceptable
824
+ 822 unacceptable
825
+ 823 unacceptable
826
+ 824 acceptable
827
+ 825 unacceptable
828
+ 826 unacceptable
829
+ 827 acceptable
830
+ 828 acceptable
831
+ 829 acceptable
832
+ 830 unacceptable
833
+ 831 unacceptable
834
+ 832 unacceptable
835
+ 833 acceptable
836
+ 834 acceptable
837
+ 835 acceptable
838
+ 836 acceptable
839
+ 837 acceptable
840
+ 838 acceptable
841
+ 839 acceptable
842
+ 840 acceptable
843
+ 841 unacceptable
844
+ 842 acceptable
845
+ 843 unacceptable
846
+ 844 acceptable
847
+ 845 unacceptable
848
+ 846 unacceptable
849
+ 847 acceptable
850
+ 848 acceptable
851
+ 849 acceptable
852
+ 850 acceptable
853
+ 851 unacceptable
854
+ 852 acceptable
855
+ 853 unacceptable
856
+ 854 acceptable
857
+ 855 acceptable
858
+ 856 acceptable
859
+ 857 acceptable
860
+ 858 unacceptable
861
+ 859 acceptable
862
+ 860 acceptable
863
+ 861 acceptable
864
+ 862 acceptable
865
+ 863 acceptable
866
+ 864 acceptable
867
+ 865 acceptable
868
+ 866 unacceptable
869
+ 867 unacceptable
870
+ 868 acceptable
871
+ 869 acceptable
872
+ 870 unacceptable
873
+ 871 acceptable
874
+ 872 unacceptable
875
+ 873 unacceptable
876
+ 874 acceptable
877
+ 875 acceptable
878
+ 876 acceptable
879
+ 877 unacceptable
880
+ 878 acceptable
881
+ 879 acceptable
882
+ 880 acceptable
883
+ 881 acceptable
884
+ 882 acceptable
885
+ 883 acceptable
886
+ 884 acceptable
887
+ 885 unacceptable
888
+ 886 acceptable
889
+ 887 unacceptable
890
+ 888 unacceptable
891
+ 889 unacceptable
892
+ 890 unacceptable
893
+ 891 unacceptable
894
+ 892 unacceptable
895
+ 893 acceptable
896
+ 894 acceptable
897
+ 895 acceptable
898
+ 896 acceptable
899
+ 897 acceptable
900
+ 898 unacceptable
901
+ 899 unacceptable
902
+ 900 acceptable
903
+ 901 acceptable
904
+ 902 acceptable
905
+ 903 acceptable
906
+ 904 unacceptable
907
+ 905 acceptable
908
+ 906 acceptable
909
+ 907 acceptable
910
+ 908 acceptable
911
+ 909 acceptable
912
+ 910 acceptable
913
+ 911 acceptable
914
+ 912 acceptable
915
+ 913 acceptable
916
+ 914 acceptable
917
+ 915 acceptable
918
+ 916 acceptable
919
+ 917 acceptable
920
+ 918 acceptable
921
+ 919 acceptable
922
+ 920 acceptable
923
+ 921 acceptable
924
+ 922 acceptable
925
+ 923 acceptable
926
+ 924 unacceptable
927
+ 925 acceptable
928
+ 926 acceptable
929
+ 927 acceptable
930
+ 928 acceptable
931
+ 929 unacceptable
932
+ 930 unacceptable
933
+ 931 unacceptable
934
+ 932 unacceptable
935
+ 933 unacceptable
936
+ 934 acceptable
937
+ 935 acceptable
938
+ 936 acceptable
939
+ 937 unacceptable
940
+ 938 unacceptable
941
+ 939 unacceptable
942
+ 940 acceptable
943
+ 941 acceptable
944
+ 942 unacceptable
945
+ 943 acceptable
946
+ 944 acceptable
947
+ 945 acceptable
948
+ 946 acceptable
949
+ 947 acceptable
950
+ 948 unacceptable
951
+ 949 unacceptable
952
+ 950 acceptable
953
+ 951 acceptable
954
+ 952 acceptable
955
+ 953 acceptable
956
+ 954 acceptable
957
+ 955 unacceptable
958
+ 956 acceptable
959
+ 957 acceptable
960
+ 958 acceptable
961
+ 959 acceptable
962
+ 960 acceptable
963
+ 961 acceptable
964
+ 962 acceptable
965
+ 963 acceptable
966
+ 964 acceptable
967
+ 965 acceptable
968
+ 966 acceptable
969
+ 967 unacceptable
970
+ 968 unacceptable
971
+ 969 acceptable
972
+ 970 acceptable
973
+ 971 acceptable
974
+ 972 acceptable
975
+ 973 acceptable
976
+ 974 acceptable
977
+ 975 acceptable
978
+ 976 acceptable
979
+ 977 acceptable
980
+ 978 acceptable
981
+ 979 acceptable
982
+ 980 acceptable
983
+ 981 acceptable
984
+ 982 unacceptable
985
+ 983 acceptable
986
+ 984 acceptable
987
+ 985 acceptable
988
+ 986 unacceptable
989
+ 987 acceptable
990
+ 988 acceptable
991
+ 989 acceptable
992
+ 990 acceptable
993
+ 991 acceptable
994
+ 992 acceptable
995
+ 993 acceptable
996
+ 994 acceptable
997
+ 995 acceptable
998
+ 996 acceptable
999
+ 997 unacceptable
1000
+ 998 unacceptable
1001
+ 999 acceptable
1002
+ 1000 unacceptable
1003
+ 1001 acceptable
1004
+ 1002 acceptable
1005
+ 1003 acceptable
1006
+ 1004 unacceptable
1007
+ 1005 unacceptable
1008
+ 1006 acceptable
1009
+ 1007 acceptable
1010
+ 1008 acceptable
1011
+ 1009 acceptable
1012
+ 1010 acceptable
1013
+ 1011 acceptable
1014
+ 1012 acceptable
1015
+ 1013 acceptable
1016
+ 1014 acceptable
1017
+ 1015 acceptable
1018
+ 1016 acceptable
1019
+ 1017 acceptable
1020
+ 1018 acceptable
1021
+ 1019 acceptable
1022
+ 1020 acceptable
1023
+ 1021 acceptable
1024
+ 1022 acceptable
1025
+ 1023 unacceptable
1026
+ 1024 unacceptable
1027
+ 1025 acceptable
1028
+ 1026 acceptable
1029
+ 1027 acceptable
1030
+ 1028 acceptable
1031
+ 1029 unacceptable
1032
+ 1030 acceptable
1033
+ 1031 acceptable
1034
+ 1032 acceptable
1035
+ 1033 acceptable
1036
+ 1034 acceptable
1037
+ 1035 unacceptable
1038
+ 1036 unacceptable
1039
+ 1037 acceptable
1040
+ 1038 unacceptable
1041
+ 1039 unacceptable
1042
+ 1040 unacceptable
1043
+ 1041 unacceptable
1044
+ 1042 unacceptable
1045
+ 1043 unacceptable
1046
+ 1044 acceptable
1047
+ 1045 acceptable
1048
+ 1046 unacceptable
1049
+ 1047 acceptable
1050
+ 1048 unacceptable
1051
+ 1049 unacceptable
1052
+ 1050 unacceptable
1053
+ 1051 unacceptable
1054
+ 1052 unacceptable
1055
+ 1053 unacceptable
1056
+ 1054 acceptable
1057
+ 1055 acceptable
1058
+ 1056 acceptable
1059
+ 1057 acceptable
1060
+ 1058 acceptable
1061
+ 1059 acceptable
1062
+ 1060 acceptable
1063
+ 1061 acceptable
1064
+ 1062 acceptable
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8.0,
3
+ "train_loss": 0.148403340177402,
4
+ "train_runtime": 707.366,
5
+ "train_samples": 8551,
6
+ "train_samples_per_second": 96.708,
7
+ "train_steps_per_second": 0.758
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3541961908340454,
3
+ "best_model_checkpoint": "./runtime-text-classification/electra-base-discriminator-CoLA/checkpoint-134",
4
+ "epoch": 8.0,
5
+ "global_step": 536,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 2.3529411764705884e-05,
13
+ "loss": 0.6818,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.15,
18
+ "learning_rate": 4.705882352941177e-05,
19
+ "loss": 0.6093,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.22,
24
+ "learning_rate": 7.058823529411765e-05,
25
+ "loss": 0.5991,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.3,
30
+ "learning_rate": 7.999340483599093e-05,
31
+ "loss": 0.5014,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.37,
36
+ "learning_rate": 7.995310893151923e-05,
37
+ "loss": 0.4253,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.45,
42
+ "learning_rate": 7.987621796687375e-05,
43
+ "loss": 0.4231,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.52,
48
+ "learning_rate": 7.976280237035291e-05,
49
+ "loss": 0.411,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.6,
54
+ "learning_rate": 7.964584084578578e-05,
55
+ "loss": 0.4267,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.67,
60
+ "learning_rate": 7.95056379670812e-05,
61
+ "loss": 0.4357,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.75,
66
+ "learning_rate": 7.929782771972567e-05,
67
+ "loss": 0.4468,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.82,
72
+ "learning_rate": 7.905402261718713e-05,
73
+ "loss": 0.4651,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.9,
78
+ "learning_rate": 7.877444597281055e-05,
79
+ "loss": 0.3906,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.97,
84
+ "learning_rate": 7.845935386489366e-05,
85
+ "loss": 0.4004,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 1.0,
90
+ "eval_loss": 0.35688334703445435,
91
+ "eval_matthews_correlation": 0.6339887598158311,
92
+ "eval_runtime": 1.9677,
93
+ "eval_samples_per_second": 530.071,
94
+ "eval_steps_per_second": 33.542,
95
+ "step": 67
96
+ },
97
+ {
98
+ "epoch": 1.04,
99
+ "learning_rate": 7.810903490213196e-05,
100
+ "loss": 0.3209,
101
+ "step": 70
102
+ },
103
+ {
104
+ "epoch": 1.12,
105
+ "learning_rate": 7.77238099592675e-05,
106
+ "loss": 0.3024,
107
+ "step": 75
108
+ },
109
+ {
110
+ "epoch": 1.19,
111
+ "learning_rate": 7.730403188318356e-05,
112
+ "loss": 0.2579,
113
+ "step": 80
114
+ },
115
+ {
116
+ "epoch": 1.27,
117
+ "learning_rate": 7.685008516971451e-05,
118
+ "loss": 0.3063,
119
+ "step": 85
120
+ },
121
+ {
122
+ "epoch": 1.34,
123
+ "learning_rate": 7.636238561146672e-05,
124
+ "loss": 0.2636,
125
+ "step": 90
126
+ },
127
+ {
128
+ "epoch": 1.42,
129
+ "learning_rate": 7.58413799169732e-05,
130
+ "loss": 0.294,
131
+ "step": 95
132
+ },
133
+ {
134
+ "epoch": 1.49,
135
+ "learning_rate": 7.528754530153087e-05,
136
+ "loss": 0.2836,
137
+ "step": 100
138
+ },
139
+ {
140
+ "epoch": 1.57,
141
+ "learning_rate": 7.470138905009503e-05,
142
+ "loss": 0.3346,
143
+ "step": 105
144
+ },
145
+ {
146
+ "epoch": 1.64,
147
+ "learning_rate": 7.408344805263159e-05,
148
+ "loss": 0.2939,
149
+ "step": 110
150
+ },
151
+ {
152
+ "epoch": 1.72,
153
+ "learning_rate": 7.343428831235258e-05,
154
+ "loss": 0.2815,
155
+ "step": 115
156
+ },
157
+ {
158
+ "epoch": 1.79,
159
+ "learning_rate": 7.275450442728538e-05,
160
+ "loss": 0.2799,
161
+ "step": 120
162
+ },
163
+ {
164
+ "epoch": 1.87,
165
+ "learning_rate": 7.204471904565049e-05,
166
+ "loss": 0.2491,
167
+ "step": 125
168
+ },
169
+ {
170
+ "epoch": 1.94,
171
+ "learning_rate": 7.130558229554668e-05,
172
+ "loss": 0.2843,
173
+ "step": 130
174
+ },
175
+ {
176
+ "epoch": 2.0,
177
+ "eval_loss": 0.3541961908340454,
178
+ "eval_matthews_correlation": 0.6579677841732349,
179
+ "eval_runtime": 2.0826,
180
+ "eval_samples_per_second": 500.817,
181
+ "eval_steps_per_second": 31.691,
182
+ "step": 134
183
+ },
184
+ {
185
+ "epoch": 2.01,
186
+ "learning_rate": 7.053777118946615e-05,
187
+ "loss": 0.2591,
188
+ "step": 135
189
+ },
190
+ {
191
+ "epoch": 2.09,
192
+ "learning_rate": 6.974198900418456e-05,
193
+ "loss": 0.182,
194
+ "step": 140
195
+ },
196
+ {
197
+ "epoch": 2.16,
198
+ "learning_rate": 6.89189646365947e-05,
199
+ "loss": 0.1888,
200
+ "step": 145
201
+ },
202
+ {
203
+ "epoch": 2.24,
204
+ "learning_rate": 6.806945193607303e-05,
205
+ "loss": 0.1455,
206
+ "step": 150
207
+ },
208
+ {
209
+ "epoch": 2.31,
210
+ "learning_rate": 6.719422901399121e-05,
211
+ "loss": 0.1544,
212
+ "step": 155
213
+ },
214
+ {
215
+ "epoch": 2.39,
216
+ "learning_rate": 6.629409753100488e-05,
217
+ "loss": 0.1622,
218
+ "step": 160
219
+ },
220
+ {
221
+ "epoch": 2.46,
222
+ "learning_rate": 6.536988196277224e-05,
223
+ "loss": 0.1784,
224
+ "step": 165
225
+ },
226
+ {
227
+ "epoch": 2.54,
228
+ "learning_rate": 6.442242884477545e-05,
229
+ "loss": 0.203,
230
+ "step": 170
231
+ },
232
+ {
233
+ "epoch": 2.61,
234
+ "learning_rate": 6.345260599693621e-05,
235
+ "loss": 0.1598,
236
+ "step": 175
237
+ },
238
+ {
239
+ "epoch": 2.69,
240
+ "learning_rate": 6.24613017287359e-05,
241
+ "loss": 0.1555,
242
+ "step": 180
243
+ },
244
+ {
245
+ "epoch": 2.76,
246
+ "learning_rate": 6.144942402556829e-05,
247
+ "loss": 0.1363,
248
+ "step": 185
249
+ },
250
+ {
251
+ "epoch": 2.84,
252
+ "learning_rate": 6.041789971707008e-05,
253
+ "loss": 0.1907,
254
+ "step": 190
255
+ },
256
+ {
257
+ "epoch": 2.91,
258
+ "learning_rate": 5.9367673628191066e-05,
259
+ "loss": 0.1935,
260
+ "step": 195
261
+ },
262
+ {
263
+ "epoch": 2.99,
264
+ "learning_rate": 5.8299707713781464e-05,
265
+ "loss": 0.1228,
266
+ "step": 200
267
+ },
268
+ {
269
+ "epoch": 3.0,
270
+ "eval_loss": 0.42005565762519836,
271
+ "eval_matthews_correlation": 0.6412441886324401,
272
+ "eval_runtime": 1.931,
273
+ "eval_samples_per_second": 540.144,
274
+ "eval_steps_per_second": 34.18,
275
+ "step": 201
276
+ },
277
+ {
278
+ "epoch": 3.06,
279
+ "learning_rate": 5.721498017748917e-05,
280
+ "loss": 0.1156,
281
+ "step": 205
282
+ },
283
+ {
284
+ "epoch": 3.13,
285
+ "learning_rate": 5.6114484575773854e-05,
286
+ "loss": 0.0937,
287
+ "step": 210
288
+ },
289
+ {
290
+ "epoch": 3.21,
291
+ "learning_rate": 5.499922890785868e-05,
292
+ "loss": 0.1138,
293
+ "step": 215
294
+ },
295
+ {
296
+ "epoch": 3.28,
297
+ "learning_rate": 5.3870234692453145e-05,
298
+ "loss": 0.0948,
299
+ "step": 220
300
+ },
301
+ {
302
+ "epoch": 3.36,
303
+ "learning_rate": 5.272853603209279e-05,
304
+ "loss": 0.0777,
305
+ "step": 225
306
+ },
307
+ {
308
+ "epoch": 3.43,
309
+ "learning_rate": 5.15751786659527e-05,
310
+ "loss": 0.1148,
311
+ "step": 230
312
+ },
313
+ {
314
+ "epoch": 3.51,
315
+ "learning_rate": 5.0411219012002445e-05,
316
+ "loss": 0.1676,
317
+ "step": 235
318
+ },
319
+ {
320
+ "epoch": 3.58,
321
+ "learning_rate": 4.92377231993798e-05,
322
+ "loss": 0.0868,
323
+ "step": 240
324
+ },
325
+ {
326
+ "epoch": 3.66,
327
+ "learning_rate": 4.805576609186946e-05,
328
+ "loss": 0.1009,
329
+ "step": 245
330
+ },
331
+ {
332
+ "epoch": 3.73,
333
+ "learning_rate": 4.686643030338131e-05,
334
+ "loss": 0.1072,
335
+ "step": 250
336
+ },
337
+ {
338
+ "epoch": 3.81,
339
+ "learning_rate": 4.5670805206329904e-05,
340
+ "loss": 0.0908,
341
+ "step": 255
342
+ },
343
+ {
344
+ "epoch": 3.88,
345
+ "learning_rate": 4.4469985933823594e-05,
346
+ "loss": 0.1031,
347
+ "step": 260
348
+ },
349
+ {
350
+ "epoch": 3.96,
351
+ "learning_rate": 4.326507237657703e-05,
352
+ "loss": 0.0989,
353
+ "step": 265
354
+ },
355
+ {
356
+ "epoch": 4.0,
357
+ "eval_loss": 0.4780134856700897,
358
+ "eval_matthews_correlation": 0.6757073194553476,
359
+ "eval_runtime": 1.9344,
360
+ "eval_samples_per_second": 539.172,
361
+ "eval_steps_per_second": 34.118,
362
+ "step": 268
363
+ },
364
+ {
365
+ "epoch": 4.03,
366
+ "learning_rate": 4.205716817546599e-05,
367
+ "loss": 0.0854,
368
+ "step": 270
369
+ },
370
+ {
371
+ "epoch": 4.1,
372
+ "learning_rate": 4.084737971064728e-05,
373
+ "loss": 0.0493,
374
+ "step": 275
375
+ },
376
+ {
377
+ "epoch": 4.18,
378
+ "learning_rate": 3.9636815088169476e-05,
379
+ "loss": 0.0902,
380
+ "step": 280
381
+ },
382
+ {
383
+ "epoch": 4.25,
384
+ "learning_rate": 3.842658312500302e-05,
385
+ "loss": 0.0567,
386
+ "step": 285
387
+ },
388
+ {
389
+ "epoch": 4.33,
390
+ "learning_rate": 3.721779233341892e-05,
391
+ "loss": 0.0549,
392
+ "step": 290
393
+ },
394
+ {
395
+ "epoch": 4.4,
396
+ "learning_rate": 3.6011549905646726e-05,
397
+ "loss": 0.0596,
398
+ "step": 295
399
+ },
400
+ {
401
+ "epoch": 4.48,
402
+ "learning_rate": 3.480896069974151e-05,
403
+ "loss": 0.048,
404
+ "step": 300
405
+ },
406
+ {
407
+ "epoch": 4.55,
408
+ "learning_rate": 3.361112622758889e-05,
409
+ "loss": 0.081,
410
+ "step": 305
411
+ },
412
+ {
413
+ "epoch": 4.63,
414
+ "learning_rate": 3.241914364597488e-05,
415
+ "loss": 0.0871,
416
+ "step": 310
417
+ },
418
+ {
419
+ "epoch": 4.7,
420
+ "learning_rate": 3.123410475164486e-05,
421
+ "loss": 0.062,
422
+ "step": 315
423
+ },
424
+ {
425
+ "epoch": 4.78,
426
+ "learning_rate": 3.005709498127213e-05,
427
+ "loss": 0.0763,
428
+ "step": 320
429
+ },
430
+ {
431
+ "epoch": 4.85,
432
+ "learning_rate": 2.8889192417251865e-05,
433
+ "loss": 0.0634,
434
+ "step": 325
435
+ },
436
+ {
437
+ "epoch": 4.93,
438
+ "learning_rate": 2.7731466800231328e-05,
439
+ "loss": 0.0798,
440
+ "step": 330
441
+ },
442
+ {
443
+ "epoch": 5.0,
444
+ "learning_rate": 2.658497854928058e-05,
445
+ "loss": 0.0681,
446
+ "step": 335
447
+ },
448
+ {
449
+ "epoch": 5.0,
450
+ "eval_loss": 0.49001187086105347,
451
+ "eval_matthews_correlation": 0.6925407764960233,
452
+ "eval_runtime": 1.9334,
453
+ "eval_samples_per_second": 539.471,
454
+ "eval_steps_per_second": 34.137,
455
+ "step": 335
456
+ },
457
+ {
458
+ "epoch": 5.07,
459
+ "learning_rate": 2.5450777790601394e-05,
460
+ "loss": 0.0465,
461
+ "step": 340
462
+ },
463
+ {
464
+ "epoch": 5.15,
465
+ "learning_rate": 2.432990339566394e-05,
466
+ "loss": 0.0674,
467
+ "step": 345
468
+ },
469
+ {
470
+ "epoch": 5.22,
471
+ "learning_rate": 2.322338202965213e-05,
472
+ "loss": 0.0419,
473
+ "step": 350
474
+ },
475
+ {
476
+ "epoch": 5.3,
477
+ "learning_rate": 2.2132227211089496e-05,
478
+ "loss": 0.0481,
479
+ "step": 355
480
+ },
481
+ {
482
+ "epoch": 5.37,
483
+ "learning_rate": 2.1057438383506473e-05,
484
+ "loss": 0.0362,
485
+ "step": 360
486
+ },
487
+ {
488
+ "epoch": 5.45,
489
+ "learning_rate": 2.0000000000000012e-05,
490
+ "loss": 0.0336,
491
+ "step": 365
492
+ },
493
+ {
494
+ "epoch": 5.52,
495
+ "learning_rate": 1.89608806215234e-05,
496
+ "loss": 0.0571,
497
+ "step": 370
498
+ },
499
+ {
500
+ "epoch": 5.6,
501
+ "learning_rate": 1.79410320297327e-05,
502
+ "loss": 0.0459,
503
+ "step": 375
504
+ },
505
+ {
506
+ "epoch": 5.67,
507
+ "learning_rate": 1.694138835520206e-05,
508
+ "loss": 0.0238,
509
+ "step": 380
510
+ },
511
+ {
512
+ "epoch": 5.75,
513
+ "learning_rate": 1.5962865221806545e-05,
514
+ "loss": 0.054,
515
+ "step": 385
516
+ },
517
+ {
518
+ "epoch": 5.82,
519
+ "learning_rate": 1.5006358908056302e-05,
520
+ "loss": 0.017,
521
+ "step": 390
522
+ },
523
+ {
524
+ "epoch": 5.9,
525
+ "learning_rate": 1.4072745526149944e-05,
526
+ "loss": 0.0224,
527
+ "step": 395
528
+ },
529
+ {
530
+ "epoch": 5.97,
531
+ "learning_rate": 1.316288021949944e-05,
532
+ "loss": 0.0506,
533
+ "step": 400
534
+ },
535
+ {
536
+ "epoch": 6.0,
537
+ "eval_loss": 0.5837473273277283,
538
+ "eval_matthews_correlation": 0.6785370776792806,
539
+ "eval_runtime": 1.9297,
540
+ "eval_samples_per_second": 540.494,
541
+ "eval_steps_per_second": 34.202,
542
+ "step": 402
543
+ },
544
+ {
545
+ "epoch": 6.04,
546
+ "learning_rate": 1.2277596379461248e-05,
547
+ "loss": 0.0317,
548
+ "step": 405
549
+ },
550
+ {
551
+ "epoch": 6.12,
552
+ "learning_rate": 1.1417704881991271e-05,
553
+ "loss": 0.0451,
554
+ "step": 410
555
+ },
556
+ {
557
+ "epoch": 6.19,
558
+ "learning_rate": 1.0583993344922905e-05,
559
+ "loss": 0.0061,
560
+ "step": 415
561
+ },
562
+ {
563
+ "epoch": 6.27,
564
+ "learning_rate": 9.777225406548166e-06,
565
+ "loss": 0.028,
566
+ "step": 420
567
+ },
568
+ {
569
+ "epoch": 6.34,
570
+ "learning_rate": 8.998140026163119e-06,
571
+ "loss": 0.0358,
572
+ "step": 425
573
+ },
574
+ {
575
+ "epoch": 6.42,
576
+ "learning_rate": 8.24745080721788e-06,
577
+ "loss": 0.0154,
578
+ "step": 430
579
+ },
580
+ {
581
+ "epoch": 6.49,
582
+ "learning_rate": 7.5258453436913185e-06,
583
+ "loss": 0.0194,
584
+ "step": 435
585
+ },
586
+ {
587
+ "epoch": 6.57,
588
+ "learning_rate": 6.83398459028922e-06,
589
+ "loss": 0.0292,
590
+ "step": 440
591
+ },
592
+ {
593
+ "epoch": 6.64,
594
+ "learning_rate": 6.172502257042672e-06,
595
+ "loss": 0.0473,
596
+ "step": 445
597
+ },
598
+ {
599
+ "epoch": 6.72,
600
+ "learning_rate": 5.542004228861171e-06,
601
+ "loss": 0.0327,
602
+ "step": 450
603
+ },
604
+ {
605
+ "epoch": 6.79,
606
+ "learning_rate": 4.94306801057221e-06,
607
+ "loss": 0.0106,
608
+ "step": 455
609
+ },
610
+ {
611
+ "epoch": 6.87,
612
+ "learning_rate": 4.376242197955542e-06,
613
+ "loss": 0.0226,
614
+ "step": 460
615
+ },
616
+ {
617
+ "epoch": 6.94,
618
+ "learning_rate": 3.842045975256774e-06,
619
+ "loss": 0.0093,
620
+ "step": 465
621
+ },
622
+ {
623
+ "epoch": 7.0,
624
+ "eval_loss": 0.6298416256904602,
625
+ "eval_matthews_correlation": 0.6651627094568674,
626
+ "eval_runtime": 2.9643,
627
+ "eval_samples_per_second": 351.848,
628
+ "eval_steps_per_second": 22.265,
629
+ "step": 469
630
+ },
631
+ {
632
+ "epoch": 7.01,
633
+ "learning_rate": 3.340968639640463e-06,
634
+ "loss": 0.0178,
635
+ "step": 470
636
+ },
637
+ {
638
+ "epoch": 7.09,
639
+ "learning_rate": 2.8734691530181603e-06,
640
+ "loss": 0.0129,
641
+ "step": 475
642
+ },
643
+ {
644
+ "epoch": 7.16,
645
+ "learning_rate": 2.439975721662231e-06,
646
+ "loss": 0.034,
647
+ "step": 480
648
+ },
649
+ {
650
+ "epoch": 7.24,
651
+ "learning_rate": 2.040885403990136e-06,
652
+ "loss": 0.0067,
653
+ "step": 485
654
+ },
655
+ {
656
+ "epoch": 7.31,
657
+ "learning_rate": 1.6765637468787011e-06,
658
+ "loss": 0.0251,
659
+ "step": 490
660
+ },
661
+ {
662
+ "epoch": 7.39,
663
+ "learning_rate": 1.3473444508414102e-06,
664
+ "loss": 0.0137,
665
+ "step": 495
666
+ },
667
+ {
668
+ "epoch": 7.46,
669
+ "learning_rate": 1.0535290643752448e-06,
670
+ "loss": 0.0125,
671
+ "step": 500
672
+ },
673
+ {
674
+ "epoch": 7.54,
675
+ "learning_rate": 7.953867077573396e-07,
676
+ "loss": 0.0213,
677
+ "step": 505
678
+ },
679
+ {
680
+ "epoch": 7.61,
681
+ "learning_rate": 5.731538265441572e-07,
682
+ "loss": 0.0297,
683
+ "step": 510
684
+ },
685
+ {
686
+ "epoch": 7.69,
687
+ "learning_rate": 3.870339749991292e-07,
688
+ "loss": 0.0356,
689
+ "step": 515
690
+ },
691
+ {
692
+ "epoch": 7.76,
693
+ "learning_rate": 2.3719762964709903e-07,
694
+ "loss": 0.0213,
695
+ "step": 520
696
+ },
697
+ {
698
+ "epoch": 7.84,
699
+ "learning_rate": 1.2378203312626292e-07,
700
+ "loss": 0.0122,
701
+ "step": 525
702
+ },
703
+ {
704
+ "epoch": 7.91,
705
+ "learning_rate": 4.689106848078773e-08,
706
+ "loss": 0.0355,
707
+ "step": 530
708
+ },
709
+ {
710
+ "epoch": 7.99,
711
+ "learning_rate": 6.595164009066679e-09,
712
+ "loss": 0.0244,
713
+ "step": 535
714
+ },
715
+ {
716
+ "epoch": 8.0,
717
+ "eval_loss": 0.6291629076004028,
718
+ "eval_matthews_correlation": 0.6750024002589107,
719
+ "eval_runtime": 2.0387,
720
+ "eval_samples_per_second": 511.593,
721
+ "eval_steps_per_second": 32.373,
722
+ "step": 536
723
+ },
724
+ {
725
+ "epoch": 8.0,
726
+ "step": 536,
727
+ "total_flos": 4499725164216320.0,
728
+ "train_loss": 0.148403340177402,
729
+ "train_runtime": 707.366,
730
+ "train_samples_per_second": 96.708,
731
+ "train_steps_per_second": 0.758
732
+ }
733
+ ],
734
+ "max_steps": 536,
735
+ "num_train_epochs": 8,
736
+ "total_flos": 4499725164216320.0,
737
+ "trial_name": null,
738
+ "trial_params": null
739
+ }