File size: 40,965 Bytes
e0c1c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 13.0,
  "eval_steps": 50,
  "global_step": 884,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 37.9459228515625,
      "learning_rate": 1.5280898876404495e-05,
      "loss": 2.6802,
      "step": 68
    },
    {
      "epoch": 1.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.8529411764705882,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.7407679557800293,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8861687056931384,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.863013698630137,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.7407679557800293,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8873239436619719,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.84,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.8529411764705882,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 413.34197998046875,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.887218522968793,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.8648648648648648,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 413.34197998046875,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.8767123287671232,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8533333333333334,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.8602941176470589,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 17.441856384277344,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8855166425710176,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.87248322147651,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 17.441856384277344,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8783783783783784,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8666666666666667,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.8602941176470589,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 382.9211730957031,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8854876210985647,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8741721854304636,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 386.8717956542969,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.868421052631579,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.8602941176470589,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 413.34197998046875,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.887218522968793,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.8741721854304636,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 413.34197998046875,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.8873239436619719,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.88,
      "eval_loss": 1.7807204723358154,
      "eval_runtime": 14.8133,
      "eval_samples_per_second": 9.181,
      "eval_steps_per_second": 1.148,
      "step": 68
    },
    {
      "epoch": 2.0,
      "grad_norm": 101.59432983398438,
      "learning_rate": 1.8817610062893082e-05,
      "loss": 1.4014,
      "step": 136
    },
    {
      "epoch": 2.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.8676470588235294,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.656826376914978,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.894049495028942,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8783783783783784,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.656826376914978,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8904109589041096,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8666666666666667,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.8602941176470589,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 367.9239501953125,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.8915744113817081,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.87248322147651,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 363.2611999511719,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.8783783783783784,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8666666666666667,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.8602941176470589,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.78958511352539,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8925974243856872,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8819875776397516,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 20.95208740234375,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8255813953488372,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9466666666666667,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.875,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 456.61602783203125,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8944639878933996,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8930817610062893,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 456.61602783203125,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8452380952380952,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9466666666666667,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.875,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 456.61602783203125,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.8944639878933996,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.8930817610062893,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 456.61602783203125,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.8904109589041096,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9466666666666667,
      "eval_loss": 1.7683357000350952,
      "eval_runtime": 13.9834,
      "eval_samples_per_second": 9.726,
      "eval_steps_per_second": 1.216,
      "step": 136
    },
    {
      "epoch": 3.0,
      "grad_norm": 102.5489730834961,
      "learning_rate": 1.7106918238993714e-05,
      "loss": 0.7937,
      "step": 204
    },
    {
      "epoch": 3.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.8897058823529411,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5597226619720459,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9038906301326961,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.9006622516556291,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5076813697814941,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8947368421052632,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.8823529411764706,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 314.1024475097656,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9012158574599884,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.8904109589041095,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 295.25762939453125,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9154929577464789,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8666666666666667,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.8970588235294118,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 22.511234283447266,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9033993255493061,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.9054054054054055,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 22.511234283447266,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9178082191780822,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8933333333333333,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.8897058823529411,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 480.63677978515625,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9033851155012284,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8993288590604026,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 502.66156005859375,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9054054054054054,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8933333333333333,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.8970588235294118,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 480.63677978515625,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9038906301326961,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.9054054054054055,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 502.66156005859375,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9178082191780822,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 1.9876813888549805,
      "eval_runtime": 15.0797,
      "eval_samples_per_second": 9.019,
      "eval_steps_per_second": 1.127,
      "step": 204
    },
    {
      "epoch": 4.0,
      "grad_norm": 54.64971160888672,
      "learning_rate": 1.539622641509434e-05,
      "loss": 0.5443,
      "step": 272
    },
    {
      "epoch": 4.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5411365032196045,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9075251070302803,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5411365032196045,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.8897058823529411,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 311.26678466796875,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9040853015076152,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9006622516556291,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 285.96728515625,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.8947368421052632,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 22.630477905273438,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.906411643380887,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 22.630477905273438,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 495.93194580078125,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9068108312799359,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 495.93194580078125,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 495.93194580078125,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9075251070302803,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 495.93194580078125,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 1.910571813583374,
      "eval_runtime": 14.3178,
      "eval_samples_per_second": 9.499,
      "eval_steps_per_second": 1.187,
      "step": 272
    },
    {
      "epoch": 5.0,
      "grad_norm": 7.771420955657959,
      "learning_rate": 1.368553459119497e-05,
      "loss": 0.4225,
      "step": 340
    },
    {
      "epoch": 5.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5279971361160278,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9104573720737034,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5279971361160278,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 319.6995849609375,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.907606464463115,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9115646258503401,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 290.19317626953125,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9305555555555556,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8933333333333333,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 22.626989364624023,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9092245976210003,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 22.626989364624023,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 494.6650390625,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9108555121538641,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.92,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 514.7510986328125,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.92,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.92,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 494.6650390625,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9108555121538641,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.92,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 514.7510986328125,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.92,
      "eval_loss": 1.9418467283248901,
      "eval_runtime": 14.4159,
      "eval_samples_per_second": 9.434,
      "eval_steps_per_second": 1.179,
      "step": 340
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.8858943581581116,
      "learning_rate": 1.1974842767295597e-05,
      "loss": 0.3347,
      "step": 408
    },
    {
      "epoch": 6.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5068449974060059,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9088529074244793,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5068449974060059,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.8970588235294118,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 322.01324462890625,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9107273300403762,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9054054054054055,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 276.34893798828125,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9178082191780822,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8933333333333333,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.167011260986328,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9088745561179659,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.167011260986328,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 506.7710266113281,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9092068556819233,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 506.7710266113281,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 506.7710266113281,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9107273300403762,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 506.7710266113281,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.012265920639038,
      "eval_runtime": 13.9077,
      "eval_samples_per_second": 9.779,
      "eval_steps_per_second": 1.222,
      "step": 408
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.24478188157081604,
      "learning_rate": 1.0264150943396227e-05,
      "loss": 0.3425,
      "step": 476
    },
    {
      "epoch": 7.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5215883255004883,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9081430244026829,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5215883255004883,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 313.0162353515625,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9094090969210814,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9090909090909091,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 313.0162353515625,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9558823529411765,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.8666666666666667,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.342838287353516,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.907981722850528,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.342838287353516,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 511.61737060546875,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9085688725632555,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 511.61737060546875,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 511.61737060546875,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9094090969210814,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 511.61737060546875,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9558823529411765,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.038686752319336,
      "eval_runtime": 14.7627,
      "eval_samples_per_second": 9.212,
      "eval_steps_per_second": 1.152,
      "step": 476
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.09618318825960159,
      "learning_rate": 8.553459119496857e-06,
      "loss": 0.2427,
      "step": 544
    },
    {
      "epoch": 8.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5501157641410828,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9086817589029806,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5119773149490356,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 294.31005859375,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9096226292878296,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 294.31005859375,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.28342056274414,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9089227316320502,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.28342056274414,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 509.5169677734375,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9102569832574411,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 509.5169677734375,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 509.5169677734375,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9102569832574411,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 509.5169677734375,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 1.9878324270248413,
      "eval_runtime": 13.7764,
      "eval_samples_per_second": 9.872,
      "eval_steps_per_second": 1.234,
      "step": 544
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.046123113483190536,
      "learning_rate": 6.842767295597485e-06,
      "loss": 0.2412,
      "step": 612
    },
    {
      "epoch": 9.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5531296730041504,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9088232956862466,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.49595877528190613,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 295.7380676269531,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9178358242971421,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 295.7380676269531,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.565818786621094,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9092338106583002,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.565818786621094,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 515.80322265625,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9102278723159124,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 515.80322265625,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 515.80322265625,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9178358242971421,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 515.80322265625,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.0423502922058105,
      "eval_runtime": 18.8817,
      "eval_samples_per_second": 7.203,
      "eval_steps_per_second": 0.9,
      "step": 612
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.029908249154686928,
      "learning_rate": 5.1320754716981136e-06,
      "loss": 0.1623,
      "step": 680
    },
    {
      "epoch": 10.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5516438484191895,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9085774598037802,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.9103448275862068,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.519783079624176,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.9428571428571428,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 294.6772766113281,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.918847565389113,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 294.6772766113281,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 21.938934326171875,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9089511554768472,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.713035583496094,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 482.1662292480469,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.910254731797207,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 524.188232421875,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 482.1662292480469,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.918847565389113,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 524.188232421875,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.0273122787475586,
      "eval_runtime": 20.5658,
      "eval_samples_per_second": 6.613,
      "eval_steps_per_second": 0.827,
      "step": 680
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.007907501421868801,
      "learning_rate": 3.4213836477987424e-06,
      "loss": 0.1909,
      "step": 748
    },
    {
      "epoch": 11.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.550028383731842,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9090819039672458,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.48157936334609985,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 293.80010986328125,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9219827879602106,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 293.80010986328125,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.849559783935547,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.90922024992935,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.849559783935547,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 482.9535827636719,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.910689468531078,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 527.540771484375,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 482.9535827636719,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9219827879602106,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 527.540771484375,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.0955376625061035,
      "eval_runtime": 20.4117,
      "eval_samples_per_second": 6.663,
      "eval_steps_per_second": 0.833,
      "step": 748
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.3338662087917328,
      "learning_rate": 1.7106918238993712e-06,
      "loss": 0.1507,
      "step": 816
    },
    {
      "epoch": 12.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5501536726951599,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9084179566135925,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.4790937304496765,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 294.13421630859375,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.915716305189008,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 294.13421630859375,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.818954467773438,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9093211275077335,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.818954467773438,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 482.6566162109375,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9104676924615509,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.913907284768212,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 532.9744262695312,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9078947368421053,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.92,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 482.6566162109375,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.915716305189008,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 532.9744262695312,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.92,
      "eval_loss": 2.2124059200286865,
      "eval_runtime": 20.6137,
      "eval_samples_per_second": 6.598,
      "eval_steps_per_second": 0.825,
      "step": 816
    },
    {
      "epoch": 13.0,
      "grad_norm": 0.01023839507251978,
      "learning_rate": 0.0,
      "loss": 0.1406,
      "step": 884
    },
    {
      "epoch": 13.0,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.9044117647058824,
      "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5485918521881104,
      "eval_custom-arc-semantics-data-jp_cosine_ap": 0.9088999169341241,
      "eval_custom-arc-semantics-data-jp_cosine_f1": 0.912751677852349,
      "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.47659817337989807,
      "eval_custom-arc-semantics-data-jp_cosine_precision": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_cosine_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 293.22845458984375,
      "eval_custom-arc-semantics-data-jp_dot_ap": 0.9171086358892895,
      "eval_custom-arc-semantics-data-jp_dot_f1": 0.9166666666666666,
      "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 293.22845458984375,
      "eval_custom-arc-semantics-data-jp_dot_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_dot_recall": 0.88,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.82940673828125,
      "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.9094221163568814,
      "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.82940673828125,
      "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 524.0676879882812,
      "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.9111567321590129,
      "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 524.0676879882812,
      "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.9315068493150684,
      "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.9066666666666666,
      "eval_custom-arc-semantics-data-jp_max_accuracy": 0.9117647058823529,
      "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 524.0676879882812,
      "eval_custom-arc-semantics-data-jp_max_ap": 0.9171086358892895,
      "eval_custom-arc-semantics-data-jp_max_f1": 0.918918918918919,
      "eval_custom-arc-semantics-data-jp_max_f1_threshold": 524.0676879882812,
      "eval_custom-arc-semantics-data-jp_max_precision": 0.9565217391304348,
      "eval_custom-arc-semantics-data-jp_max_recall": 0.9066666666666666,
      "eval_loss": 2.2125871181488037,
      "eval_runtime": 20.4613,
      "eval_samples_per_second": 6.647,
      "eval_steps_per_second": 0.831,
      "step": 884
    }
  ],
  "logging_steps": 500,
  "max_steps": 884,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 13,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}