diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36214 @@ +{ + "best_metric": 0.5343035343035343, + "best_model_checkpoint": "./final/question/text2sql-t5-base-schema-generator/checkpoint-24000", + "epoch": 102.56410256410257, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 3.337783711615487e-08, + "loss": 2.3603, + "step": 1 + }, + { + "epoch": 0.03, + "learning_rate": 1.335113484646195e-07, + "loss": 2.6238, + "step": 4 + }, + { + "epoch": 0.07, + "learning_rate": 2.67022696929239e-07, + "loss": 2.5591, + "step": 8 + }, + { + "epoch": 0.1, + "learning_rate": 4.005340453938585e-07, + "loss": 2.563, + "step": 12 + }, + { + "epoch": 0.14, + "learning_rate": 5.34045393858478e-07, + "loss": 2.7499, + "step": 16 + }, + { + "epoch": 0.17, + "learning_rate": 6.675567423230975e-07, + "loss": 2.4741, + "step": 20 + }, + { + "epoch": 0.21, + "learning_rate": 8.01068090787717e-07, + "loss": 2.3916, + "step": 24 + }, + { + "epoch": 0.24, + "learning_rate": 9.345794392523364e-07, + "loss": 2.568, + "step": 28 + }, + { + "epoch": 0.27, + "learning_rate": 1.068090787716956e-06, + "loss": 2.5685, + "step": 32 + }, + { + "epoch": 0.31, + "learning_rate": 1.2016021361815755e-06, + "loss": 2.4451, + "step": 36 + }, + { + "epoch": 0.34, + "learning_rate": 1.335113484646195e-06, + "loss": 2.6357, + "step": 40 + }, + { + "epoch": 0.38, + "learning_rate": 1.4686248331108143e-06, + "loss": 2.4322, + "step": 44 + }, + { + "epoch": 0.41, + "learning_rate": 1.602136181575434e-06, + "loss": 2.4776, + "step": 48 + }, + { + "epoch": 0.44, + "learning_rate": 1.7356475300400534e-06, + "loss": 2.1708, + "step": 52 + }, + { + "epoch": 0.48, + "learning_rate": 1.8691588785046728e-06, + "loss": 2.3881, + "step": 56 + }, + { + "epoch": 0.51, + "learning_rate": 2.0026702269692925e-06, + "loss": 2.181, + "step": 60 + }, + { + "epoch": 0.55, + "learning_rate": 2.136181575433912e-06, + "loss": 2.2601, + "step": 64 + }, + { + "epoch": 0.58, + "learning_rate": 2.2696929238985316e-06, + "loss": 2.234, + "step": 68 + }, + { + "epoch": 0.62, + "learning_rate": 2.403204272363151e-06, + "loss": 2.3423, + "step": 72 + }, + { + "epoch": 0.65, + "learning_rate": 2.5367156208277703e-06, + "loss": 2.2052, + "step": 76 + }, + { + "epoch": 0.68, + "learning_rate": 2.67022696929239e-06, + "loss": 2.3827, + "step": 80 + }, + { + "epoch": 0.72, + "learning_rate": 2.8037383177570094e-06, + "loss": 1.9671, + "step": 84 + }, + { + "epoch": 0.75, + "learning_rate": 2.9372496662216287e-06, + "loss": 1.9526, + "step": 88 + }, + { + "epoch": 0.79, + "learning_rate": 3.0707610146862484e-06, + "loss": 1.9553, + "step": 92 + }, + { + "epoch": 0.82, + "learning_rate": 3.204272363150868e-06, + "loss": 2.0414, + "step": 96 + }, + { + "epoch": 0.85, + "learning_rate": 3.337783711615487e-06, + "loss": 2.3561, + "step": 100 + }, + { + "epoch": 0.89, + "learning_rate": 3.471295060080107e-06, + "loss": 1.9045, + "step": 104 + }, + { + "epoch": 0.92, + "learning_rate": 3.6048064085447266e-06, + "loss": 1.841, + "step": 108 + }, + { + "epoch": 0.96, + "learning_rate": 3.7383177570093455e-06, + "loss": 1.7683, + "step": 112 + }, + { + "epoch": 0.99, + "learning_rate": 3.871829105473966e-06, + "loss": 1.8482, + "step": 116 + }, + { + "epoch": 1.03, + "learning_rate": 4.005340453938585e-06, + "loss": 1.7921, + "step": 120 + }, + { + "epoch": 1.06, + "learning_rate": 4.138851802403204e-06, + "loss": 1.7806, + "step": 124 + }, + { + "epoch": 1.09, + "learning_rate": 4.272363150867824e-06, + "loss": 1.6736, + "step": 128 + }, + { + "epoch": 1.13, + "learning_rate": 4.405874499332444e-06, + "loss": 1.8067, + "step": 132 + }, + { + "epoch": 1.16, + "learning_rate": 4.539385847797063e-06, + "loss": 1.7831, + "step": 136 + }, + { + "epoch": 1.2, + "learning_rate": 4.6728971962616825e-06, + "loss": 1.7054, + "step": 140 + }, + { + "epoch": 1.23, + "learning_rate": 4.806408544726302e-06, + "loss": 1.63, + "step": 144 + }, + { + "epoch": 1.26, + "learning_rate": 4.939919893190922e-06, + "loss": 1.5281, + "step": 148 + }, + { + "epoch": 1.3, + "learning_rate": 5.0734312416555405e-06, + "loss": 1.5909, + "step": 152 + }, + { + "epoch": 1.33, + "learning_rate": 5.206942590120161e-06, + "loss": 1.6147, + "step": 156 + }, + { + "epoch": 1.37, + "learning_rate": 5.34045393858478e-06, + "loss": 1.4966, + "step": 160 + }, + { + "epoch": 1.4, + "learning_rate": 5.473965287049399e-06, + "loss": 1.3448, + "step": 164 + }, + { + "epoch": 1.44, + "learning_rate": 5.607476635514019e-06, + "loss": 1.449, + "step": 168 + }, + { + "epoch": 1.47, + "learning_rate": 5.740987983978639e-06, + "loss": 1.388, + "step": 172 + }, + { + "epoch": 1.5, + "learning_rate": 5.874499332443257e-06, + "loss": 1.454, + "step": 176 + }, + { + "epoch": 1.54, + "learning_rate": 6.0080106809078775e-06, + "loss": 1.3239, + "step": 180 + }, + { + "epoch": 1.57, + "learning_rate": 6.141522029372497e-06, + "loss": 1.3004, + "step": 184 + }, + { + "epoch": 1.61, + "learning_rate": 6.275033377837117e-06, + "loss": 1.3924, + "step": 188 + }, + { + "epoch": 1.64, + "learning_rate": 6.408544726301736e-06, + "loss": 1.3636, + "step": 192 + }, + { + "epoch": 1.68, + "learning_rate": 6.542056074766355e-06, + "loss": 1.2384, + "step": 196 + }, + { + "epoch": 1.71, + "learning_rate": 6.675567423230974e-06, + "loss": 1.2257, + "step": 200 + }, + { + "epoch": 1.74, + "learning_rate": 6.809078771695594e-06, + "loss": 1.2446, + "step": 204 + }, + { + "epoch": 1.78, + "learning_rate": 6.942590120160214e-06, + "loss": 1.4018, + "step": 208 + }, + { + "epoch": 1.81, + "learning_rate": 7.076101468624834e-06, + "loss": 1.2668, + "step": 212 + }, + { + "epoch": 1.85, + "learning_rate": 7.209612817089453e-06, + "loss": 1.2525, + "step": 216 + }, + { + "epoch": 1.88, + "learning_rate": 7.343124165554073e-06, + "loss": 1.1977, + "step": 220 + }, + { + "epoch": 1.91, + "learning_rate": 7.476635514018691e-06, + "loss": 1.1998, + "step": 224 + }, + { + "epoch": 1.95, + "learning_rate": 7.610146862483311e-06, + "loss": 1.1087, + "step": 228 + }, + { + "epoch": 1.98, + "learning_rate": 7.743658210947931e-06, + "loss": 1.0731, + "step": 232 + }, + { + "epoch": 2.02, + "learning_rate": 7.87716955941255e-06, + "loss": 1.0327, + "step": 236 + }, + { + "epoch": 2.05, + "learning_rate": 8.01068090787717e-06, + "loss": 1.0915, + "step": 240 + }, + { + "epoch": 2.09, + "learning_rate": 8.14419225634179e-06, + "loss": 1.0304, + "step": 244 + }, + { + "epoch": 2.12, + "learning_rate": 8.277703604806409e-06, + "loss": 1.0082, + "step": 248 + }, + { + "epoch": 2.15, + "learning_rate": 8.411214953271028e-06, + "loss": 1.0408, + "step": 252 + }, + { + "epoch": 2.19, + "learning_rate": 8.544726301735647e-06, + "loss": 0.983, + "step": 256 + }, + { + "epoch": 2.22, + "learning_rate": 8.678237650200267e-06, + "loss": 0.9362, + "step": 260 + }, + { + "epoch": 2.26, + "learning_rate": 8.811748998664888e-06, + "loss": 0.9181, + "step": 264 + }, + { + "epoch": 2.29, + "learning_rate": 8.945260347129507e-06, + "loss": 0.8616, + "step": 268 + }, + { + "epoch": 2.32, + "learning_rate": 9.078771695594126e-06, + "loss": 0.8992, + "step": 272 + }, + { + "epoch": 2.36, + "learning_rate": 9.212283044058744e-06, + "loss": 0.8689, + "step": 276 + }, + { + "epoch": 2.39, + "learning_rate": 9.345794392523365e-06, + "loss": 0.8462, + "step": 280 + }, + { + "epoch": 2.43, + "learning_rate": 9.479305740987984e-06, + "loss": 0.9342, + "step": 284 + }, + { + "epoch": 2.46, + "learning_rate": 9.612817089452604e-06, + "loss": 0.8598, + "step": 288 + }, + { + "epoch": 2.5, + "learning_rate": 9.746328437917223e-06, + "loss": 0.8383, + "step": 292 + }, + { + "epoch": 2.53, + "learning_rate": 9.879839786381844e-06, + "loss": 0.7935, + "step": 296 + }, + { + "epoch": 2.56, + "learning_rate": 1.0013351134846462e-05, + "loss": 0.7795, + "step": 300 + }, + { + "epoch": 2.6, + "learning_rate": 1.0146862483311081e-05, + "loss": 0.8016, + "step": 304 + }, + { + "epoch": 2.63, + "learning_rate": 1.02803738317757e-05, + "loss": 0.743, + "step": 308 + }, + { + "epoch": 2.67, + "learning_rate": 1.0413885180240321e-05, + "loss": 0.7807, + "step": 312 + }, + { + "epoch": 2.7, + "learning_rate": 1.054739652870494e-05, + "loss": 0.7376, + "step": 316 + }, + { + "epoch": 2.74, + "learning_rate": 1.068090787716956e-05, + "loss": 0.7415, + "step": 320 + }, + { + "epoch": 2.77, + "learning_rate": 1.081441922563418e-05, + "loss": 0.7283, + "step": 324 + }, + { + "epoch": 2.8, + "learning_rate": 1.0947930574098799e-05, + "loss": 0.6967, + "step": 328 + }, + { + "epoch": 2.84, + "learning_rate": 1.1081441922563418e-05, + "loss": 0.6988, + "step": 332 + }, + { + "epoch": 2.87, + "learning_rate": 1.1214953271028037e-05, + "loss": 0.7329, + "step": 336 + }, + { + "epoch": 2.91, + "learning_rate": 1.1348464619492657e-05, + "loss": 0.6674, + "step": 340 + }, + { + "epoch": 2.94, + "learning_rate": 1.1481975967957278e-05, + "loss": 0.6287, + "step": 344 + }, + { + "epoch": 2.97, + "learning_rate": 1.1615487316421897e-05, + "loss": 0.7225, + "step": 348 + }, + { + "epoch": 3.01, + "learning_rate": 1.1748998664886515e-05, + "loss": 0.6345, + "step": 352 + }, + { + "epoch": 3.04, + "learning_rate": 1.1882510013351136e-05, + "loss": 0.6217, + "step": 356 + }, + { + "epoch": 3.08, + "learning_rate": 1.2016021361815755e-05, + "loss": 0.6306, + "step": 360 + }, + { + "epoch": 3.11, + "learning_rate": 1.2149532710280374e-05, + "loss": 0.5966, + "step": 364 + }, + { + "epoch": 3.15, + "learning_rate": 1.2283044058744994e-05, + "loss": 0.6166, + "step": 368 + }, + { + "epoch": 3.18, + "learning_rate": 1.2416555407209613e-05, + "loss": 0.588, + "step": 372 + }, + { + "epoch": 3.21, + "learning_rate": 1.2550066755674234e-05, + "loss": 0.6105, + "step": 376 + }, + { + "epoch": 3.25, + "learning_rate": 1.2683578104138852e-05, + "loss": 0.5674, + "step": 380 + }, + { + "epoch": 3.28, + "learning_rate": 1.2817089452603473e-05, + "loss": 0.5682, + "step": 384 + }, + { + "epoch": 3.32, + "learning_rate": 1.295060080106809e-05, + "loss": 0.5839, + "step": 388 + }, + { + "epoch": 3.35, + "learning_rate": 1.308411214953271e-05, + "loss": 0.5281, + "step": 392 + }, + { + "epoch": 3.38, + "learning_rate": 1.321762349799733e-05, + "loss": 0.5438, + "step": 396 + }, + { + "epoch": 3.42, + "learning_rate": 1.3351134846461948e-05, + "loss": 0.544, + "step": 400 + }, + { + "epoch": 3.45, + "learning_rate": 1.348464619492657e-05, + "loss": 0.5287, + "step": 404 + }, + { + "epoch": 3.49, + "learning_rate": 1.3618157543391189e-05, + "loss": 0.5177, + "step": 408 + }, + { + "epoch": 3.52, + "learning_rate": 1.375166889185581e-05, + "loss": 0.5275, + "step": 412 + }, + { + "epoch": 3.56, + "learning_rate": 1.3885180240320427e-05, + "loss": 0.5346, + "step": 416 + }, + { + "epoch": 3.59, + "learning_rate": 1.4018691588785047e-05, + "loss": 0.5277, + "step": 420 + }, + { + "epoch": 3.62, + "learning_rate": 1.4152202937249668e-05, + "loss": 0.5483, + "step": 424 + }, + { + "epoch": 3.66, + "learning_rate": 1.4285714285714285e-05, + "loss": 0.5117, + "step": 428 + }, + { + "epoch": 3.69, + "learning_rate": 1.4419225634178906e-05, + "loss": 0.4839, + "step": 432 + }, + { + "epoch": 3.73, + "learning_rate": 1.4552736982643526e-05, + "loss": 0.4601, + "step": 436 + }, + { + "epoch": 3.76, + "learning_rate": 1.4686248331108147e-05, + "loss": 0.4971, + "step": 440 + }, + { + "epoch": 3.79, + "learning_rate": 1.4819759679572764e-05, + "loss": 0.4743, + "step": 444 + }, + { + "epoch": 3.83, + "learning_rate": 1.4953271028037382e-05, + "loss": 0.4872, + "step": 448 + }, + { + "epoch": 3.86, + "learning_rate": 1.5086782376502003e-05, + "loss": 0.4172, + "step": 452 + }, + { + "epoch": 3.9, + "learning_rate": 1.5220293724966622e-05, + "loss": 0.4379, + "step": 456 + }, + { + "epoch": 3.93, + "learning_rate": 1.5353805073431243e-05, + "loss": 0.4265, + "step": 460 + }, + { + "epoch": 3.97, + "learning_rate": 1.5487316421895863e-05, + "loss": 0.4154, + "step": 464 + }, + { + "epoch": 4.0, + "learning_rate": 1.5620827770360482e-05, + "loss": 0.4336, + "step": 468 + }, + { + "epoch": 4.03, + "learning_rate": 1.57543391188251e-05, + "loss": 0.4155, + "step": 472 + }, + { + "epoch": 4.07, + "learning_rate": 1.588785046728972e-05, + "loss": 0.469, + "step": 476 + }, + { + "epoch": 4.1, + "learning_rate": 1.602136181575434e-05, + "loss": 0.443, + "step": 480 + }, + { + "epoch": 4.14, + "learning_rate": 1.615487316421896e-05, + "loss": 0.4061, + "step": 484 + }, + { + "epoch": 4.17, + "learning_rate": 1.628838451268358e-05, + "loss": 0.417, + "step": 488 + }, + { + "epoch": 4.21, + "learning_rate": 1.6421895861148198e-05, + "loss": 0.3833, + "step": 492 + }, + { + "epoch": 4.24, + "learning_rate": 1.6555407209612817e-05, + "loss": 0.4274, + "step": 496 + }, + { + "epoch": 4.27, + "learning_rate": 1.6688918558077437e-05, + "loss": 0.3719, + "step": 500 + }, + { + "epoch": 4.31, + "learning_rate": 1.6822429906542056e-05, + "loss": 0.4054, + "step": 504 + }, + { + "epoch": 4.34, + "learning_rate": 1.6955941255006675e-05, + "loss": 0.3914, + "step": 508 + }, + { + "epoch": 4.38, + "learning_rate": 1.7089452603471295e-05, + "loss": 0.4241, + "step": 512 + }, + { + "epoch": 4.41, + "learning_rate": 1.7222963951935918e-05, + "loss": 0.3763, + "step": 516 + }, + { + "epoch": 4.44, + "learning_rate": 1.7356475300400533e-05, + "loss": 0.3755, + "step": 520 + }, + { + "epoch": 4.48, + "learning_rate": 1.7489986648865153e-05, + "loss": 0.3663, + "step": 524 + }, + { + "epoch": 4.51, + "learning_rate": 1.7623497997329775e-05, + "loss": 0.3577, + "step": 528 + }, + { + "epoch": 4.55, + "learning_rate": 1.775700934579439e-05, + "loss": 0.3447, + "step": 532 + }, + { + "epoch": 4.58, + "learning_rate": 1.7890520694259014e-05, + "loss": 0.3899, + "step": 536 + }, + { + "epoch": 4.62, + "learning_rate": 1.8024032042723633e-05, + "loss": 0.3441, + "step": 540 + }, + { + "epoch": 4.65, + "learning_rate": 1.8157543391188253e-05, + "loss": 0.3935, + "step": 544 + }, + { + "epoch": 4.68, + "learning_rate": 1.8291054739652872e-05, + "loss": 0.3579, + "step": 548 + }, + { + "epoch": 4.72, + "learning_rate": 1.8424566088117488e-05, + "loss": 0.3555, + "step": 552 + }, + { + "epoch": 4.75, + "learning_rate": 1.855807743658211e-05, + "loss": 0.3561, + "step": 556 + }, + { + "epoch": 4.79, + "learning_rate": 1.869158878504673e-05, + "loss": 0.3363, + "step": 560 + }, + { + "epoch": 4.82, + "learning_rate": 1.882510013351135e-05, + "loss": 0.358, + "step": 564 + }, + { + "epoch": 4.85, + "learning_rate": 1.895861148197597e-05, + "loss": 0.3316, + "step": 568 + }, + { + "epoch": 4.89, + "learning_rate": 1.9092122830440588e-05, + "loss": 0.36, + "step": 572 + }, + { + "epoch": 4.92, + "learning_rate": 1.9225634178905207e-05, + "loss": 0.3553, + "step": 576 + }, + { + "epoch": 4.96, + "learning_rate": 1.9359145527369827e-05, + "loss": 0.3348, + "step": 580 + }, + { + "epoch": 4.99, + "learning_rate": 1.9492656875834446e-05, + "loss": 0.3186, + "step": 584 + }, + { + "epoch": 5.03, + "learning_rate": 1.9626168224299065e-05, + "loss": 0.2991, + "step": 588 + }, + { + "epoch": 5.06, + "learning_rate": 1.9759679572763688e-05, + "loss": 0.3197, + "step": 592 + }, + { + "epoch": 5.09, + "learning_rate": 1.9893190921228304e-05, + "loss": 0.3489, + "step": 596 + }, + { + "epoch": 5.13, + "learning_rate": 2.0026702269692923e-05, + "loss": 0.3344, + "step": 600 + }, + { + "epoch": 5.16, + "learning_rate": 2.0160213618157546e-05, + "loss": 0.3118, + "step": 604 + }, + { + "epoch": 5.2, + "learning_rate": 2.0293724966622162e-05, + "loss": 0.2895, + "step": 608 + }, + { + "epoch": 5.23, + "learning_rate": 2.0427236315086785e-05, + "loss": 0.3054, + "step": 612 + }, + { + "epoch": 5.26, + "learning_rate": 2.05607476635514e-05, + "loss": 0.2828, + "step": 616 + }, + { + "epoch": 5.3, + "learning_rate": 2.0694259012016024e-05, + "loss": 0.305, + "step": 620 + }, + { + "epoch": 5.33, + "learning_rate": 2.0827770360480643e-05, + "loss": 0.3166, + "step": 624 + }, + { + "epoch": 5.37, + "learning_rate": 2.096128170894526e-05, + "loss": 0.3252, + "step": 628 + }, + { + "epoch": 5.4, + "learning_rate": 2.109479305740988e-05, + "loss": 0.3304, + "step": 632 + }, + { + "epoch": 5.44, + "learning_rate": 2.12283044058745e-05, + "loss": 0.2743, + "step": 636 + }, + { + "epoch": 5.47, + "learning_rate": 2.136181575433912e-05, + "loss": 0.2986, + "step": 640 + }, + { + "epoch": 5.5, + "learning_rate": 2.149532710280374e-05, + "loss": 0.293, + "step": 644 + }, + { + "epoch": 5.54, + "learning_rate": 2.162883845126836e-05, + "loss": 0.2777, + "step": 648 + }, + { + "epoch": 5.57, + "learning_rate": 2.1762349799732978e-05, + "loss": 0.2572, + "step": 652 + }, + { + "epoch": 5.61, + "learning_rate": 2.1895861148197598e-05, + "loss": 0.2667, + "step": 656 + }, + { + "epoch": 5.64, + "learning_rate": 2.2029372496662217e-05, + "loss": 0.2817, + "step": 660 + }, + { + "epoch": 5.68, + "learning_rate": 2.2162883845126836e-05, + "loss": 0.2712, + "step": 664 + }, + { + "epoch": 5.71, + "learning_rate": 2.229639519359146e-05, + "loss": 0.2583, + "step": 668 + }, + { + "epoch": 5.74, + "learning_rate": 2.2429906542056075e-05, + "loss": 0.2684, + "step": 672 + }, + { + "epoch": 5.78, + "learning_rate": 2.2563417890520698e-05, + "loss": 0.2811, + "step": 676 + }, + { + "epoch": 5.81, + "learning_rate": 2.2696929238985313e-05, + "loss": 0.2657, + "step": 680 + }, + { + "epoch": 5.85, + "learning_rate": 2.2830440587449933e-05, + "loss": 0.2856, + "step": 684 + }, + { + "epoch": 5.88, + "learning_rate": 2.2963951935914556e-05, + "loss": 0.2647, + "step": 688 + }, + { + "epoch": 5.91, + "learning_rate": 2.309746328437917e-05, + "loss": 0.2373, + "step": 692 + }, + { + "epoch": 5.95, + "learning_rate": 2.3230974632843794e-05, + "loss": 0.2789, + "step": 696 + }, + { + "epoch": 5.98, + "learning_rate": 2.3364485981308414e-05, + "loss": 0.2637, + "step": 700 + }, + { + "epoch": 6.02, + "learning_rate": 2.349799732977303e-05, + "loss": 0.2691, + "step": 704 + }, + { + "epoch": 6.05, + "learning_rate": 2.3631508678237652e-05, + "loss": 0.2516, + "step": 708 + }, + { + "epoch": 6.09, + "learning_rate": 2.376502002670227e-05, + "loss": 0.2382, + "step": 712 + }, + { + "epoch": 6.12, + "learning_rate": 2.389853137516689e-05, + "loss": 0.2398, + "step": 716 + }, + { + "epoch": 6.15, + "learning_rate": 2.403204272363151e-05, + "loss": 0.2353, + "step": 720 + }, + { + "epoch": 6.19, + "learning_rate": 2.416555407209613e-05, + "loss": 0.2257, + "step": 724 + }, + { + "epoch": 6.22, + "learning_rate": 2.429906542056075e-05, + "loss": 0.2939, + "step": 728 + }, + { + "epoch": 6.26, + "learning_rate": 2.4432576769025368e-05, + "loss": 0.2483, + "step": 732 + }, + { + "epoch": 6.29, + "learning_rate": 2.4566088117489988e-05, + "loss": 0.2338, + "step": 736 + }, + { + "epoch": 6.32, + "learning_rate": 2.4699599465954607e-05, + "loss": 0.2811, + "step": 740 + }, + { + "epoch": 6.36, + "learning_rate": 2.4833110814419226e-05, + "loss": 0.2147, + "step": 744 + }, + { + "epoch": 6.39, + "learning_rate": 2.4966622162883846e-05, + "loss": 0.1995, + "step": 748 + }, + { + "epoch": 6.43, + "learning_rate": 2.5100133511348468e-05, + "loss": 0.231, + "step": 752 + }, + { + "epoch": 6.46, + "learning_rate": 2.5233644859813084e-05, + "loss": 0.2072, + "step": 756 + }, + { + "epoch": 6.5, + "learning_rate": 2.5367156208277704e-05, + "loss": 0.2304, + "step": 760 + }, + { + "epoch": 6.53, + "learning_rate": 2.5500667556742326e-05, + "loss": 0.2172, + "step": 764 + }, + { + "epoch": 6.56, + "learning_rate": 2.5634178905206946e-05, + "loss": 0.2318, + "step": 768 + }, + { + "epoch": 6.6, + "learning_rate": 2.576769025367156e-05, + "loss": 0.2285, + "step": 772 + }, + { + "epoch": 6.63, + "learning_rate": 2.590120160213618e-05, + "loss": 0.217, + "step": 776 + }, + { + "epoch": 6.67, + "learning_rate": 2.6034712950600804e-05, + "loss": 0.1973, + "step": 780 + }, + { + "epoch": 6.7, + "learning_rate": 2.616822429906542e-05, + "loss": 0.2131, + "step": 784 + }, + { + "epoch": 6.74, + "learning_rate": 2.630173564753004e-05, + "loss": 0.2343, + "step": 788 + }, + { + "epoch": 6.77, + "learning_rate": 2.643524699599466e-05, + "loss": 0.2307, + "step": 792 + }, + { + "epoch": 6.8, + "learning_rate": 2.656875834445928e-05, + "loss": 0.224, + "step": 796 + }, + { + "epoch": 6.84, + "learning_rate": 2.6702269692923897e-05, + "loss": 0.2008, + "step": 800 + }, + { + "epoch": 6.87, + "learning_rate": 2.683578104138852e-05, + "loss": 0.2024, + "step": 804 + }, + { + "epoch": 6.91, + "learning_rate": 2.696929238985314e-05, + "loss": 0.2134, + "step": 808 + }, + { + "epoch": 6.94, + "learning_rate": 2.7102803738317755e-05, + "loss": 0.1942, + "step": 812 + }, + { + "epoch": 6.97, + "learning_rate": 2.7236315086782378e-05, + "loss": 0.2103, + "step": 816 + }, + { + "epoch": 7.01, + "learning_rate": 2.7369826435246997e-05, + "loss": 0.212, + "step": 820 + }, + { + "epoch": 7.04, + "learning_rate": 2.750333778371162e-05, + "loss": 0.2368, + "step": 824 + }, + { + "epoch": 7.08, + "learning_rate": 2.7636849132176236e-05, + "loss": 0.1905, + "step": 828 + }, + { + "epoch": 7.11, + "learning_rate": 2.7770360480640855e-05, + "loss": 0.1892, + "step": 832 + }, + { + "epoch": 7.15, + "learning_rate": 2.7903871829105478e-05, + "loss": 0.2069, + "step": 836 + }, + { + "epoch": 7.18, + "learning_rate": 2.8037383177570094e-05, + "loss": 0.1957, + "step": 840 + }, + { + "epoch": 7.21, + "learning_rate": 2.8170894526034713e-05, + "loss": 0.2093, + "step": 844 + }, + { + "epoch": 7.25, + "learning_rate": 2.8304405874499336e-05, + "loss": 0.2003, + "step": 848 + }, + { + "epoch": 7.28, + "learning_rate": 2.8437917222963955e-05, + "loss": 0.2081, + "step": 852 + }, + { + "epoch": 7.32, + "learning_rate": 2.857142857142857e-05, + "loss": 0.1746, + "step": 856 + }, + { + "epoch": 7.35, + "learning_rate": 2.8704939919893194e-05, + "loss": 0.1738, + "step": 860 + }, + { + "epoch": 7.38, + "learning_rate": 2.8838451268357813e-05, + "loss": 0.1868, + "step": 864 + }, + { + "epoch": 7.42, + "learning_rate": 2.897196261682243e-05, + "loss": 0.1829, + "step": 868 + }, + { + "epoch": 7.45, + "learning_rate": 2.910547396528705e-05, + "loss": 0.1794, + "step": 872 + }, + { + "epoch": 7.49, + "learning_rate": 2.923898531375167e-05, + "loss": 0.1696, + "step": 876 + }, + { + "epoch": 7.52, + "learning_rate": 2.9372496662216294e-05, + "loss": 0.1907, + "step": 880 + }, + { + "epoch": 7.56, + "learning_rate": 2.9506008010680906e-05, + "loss": 0.1813, + "step": 884 + }, + { + "epoch": 7.59, + "learning_rate": 2.963951935914553e-05, + "loss": 0.1519, + "step": 888 + }, + { + "epoch": 7.62, + "learning_rate": 2.977303070761015e-05, + "loss": 0.1903, + "step": 892 + }, + { + "epoch": 7.66, + "learning_rate": 2.9906542056074764e-05, + "loss": 0.1712, + "step": 896 + }, + { + "epoch": 7.69, + "learning_rate": 3.0040053404539387e-05, + "loss": 0.1914, + "step": 900 + }, + { + "epoch": 7.73, + "learning_rate": 3.0173564753004006e-05, + "loss": 0.164, + "step": 904 + }, + { + "epoch": 7.76, + "learning_rate": 3.030707610146863e-05, + "loss": 0.1671, + "step": 908 + }, + { + "epoch": 7.79, + "learning_rate": 3.0440587449933245e-05, + "loss": 0.1688, + "step": 912 + }, + { + "epoch": 7.83, + "learning_rate": 3.0574098798397864e-05, + "loss": 0.1588, + "step": 916 + }, + { + "epoch": 7.86, + "learning_rate": 3.070761014686249e-05, + "loss": 0.1685, + "step": 920 + }, + { + "epoch": 7.9, + "learning_rate": 3.08411214953271e-05, + "loss": 0.1704, + "step": 924 + }, + { + "epoch": 7.93, + "learning_rate": 3.0974632843791726e-05, + "loss": 0.1467, + "step": 928 + }, + { + "epoch": 7.97, + "learning_rate": 3.110814419225634e-05, + "loss": 0.1631, + "step": 932 + }, + { + "epoch": 8.0, + "learning_rate": 3.1241655540720964e-05, + "loss": 0.1584, + "step": 936 + }, + { + "epoch": 8.03, + "learning_rate": 3.137516688918558e-05, + "loss": 0.1595, + "step": 940 + }, + { + "epoch": 8.07, + "learning_rate": 3.15086782376502e-05, + "loss": 0.1576, + "step": 944 + }, + { + "epoch": 8.1, + "learning_rate": 3.1642189586114826e-05, + "loss": 0.173, + "step": 948 + }, + { + "epoch": 8.14, + "learning_rate": 3.177570093457944e-05, + "loss": 0.1775, + "step": 952 + }, + { + "epoch": 8.17, + "learning_rate": 3.190921228304406e-05, + "loss": 0.1542, + "step": 956 + }, + { + "epoch": 8.21, + "learning_rate": 3.204272363150868e-05, + "loss": 0.129, + "step": 960 + }, + { + "epoch": 8.24, + "learning_rate": 3.2176234979973296e-05, + "loss": 0.1671, + "step": 964 + }, + { + "epoch": 8.27, + "learning_rate": 3.230974632843792e-05, + "loss": 0.1541, + "step": 968 + }, + { + "epoch": 8.31, + "learning_rate": 3.244325767690254e-05, + "loss": 0.1524, + "step": 972 + }, + { + "epoch": 8.34, + "learning_rate": 3.257676902536716e-05, + "loss": 0.1559, + "step": 976 + }, + { + "epoch": 8.38, + "learning_rate": 3.2710280373831774e-05, + "loss": 0.1447, + "step": 980 + }, + { + "epoch": 8.41, + "learning_rate": 3.2843791722296396e-05, + "loss": 0.1596, + "step": 984 + }, + { + "epoch": 8.44, + "learning_rate": 3.297730307076102e-05, + "loss": 0.1287, + "step": 988 + }, + { + "epoch": 8.48, + "learning_rate": 3.3110814419225635e-05, + "loss": 0.1448, + "step": 992 + }, + { + "epoch": 8.51, + "learning_rate": 3.324432576769025e-05, + "loss": 0.1479, + "step": 996 + }, + { + "epoch": 8.55, + "learning_rate": 3.3377837116154874e-05, + "loss": 0.1587, + "step": 1000 + }, + { + "epoch": 8.55, + "eval_exact_match": 0.35654885654885654, + "eval_loss": 0.4734349548816681, + "eval_runtime": 95.1537, + "eval_samples_per_second": 10.11, + "step": 1000 + }, + { + "epoch": 8.58, + "learning_rate": 3.3511348464619496e-05, + "loss": 0.1445, + "step": 1004 + }, + { + "epoch": 8.62, + "learning_rate": 3.364485981308411e-05, + "loss": 0.1414, + "step": 1008 + }, + { + "epoch": 8.65, + "learning_rate": 3.3778371161548735e-05, + "loss": 0.153, + "step": 1012 + }, + { + "epoch": 8.68, + "learning_rate": 3.391188251001335e-05, + "loss": 0.1389, + "step": 1016 + }, + { + "epoch": 8.72, + "learning_rate": 3.404539385847797e-05, + "loss": 0.1528, + "step": 1020 + }, + { + "epoch": 8.75, + "learning_rate": 3.417890520694259e-05, + "loss": 0.1281, + "step": 1024 + }, + { + "epoch": 8.79, + "learning_rate": 3.431241655540721e-05, + "loss": 0.1358, + "step": 1028 + }, + { + "epoch": 8.82, + "learning_rate": 3.4445927903871835e-05, + "loss": 0.1392, + "step": 1032 + }, + { + "epoch": 8.85, + "learning_rate": 3.457943925233645e-05, + "loss": 0.115, + "step": 1036 + }, + { + "epoch": 8.89, + "learning_rate": 3.471295060080107e-05, + "loss": 0.1376, + "step": 1040 + }, + { + "epoch": 8.92, + "learning_rate": 3.484646194926569e-05, + "loss": 0.1347, + "step": 1044 + }, + { + "epoch": 8.96, + "learning_rate": 3.4979973297730306e-05, + "loss": 0.1198, + "step": 1048 + }, + { + "epoch": 8.99, + "learning_rate": 3.511348464619493e-05, + "loss": 0.1278, + "step": 1052 + }, + { + "epoch": 9.03, + "learning_rate": 3.524699599465955e-05, + "loss": 0.141, + "step": 1056 + }, + { + "epoch": 9.06, + "learning_rate": 3.538050734312417e-05, + "loss": 0.1191, + "step": 1060 + }, + { + "epoch": 9.09, + "learning_rate": 3.551401869158878e-05, + "loss": 0.1298, + "step": 1064 + }, + { + "epoch": 9.13, + "learning_rate": 3.5647530040053406e-05, + "loss": 0.1373, + "step": 1068 + }, + { + "epoch": 9.16, + "learning_rate": 3.578104138851803e-05, + "loss": 0.1241, + "step": 1072 + }, + { + "epoch": 9.2, + "learning_rate": 3.5914552736982644e-05, + "loss": 0.126, + "step": 1076 + }, + { + "epoch": 9.23, + "learning_rate": 3.604806408544727e-05, + "loss": 0.1065, + "step": 1080 + }, + { + "epoch": 9.26, + "learning_rate": 3.618157543391188e-05, + "loss": 0.1143, + "step": 1084 + }, + { + "epoch": 9.3, + "learning_rate": 3.6315086782376506e-05, + "loss": 0.1323, + "step": 1088 + }, + { + "epoch": 9.33, + "learning_rate": 3.644859813084112e-05, + "loss": 0.1351, + "step": 1092 + }, + { + "epoch": 9.37, + "learning_rate": 3.6582109479305744e-05, + "loss": 0.1231, + "step": 1096 + }, + { + "epoch": 9.4, + "learning_rate": 3.671562082777037e-05, + "loss": 0.123, + "step": 1100 + }, + { + "epoch": 9.44, + "learning_rate": 3.6849132176234976e-05, + "loss": 0.1214, + "step": 1104 + }, + { + "epoch": 9.47, + "learning_rate": 3.69826435246996e-05, + "loss": 0.115, + "step": 1108 + }, + { + "epoch": 9.5, + "learning_rate": 3.711615487316422e-05, + "loss": 0.1416, + "step": 1112 + }, + { + "epoch": 9.54, + "learning_rate": 3.7249666221628844e-05, + "loss": 0.1126, + "step": 1116 + }, + { + "epoch": 9.57, + "learning_rate": 3.738317757009346e-05, + "loss": 0.1179, + "step": 1120 + }, + { + "epoch": 9.61, + "learning_rate": 3.7516688918558076e-05, + "loss": 0.1122, + "step": 1124 + }, + { + "epoch": 9.64, + "learning_rate": 3.76502002670227e-05, + "loss": 0.1116, + "step": 1128 + }, + { + "epoch": 9.68, + "learning_rate": 3.7783711615487315e-05, + "loss": 0.1148, + "step": 1132 + }, + { + "epoch": 9.71, + "learning_rate": 3.791722296395194e-05, + "loss": 0.1102, + "step": 1136 + }, + { + "epoch": 9.74, + "learning_rate": 3.805073431241656e-05, + "loss": 0.1213, + "step": 1140 + }, + { + "epoch": 9.78, + "learning_rate": 3.8184245660881176e-05, + "loss": 0.1105, + "step": 1144 + }, + { + "epoch": 9.81, + "learning_rate": 3.831775700934579e-05, + "loss": 0.1012, + "step": 1148 + }, + { + "epoch": 9.85, + "learning_rate": 3.8451268357810415e-05, + "loss": 0.128, + "step": 1152 + }, + { + "epoch": 9.88, + "learning_rate": 3.858477970627504e-05, + "loss": 0.1074, + "step": 1156 + }, + { + "epoch": 9.91, + "learning_rate": 3.8718291054739654e-05, + "loss": 0.1104, + "step": 1160 + }, + { + "epoch": 9.95, + "learning_rate": 3.8851802403204276e-05, + "loss": 0.1231, + "step": 1164 + }, + { + "epoch": 9.98, + "learning_rate": 3.898531375166889e-05, + "loss": 0.1078, + "step": 1168 + }, + { + "epoch": 10.02, + "learning_rate": 3.9118825100133515e-05, + "loss": 0.1061, + "step": 1172 + }, + { + "epoch": 10.05, + "learning_rate": 3.925233644859813e-05, + "loss": 0.1007, + "step": 1176 + }, + { + "epoch": 10.09, + "learning_rate": 3.9385847797062754e-05, + "loss": 0.0987, + "step": 1180 + }, + { + "epoch": 10.12, + "learning_rate": 3.9519359145527376e-05, + "loss": 0.121, + "step": 1184 + }, + { + "epoch": 10.15, + "learning_rate": 3.965287049399199e-05, + "loss": 0.1, + "step": 1188 + }, + { + "epoch": 10.19, + "learning_rate": 3.978638184245661e-05, + "loss": 0.1001, + "step": 1192 + }, + { + "epoch": 10.22, + "learning_rate": 3.991989319092123e-05, + "loss": 0.1033, + "step": 1196 + }, + { + "epoch": 10.26, + "learning_rate": 4.005340453938585e-05, + "loss": 0.0859, + "step": 1200 + }, + { + "epoch": 10.29, + "learning_rate": 4.018691588785047e-05, + "loss": 0.1105, + "step": 1204 + }, + { + "epoch": 10.32, + "learning_rate": 4.032042723631509e-05, + "loss": 0.1025, + "step": 1208 + }, + { + "epoch": 10.36, + "learning_rate": 4.045393858477971e-05, + "loss": 0.0994, + "step": 1212 + }, + { + "epoch": 10.39, + "learning_rate": 4.0587449933244324e-05, + "loss": 0.1133, + "step": 1216 + }, + { + "epoch": 10.43, + "learning_rate": 4.072096128170895e-05, + "loss": 0.0973, + "step": 1220 + }, + { + "epoch": 10.46, + "learning_rate": 4.085447263017357e-05, + "loss": 0.0917, + "step": 1224 + }, + { + "epoch": 10.5, + "learning_rate": 4.0987983978638186e-05, + "loss": 0.0861, + "step": 1228 + }, + { + "epoch": 10.53, + "learning_rate": 4.11214953271028e-05, + "loss": 0.0975, + "step": 1232 + }, + { + "epoch": 10.56, + "learning_rate": 4.1255006675567424e-05, + "loss": 0.1049, + "step": 1236 + }, + { + "epoch": 10.6, + "learning_rate": 4.138851802403205e-05, + "loss": 0.1036, + "step": 1240 + }, + { + "epoch": 10.63, + "learning_rate": 4.152202937249666e-05, + "loss": 0.0842, + "step": 1244 + }, + { + "epoch": 10.67, + "learning_rate": 4.1655540720961286e-05, + "loss": 0.1029, + "step": 1248 + }, + { + "epoch": 10.7, + "learning_rate": 4.17890520694259e-05, + "loss": 0.0906, + "step": 1252 + }, + { + "epoch": 10.74, + "learning_rate": 4.192256341789052e-05, + "loss": 0.0986, + "step": 1256 + }, + { + "epoch": 10.77, + "learning_rate": 4.205607476635514e-05, + "loss": 0.0958, + "step": 1260 + }, + { + "epoch": 10.8, + "learning_rate": 4.218958611481976e-05, + "loss": 0.0898, + "step": 1264 + }, + { + "epoch": 10.84, + "learning_rate": 4.2323097463284386e-05, + "loss": 0.0979, + "step": 1268 + }, + { + "epoch": 10.87, + "learning_rate": 4.2456608811749e-05, + "loss": 0.0911, + "step": 1272 + }, + { + "epoch": 10.91, + "learning_rate": 4.259012016021362e-05, + "loss": 0.0909, + "step": 1276 + }, + { + "epoch": 10.94, + "learning_rate": 4.272363150867824e-05, + "loss": 0.0909, + "step": 1280 + }, + { + "epoch": 10.97, + "learning_rate": 4.2857142857142856e-05, + "loss": 0.0849, + "step": 1284 + }, + { + "epoch": 11.01, + "learning_rate": 4.299065420560748e-05, + "loss": 0.095, + "step": 1288 + }, + { + "epoch": 11.04, + "learning_rate": 4.31241655540721e-05, + "loss": 0.0726, + "step": 1292 + }, + { + "epoch": 11.08, + "learning_rate": 4.325767690253672e-05, + "loss": 0.0996, + "step": 1296 + }, + { + "epoch": 11.11, + "learning_rate": 4.3391188251001334e-05, + "loss": 0.0783, + "step": 1300 + }, + { + "epoch": 11.15, + "learning_rate": 4.3524699599465956e-05, + "loss": 0.082, + "step": 1304 + }, + { + "epoch": 11.18, + "learning_rate": 4.365821094793058e-05, + "loss": 0.0966, + "step": 1308 + }, + { + "epoch": 11.21, + "learning_rate": 4.3791722296395195e-05, + "loss": 0.0815, + "step": 1312 + }, + { + "epoch": 11.25, + "learning_rate": 4.392523364485982e-05, + "loss": 0.0783, + "step": 1316 + }, + { + "epoch": 11.28, + "learning_rate": 4.4058744993324434e-05, + "loss": 0.0772, + "step": 1320 + }, + { + "epoch": 11.32, + "learning_rate": 4.4192256341789056e-05, + "loss": 0.0763, + "step": 1324 + }, + { + "epoch": 11.35, + "learning_rate": 4.432576769025367e-05, + "loss": 0.0859, + "step": 1328 + }, + { + "epoch": 11.38, + "learning_rate": 4.4459279038718295e-05, + "loss": 0.0764, + "step": 1332 + }, + { + "epoch": 11.42, + "learning_rate": 4.459279038718292e-05, + "loss": 0.0784, + "step": 1336 + }, + { + "epoch": 11.45, + "learning_rate": 4.472630173564753e-05, + "loss": 0.0779, + "step": 1340 + }, + { + "epoch": 11.49, + "learning_rate": 4.485981308411215e-05, + "loss": 0.0706, + "step": 1344 + }, + { + "epoch": 11.52, + "learning_rate": 4.499332443257677e-05, + "loss": 0.0837, + "step": 1348 + }, + { + "epoch": 11.56, + "learning_rate": 4.5126835781041395e-05, + "loss": 0.0881, + "step": 1352 + }, + { + "epoch": 11.59, + "learning_rate": 4.526034712950601e-05, + "loss": 0.0766, + "step": 1356 + }, + { + "epoch": 11.62, + "learning_rate": 4.539385847797063e-05, + "loss": 0.0777, + "step": 1360 + }, + { + "epoch": 11.66, + "learning_rate": 4.552736982643525e-05, + "loss": 0.0893, + "step": 1364 + }, + { + "epoch": 11.69, + "learning_rate": 4.5660881174899866e-05, + "loss": 0.0801, + "step": 1368 + }, + { + "epoch": 11.73, + "learning_rate": 4.579439252336449e-05, + "loss": 0.0758, + "step": 1372 + }, + { + "epoch": 11.76, + "learning_rate": 4.592790387182911e-05, + "loss": 0.0781, + "step": 1376 + }, + { + "epoch": 11.79, + "learning_rate": 4.606141522029373e-05, + "loss": 0.0725, + "step": 1380 + }, + { + "epoch": 11.83, + "learning_rate": 4.619492656875834e-05, + "loss": 0.0707, + "step": 1384 + }, + { + "epoch": 11.86, + "learning_rate": 4.6328437917222966e-05, + "loss": 0.073, + "step": 1388 + }, + { + "epoch": 11.9, + "learning_rate": 4.646194926568759e-05, + "loss": 0.0707, + "step": 1392 + }, + { + "epoch": 11.93, + "learning_rate": 4.6595460614152204e-05, + "loss": 0.0692, + "step": 1396 + }, + { + "epoch": 11.97, + "learning_rate": 4.672897196261683e-05, + "loss": 0.0793, + "step": 1400 + }, + { + "epoch": 12.0, + "learning_rate": 4.686248331108144e-05, + "loss": 0.076, + "step": 1404 + }, + { + "epoch": 12.03, + "learning_rate": 4.699599465954606e-05, + "loss": 0.073, + "step": 1408 + }, + { + "epoch": 12.07, + "learning_rate": 4.712950600801068e-05, + "loss": 0.0662, + "step": 1412 + }, + { + "epoch": 12.1, + "learning_rate": 4.7263017356475304e-05, + "loss": 0.0678, + "step": 1416 + }, + { + "epoch": 12.14, + "learning_rate": 4.739652870493993e-05, + "loss": 0.0677, + "step": 1420 + }, + { + "epoch": 12.17, + "learning_rate": 4.753004005340454e-05, + "loss": 0.0697, + "step": 1424 + }, + { + "epoch": 12.21, + "learning_rate": 4.766355140186916e-05, + "loss": 0.0655, + "step": 1428 + }, + { + "epoch": 12.24, + "learning_rate": 4.779706275033378e-05, + "loss": 0.0676, + "step": 1432 + }, + { + "epoch": 12.27, + "learning_rate": 4.79305740987984e-05, + "loss": 0.0618, + "step": 1436 + }, + { + "epoch": 12.31, + "learning_rate": 4.806408544726302e-05, + "loss": 0.0716, + "step": 1440 + }, + { + "epoch": 12.34, + "learning_rate": 4.819759679572764e-05, + "loss": 0.0745, + "step": 1444 + }, + { + "epoch": 12.38, + "learning_rate": 4.833110814419226e-05, + "loss": 0.0706, + "step": 1448 + }, + { + "epoch": 12.41, + "learning_rate": 4.8464619492656875e-05, + "loss": 0.0709, + "step": 1452 + }, + { + "epoch": 12.44, + "learning_rate": 4.85981308411215e-05, + "loss": 0.064, + "step": 1456 + }, + { + "epoch": 12.48, + "learning_rate": 4.873164218958612e-05, + "loss": 0.0654, + "step": 1460 + }, + { + "epoch": 12.51, + "learning_rate": 4.8865153538050736e-05, + "loss": 0.0687, + "step": 1464 + }, + { + "epoch": 12.55, + "learning_rate": 4.899866488651535e-05, + "loss": 0.0764, + "step": 1468 + }, + { + "epoch": 12.58, + "learning_rate": 4.9132176234979975e-05, + "loss": 0.0666, + "step": 1472 + }, + { + "epoch": 12.62, + "learning_rate": 4.92656875834446e-05, + "loss": 0.069, + "step": 1476 + }, + { + "epoch": 12.65, + "learning_rate": 4.9399198931909214e-05, + "loss": 0.0684, + "step": 1480 + }, + { + "epoch": 12.68, + "learning_rate": 4.9532710280373836e-05, + "loss": 0.0606, + "step": 1484 + }, + { + "epoch": 12.72, + "learning_rate": 4.966622162883845e-05, + "loss": 0.0658, + "step": 1488 + }, + { + "epoch": 12.75, + "learning_rate": 4.979973297730307e-05, + "loss": 0.0677, + "step": 1492 + }, + { + "epoch": 12.79, + "learning_rate": 4.993324432576769e-05, + "loss": 0.0651, + "step": 1496 + }, + { + "epoch": 12.82, + "learning_rate": 5.0066755674232314e-05, + "loss": 0.0679, + "step": 1500 + }, + { + "epoch": 12.85, + "learning_rate": 5.0200267022696936e-05, + "loss": 0.0582, + "step": 1504 + }, + { + "epoch": 12.89, + "learning_rate": 5.033377837116155e-05, + "loss": 0.047, + "step": 1508 + }, + { + "epoch": 12.92, + "learning_rate": 5.046728971962617e-05, + "loss": 0.0541, + "step": 1512 + }, + { + "epoch": 12.96, + "learning_rate": 5.0600801068090784e-05, + "loss": 0.0634, + "step": 1516 + }, + { + "epoch": 12.99, + "learning_rate": 5.073431241655541e-05, + "loss": 0.061, + "step": 1520 + }, + { + "epoch": 13.03, + "learning_rate": 5.086782376502003e-05, + "loss": 0.0724, + "step": 1524 + }, + { + "epoch": 13.06, + "learning_rate": 5.100133511348465e-05, + "loss": 0.058, + "step": 1528 + }, + { + "epoch": 13.09, + "learning_rate": 5.113484646194927e-05, + "loss": 0.0561, + "step": 1532 + }, + { + "epoch": 13.13, + "learning_rate": 5.126835781041389e-05, + "loss": 0.058, + "step": 1536 + }, + { + "epoch": 13.16, + "learning_rate": 5.14018691588785e-05, + "loss": 0.0563, + "step": 1540 + }, + { + "epoch": 13.2, + "learning_rate": 5.153538050734312e-05, + "loss": 0.06, + "step": 1544 + }, + { + "epoch": 13.23, + "learning_rate": 5.1668891855807746e-05, + "loss": 0.0575, + "step": 1548 + }, + { + "epoch": 13.26, + "learning_rate": 5.180240320427236e-05, + "loss": 0.0602, + "step": 1552 + }, + { + "epoch": 13.3, + "learning_rate": 5.1935914552736984e-05, + "loss": 0.0645, + "step": 1556 + }, + { + "epoch": 13.33, + "learning_rate": 5.206942590120161e-05, + "loss": 0.061, + "step": 1560 + }, + { + "epoch": 13.37, + "learning_rate": 5.220293724966623e-05, + "loss": 0.0522, + "step": 1564 + }, + { + "epoch": 13.4, + "learning_rate": 5.233644859813084e-05, + "loss": 0.0489, + "step": 1568 + }, + { + "epoch": 13.44, + "learning_rate": 5.246995994659546e-05, + "loss": 0.0543, + "step": 1572 + }, + { + "epoch": 13.47, + "learning_rate": 5.260347129506008e-05, + "loss": 0.0581, + "step": 1576 + }, + { + "epoch": 13.5, + "learning_rate": 5.27369826435247e-05, + "loss": 0.0564, + "step": 1580 + }, + { + "epoch": 13.54, + "learning_rate": 5.287049399198932e-05, + "loss": 0.0569, + "step": 1584 + }, + { + "epoch": 13.57, + "learning_rate": 5.3004005340453946e-05, + "loss": 0.0568, + "step": 1588 + }, + { + "epoch": 13.61, + "learning_rate": 5.313751668891856e-05, + "loss": 0.0524, + "step": 1592 + }, + { + "epoch": 13.64, + "learning_rate": 5.327102803738318e-05, + "loss": 0.0515, + "step": 1596 + }, + { + "epoch": 13.68, + "learning_rate": 5.3404539385847794e-05, + "loss": 0.0517, + "step": 1600 + }, + { + "epoch": 13.71, + "learning_rate": 5.3538050734312416e-05, + "loss": 0.062, + "step": 1604 + }, + { + "epoch": 13.74, + "learning_rate": 5.367156208277704e-05, + "loss": 0.0501, + "step": 1608 + }, + { + "epoch": 13.78, + "learning_rate": 5.380507343124166e-05, + "loss": 0.0555, + "step": 1612 + }, + { + "epoch": 13.81, + "learning_rate": 5.393858477970628e-05, + "loss": 0.0587, + "step": 1616 + }, + { + "epoch": 13.85, + "learning_rate": 5.40720961281709e-05, + "loss": 0.0568, + "step": 1620 + }, + { + "epoch": 13.88, + "learning_rate": 5.420560747663551e-05, + "loss": 0.0502, + "step": 1624 + }, + { + "epoch": 13.91, + "learning_rate": 5.433911882510013e-05, + "loss": 0.0512, + "step": 1628 + }, + { + "epoch": 13.95, + "learning_rate": 5.4472630173564755e-05, + "loss": 0.0551, + "step": 1632 + }, + { + "epoch": 13.98, + "learning_rate": 5.460614152202938e-05, + "loss": 0.0509, + "step": 1636 + }, + { + "epoch": 14.02, + "learning_rate": 5.4739652870493994e-05, + "loss": 0.0587, + "step": 1640 + }, + { + "epoch": 14.05, + "learning_rate": 5.4873164218958616e-05, + "loss": 0.0472, + "step": 1644 + }, + { + "epoch": 14.09, + "learning_rate": 5.500667556742324e-05, + "loss": 0.0514, + "step": 1648 + }, + { + "epoch": 14.12, + "learning_rate": 5.514018691588785e-05, + "loss": 0.0434, + "step": 1652 + }, + { + "epoch": 14.15, + "learning_rate": 5.527369826435247e-05, + "loss": 0.0548, + "step": 1656 + }, + { + "epoch": 14.19, + "learning_rate": 5.540720961281709e-05, + "loss": 0.0514, + "step": 1660 + }, + { + "epoch": 14.22, + "learning_rate": 5.554072096128171e-05, + "loss": 0.0511, + "step": 1664 + }, + { + "epoch": 14.26, + "learning_rate": 5.567423230974633e-05, + "loss": 0.0581, + "step": 1668 + }, + { + "epoch": 14.29, + "learning_rate": 5.5807743658210955e-05, + "loss": 0.0444, + "step": 1672 + }, + { + "epoch": 14.32, + "learning_rate": 5.594125500667558e-05, + "loss": 0.0597, + "step": 1676 + }, + { + "epoch": 14.36, + "learning_rate": 5.607476635514019e-05, + "loss": 0.0542, + "step": 1680 + }, + { + "epoch": 14.39, + "learning_rate": 5.62082777036048e-05, + "loss": 0.0452, + "step": 1684 + }, + { + "epoch": 14.43, + "learning_rate": 5.6341789052069426e-05, + "loss": 0.0511, + "step": 1688 + }, + { + "epoch": 14.46, + "learning_rate": 5.647530040053405e-05, + "loss": 0.0468, + "step": 1692 + }, + { + "epoch": 14.5, + "learning_rate": 5.660881174899867e-05, + "loss": 0.0488, + "step": 1696 + }, + { + "epoch": 14.53, + "learning_rate": 5.674232309746329e-05, + "loss": 0.0429, + "step": 1700 + }, + { + "epoch": 14.56, + "learning_rate": 5.687583444592791e-05, + "loss": 0.0441, + "step": 1704 + }, + { + "epoch": 14.6, + "learning_rate": 5.700934579439252e-05, + "loss": 0.0515, + "step": 1708 + }, + { + "epoch": 14.63, + "learning_rate": 5.714285714285714e-05, + "loss": 0.0462, + "step": 1712 + }, + { + "epoch": 14.67, + "learning_rate": 5.7276368491321764e-05, + "loss": 0.0416, + "step": 1716 + }, + { + "epoch": 14.7, + "learning_rate": 5.740987983978639e-05, + "loss": 0.0509, + "step": 1720 + }, + { + "epoch": 14.74, + "learning_rate": 5.7543391188251e-05, + "loss": 0.0376, + "step": 1724 + }, + { + "epoch": 14.77, + "learning_rate": 5.7676902536715626e-05, + "loss": 0.0408, + "step": 1728 + }, + { + "epoch": 14.8, + "learning_rate": 5.781041388518025e-05, + "loss": 0.047, + "step": 1732 + }, + { + "epoch": 14.84, + "learning_rate": 5.794392523364486e-05, + "loss": 0.05, + "step": 1736 + }, + { + "epoch": 14.87, + "learning_rate": 5.807743658210948e-05, + "loss": 0.0406, + "step": 1740 + }, + { + "epoch": 14.91, + "learning_rate": 5.82109479305741e-05, + "loss": 0.0408, + "step": 1744 + }, + { + "epoch": 14.94, + "learning_rate": 5.834445927903872e-05, + "loss": 0.0472, + "step": 1748 + }, + { + "epoch": 14.97, + "learning_rate": 5.847797062750334e-05, + "loss": 0.0468, + "step": 1752 + }, + { + "epoch": 15.01, + "learning_rate": 5.8611481975967965e-05, + "loss": 0.0552, + "step": 1756 + }, + { + "epoch": 15.04, + "learning_rate": 5.874499332443259e-05, + "loss": 0.0395, + "step": 1760 + }, + { + "epoch": 15.08, + "learning_rate": 5.8878504672897196e-05, + "loss": 0.0378, + "step": 1764 + }, + { + "epoch": 15.11, + "learning_rate": 5.901201602136181e-05, + "loss": 0.0389, + "step": 1768 + }, + { + "epoch": 15.15, + "learning_rate": 5.9145527369826435e-05, + "loss": 0.0375, + "step": 1772 + }, + { + "epoch": 15.18, + "learning_rate": 5.927903871829106e-05, + "loss": 0.0478, + "step": 1776 + }, + { + "epoch": 15.21, + "learning_rate": 5.941255006675568e-05, + "loss": 0.0348, + "step": 1780 + }, + { + "epoch": 15.25, + "learning_rate": 5.95460614152203e-05, + "loss": 0.04, + "step": 1784 + }, + { + "epoch": 15.28, + "learning_rate": 5.967957276368492e-05, + "loss": 0.0459, + "step": 1788 + }, + { + "epoch": 15.32, + "learning_rate": 5.981308411214953e-05, + "loss": 0.0428, + "step": 1792 + }, + { + "epoch": 15.35, + "learning_rate": 5.994659546061415e-05, + "loss": 0.0408, + "step": 1796 + }, + { + "epoch": 15.38, + "learning_rate": 6.0080106809078774e-05, + "loss": 0.0428, + "step": 1800 + }, + { + "epoch": 15.42, + "learning_rate": 6.0213618157543397e-05, + "loss": 0.0366, + "step": 1804 + }, + { + "epoch": 15.45, + "learning_rate": 6.034712950600801e-05, + "loss": 0.0474, + "step": 1808 + }, + { + "epoch": 15.49, + "learning_rate": 6.0480640854472635e-05, + "loss": 0.0432, + "step": 1812 + }, + { + "epoch": 15.52, + "learning_rate": 6.061415220293726e-05, + "loss": 0.0309, + "step": 1816 + }, + { + "epoch": 15.56, + "learning_rate": 6.074766355140187e-05, + "loss": 0.0418, + "step": 1820 + }, + { + "epoch": 15.59, + "learning_rate": 6.088117489986649e-05, + "loss": 0.0387, + "step": 1824 + }, + { + "epoch": 15.62, + "learning_rate": 6.101468624833111e-05, + "loss": 0.0433, + "step": 1828 + }, + { + "epoch": 15.66, + "learning_rate": 6.114819759679573e-05, + "loss": 0.0363, + "step": 1832 + }, + { + "epoch": 15.69, + "learning_rate": 6.128170894526035e-05, + "loss": 0.0438, + "step": 1836 + }, + { + "epoch": 15.73, + "learning_rate": 6.141522029372497e-05, + "loss": 0.0396, + "step": 1840 + }, + { + "epoch": 15.76, + "learning_rate": 6.15487316421896e-05, + "loss": 0.0319, + "step": 1844 + }, + { + "epoch": 15.79, + "learning_rate": 6.16822429906542e-05, + "loss": 0.0409, + "step": 1848 + }, + { + "epoch": 15.83, + "learning_rate": 6.181575433911883e-05, + "loss": 0.0412, + "step": 1852 + }, + { + "epoch": 15.86, + "learning_rate": 6.194926568758345e-05, + "loss": 0.0395, + "step": 1856 + }, + { + "epoch": 15.9, + "learning_rate": 6.208277703604807e-05, + "loss": 0.0372, + "step": 1860 + }, + { + "epoch": 15.93, + "learning_rate": 6.221628838451268e-05, + "loss": 0.0427, + "step": 1864 + }, + { + "epoch": 15.97, + "learning_rate": 6.23497997329773e-05, + "loss": 0.0407, + "step": 1868 + }, + { + "epoch": 16.0, + "learning_rate": 6.248331108144193e-05, + "loss": 0.0417, + "step": 1872 + }, + { + "epoch": 16.03, + "learning_rate": 6.261682242990654e-05, + "loss": 0.0361, + "step": 1876 + }, + { + "epoch": 16.07, + "learning_rate": 6.275033377837116e-05, + "loss": 0.0367, + "step": 1880 + }, + { + "epoch": 16.1, + "learning_rate": 6.288384512683578e-05, + "loss": 0.0411, + "step": 1884 + }, + { + "epoch": 16.14, + "learning_rate": 6.30173564753004e-05, + "loss": 0.0433, + "step": 1888 + }, + { + "epoch": 16.17, + "learning_rate": 6.315086782376503e-05, + "loss": 0.0355, + "step": 1892 + }, + { + "epoch": 16.21, + "learning_rate": 6.328437917222965e-05, + "loss": 0.0365, + "step": 1896 + }, + { + "epoch": 16.24, + "learning_rate": 6.341789052069427e-05, + "loss": 0.0378, + "step": 1900 + }, + { + "epoch": 16.27, + "learning_rate": 6.355140186915888e-05, + "loss": 0.0369, + "step": 1904 + }, + { + "epoch": 16.31, + "learning_rate": 6.368491321762349e-05, + "loss": 0.0341, + "step": 1908 + }, + { + "epoch": 16.34, + "learning_rate": 6.381842456608812e-05, + "loss": 0.0329, + "step": 1912 + }, + { + "epoch": 16.38, + "learning_rate": 6.395193591455274e-05, + "loss": 0.0377, + "step": 1916 + }, + { + "epoch": 16.41, + "learning_rate": 6.408544726301736e-05, + "loss": 0.0372, + "step": 1920 + }, + { + "epoch": 16.44, + "learning_rate": 6.421895861148198e-05, + "loss": 0.0326, + "step": 1924 + }, + { + "epoch": 16.48, + "learning_rate": 6.435246995994659e-05, + "loss": 0.0353, + "step": 1928 + }, + { + "epoch": 16.51, + "learning_rate": 6.448598130841122e-05, + "loss": 0.0338, + "step": 1932 + }, + { + "epoch": 16.55, + "learning_rate": 6.461949265687584e-05, + "loss": 0.0377, + "step": 1936 + }, + { + "epoch": 16.58, + "learning_rate": 6.475300400534046e-05, + "loss": 0.039, + "step": 1940 + }, + { + "epoch": 16.62, + "learning_rate": 6.488651535380508e-05, + "loss": 0.0354, + "step": 1944 + }, + { + "epoch": 16.65, + "learning_rate": 6.502002670226969e-05, + "loss": 0.0377, + "step": 1948 + }, + { + "epoch": 16.68, + "learning_rate": 6.515353805073432e-05, + "loss": 0.0379, + "step": 1952 + }, + { + "epoch": 16.72, + "learning_rate": 6.528704939919892e-05, + "loss": 0.034, + "step": 1956 + }, + { + "epoch": 16.75, + "learning_rate": 6.542056074766355e-05, + "loss": 0.0309, + "step": 1960 + }, + { + "epoch": 16.79, + "learning_rate": 6.555407209612817e-05, + "loss": 0.0367, + "step": 1964 + }, + { + "epoch": 16.82, + "learning_rate": 6.568758344459279e-05, + "loss": 0.0303, + "step": 1968 + }, + { + "epoch": 16.85, + "learning_rate": 6.582109479305742e-05, + "loss": 0.0269, + "step": 1972 + }, + { + "epoch": 16.89, + "learning_rate": 6.595460614152204e-05, + "loss": 0.0352, + "step": 1976 + }, + { + "epoch": 16.92, + "learning_rate": 6.608811748998666e-05, + "loss": 0.0372, + "step": 1980 + }, + { + "epoch": 16.96, + "learning_rate": 6.622162883845127e-05, + "loss": 0.0355, + "step": 1984 + }, + { + "epoch": 16.99, + "learning_rate": 6.635514018691589e-05, + "loss": 0.0359, + "step": 1988 + }, + { + "epoch": 17.03, + "learning_rate": 6.64886515353805e-05, + "loss": 0.0377, + "step": 1992 + }, + { + "epoch": 17.06, + "learning_rate": 6.662216288384512e-05, + "loss": 0.0301, + "step": 1996 + }, + { + "epoch": 17.09, + "learning_rate": 6.675567423230975e-05, + "loss": 0.0362, + "step": 2000 + }, + { + "epoch": 17.09, + "eval_exact_match": 0.446985446985447, + "eval_loss": 0.5254557728767395, + "eval_runtime": 86.8762, + "eval_samples_per_second": 11.073, + "step": 2000 + }, + { + "epoch": 17.13, + "learning_rate": 6.688918558077437e-05, + "loss": 0.0301, + "step": 2004 + }, + { + "epoch": 17.16, + "learning_rate": 6.702269692923899e-05, + "loss": 0.0302, + "step": 2008 + }, + { + "epoch": 17.2, + "learning_rate": 6.71562082777036e-05, + "loss": 0.0299, + "step": 2012 + }, + { + "epoch": 17.23, + "learning_rate": 6.728971962616822e-05, + "loss": 0.0341, + "step": 2016 + }, + { + "epoch": 17.26, + "learning_rate": 6.742323097463285e-05, + "loss": 0.0311, + "step": 2020 + }, + { + "epoch": 17.3, + "learning_rate": 6.755674232309747e-05, + "loss": 0.0277, + "step": 2024 + }, + { + "epoch": 17.33, + "learning_rate": 6.769025367156209e-05, + "loss": 0.0366, + "step": 2028 + }, + { + "epoch": 17.37, + "learning_rate": 6.78237650200267e-05, + "loss": 0.0338, + "step": 2032 + }, + { + "epoch": 17.4, + "learning_rate": 6.795727636849132e-05, + "loss": 0.0433, + "step": 2036 + }, + { + "epoch": 17.44, + "learning_rate": 6.809078771695593e-05, + "loss": 0.0266, + "step": 2040 + }, + { + "epoch": 17.47, + "learning_rate": 6.822429906542056e-05, + "loss": 0.0312, + "step": 2044 + }, + { + "epoch": 17.5, + "learning_rate": 6.835781041388518e-05, + "loss": 0.0337, + "step": 2048 + }, + { + "epoch": 17.54, + "learning_rate": 6.84913217623498e-05, + "loss": 0.036, + "step": 2052 + }, + { + "epoch": 17.57, + "learning_rate": 6.862483311081442e-05, + "loss": 0.0397, + "step": 2056 + }, + { + "epoch": 17.61, + "learning_rate": 6.875834445927905e-05, + "loss": 0.0328, + "step": 2060 + }, + { + "epoch": 17.64, + "learning_rate": 6.889185580774367e-05, + "loss": 0.0318, + "step": 2064 + }, + { + "epoch": 17.68, + "learning_rate": 6.902536715620828e-05, + "loss": 0.0326, + "step": 2068 + }, + { + "epoch": 17.71, + "learning_rate": 6.91588785046729e-05, + "loss": 0.026, + "step": 2072 + }, + { + "epoch": 17.74, + "learning_rate": 6.929238985313752e-05, + "loss": 0.0295, + "step": 2076 + }, + { + "epoch": 17.78, + "learning_rate": 6.942590120160213e-05, + "loss": 0.0328, + "step": 2080 + }, + { + "epoch": 17.81, + "learning_rate": 6.955941255006676e-05, + "loss": 0.0286, + "step": 2084 + }, + { + "epoch": 17.85, + "learning_rate": 6.969292389853138e-05, + "loss": 0.0308, + "step": 2088 + }, + { + "epoch": 17.88, + "learning_rate": 6.9826435246996e-05, + "loss": 0.0301, + "step": 2092 + }, + { + "epoch": 17.91, + "learning_rate": 6.995994659546061e-05, + "loss": 0.0271, + "step": 2096 + }, + { + "epoch": 17.95, + "learning_rate": 7.009345794392523e-05, + "loss": 0.0303, + "step": 2100 + }, + { + "epoch": 17.98, + "learning_rate": 7.022696929238986e-05, + "loss": 0.0236, + "step": 2104 + }, + { + "epoch": 18.02, + "learning_rate": 7.036048064085448e-05, + "loss": 0.0297, + "step": 2108 + }, + { + "epoch": 18.05, + "learning_rate": 7.04939919893191e-05, + "loss": 0.0216, + "step": 2112 + }, + { + "epoch": 18.09, + "learning_rate": 7.062750333778372e-05, + "loss": 0.026, + "step": 2116 + }, + { + "epoch": 18.12, + "learning_rate": 7.076101468624833e-05, + "loss": 0.0299, + "step": 2120 + }, + { + "epoch": 18.15, + "learning_rate": 7.089452603471294e-05, + "loss": 0.0303, + "step": 2124 + }, + { + "epoch": 18.19, + "learning_rate": 7.102803738317757e-05, + "loss": 0.0256, + "step": 2128 + }, + { + "epoch": 18.22, + "learning_rate": 7.116154873164219e-05, + "loss": 0.0304, + "step": 2132 + }, + { + "epoch": 18.26, + "learning_rate": 7.129506008010681e-05, + "loss": 0.0241, + "step": 2136 + }, + { + "epoch": 18.29, + "learning_rate": 7.142857142857143e-05, + "loss": 0.0294, + "step": 2140 + }, + { + "epoch": 18.32, + "learning_rate": 7.156208277703606e-05, + "loss": 0.0279, + "step": 2144 + }, + { + "epoch": 18.36, + "learning_rate": 7.169559412550068e-05, + "loss": 0.0332, + "step": 2148 + }, + { + "epoch": 18.39, + "learning_rate": 7.182910547396529e-05, + "loss": 0.0315, + "step": 2152 + }, + { + "epoch": 18.43, + "learning_rate": 7.196261682242991e-05, + "loss": 0.0215, + "step": 2156 + }, + { + "epoch": 18.46, + "learning_rate": 7.209612817089453e-05, + "loss": 0.032, + "step": 2160 + }, + { + "epoch": 18.5, + "learning_rate": 7.222963951935914e-05, + "loss": 0.0257, + "step": 2164 + }, + { + "epoch": 18.53, + "learning_rate": 7.236315086782377e-05, + "loss": 0.0287, + "step": 2168 + }, + { + "epoch": 18.56, + "learning_rate": 7.249666221628839e-05, + "loss": 0.0284, + "step": 2172 + }, + { + "epoch": 18.6, + "learning_rate": 7.263017356475301e-05, + "loss": 0.0316, + "step": 2176 + }, + { + "epoch": 18.63, + "learning_rate": 7.276368491321762e-05, + "loss": 0.0268, + "step": 2180 + }, + { + "epoch": 18.67, + "learning_rate": 7.289719626168224e-05, + "loss": 0.0291, + "step": 2184 + }, + { + "epoch": 18.7, + "learning_rate": 7.303070761014687e-05, + "loss": 0.0284, + "step": 2188 + }, + { + "epoch": 18.74, + "learning_rate": 7.316421895861149e-05, + "loss": 0.0239, + "step": 2192 + }, + { + "epoch": 18.77, + "learning_rate": 7.329773030707611e-05, + "loss": 0.0247, + "step": 2196 + }, + { + "epoch": 18.8, + "learning_rate": 7.343124165554073e-05, + "loss": 0.0192, + "step": 2200 + }, + { + "epoch": 18.84, + "learning_rate": 7.356475300400534e-05, + "loss": 0.0236, + "step": 2204 + }, + { + "epoch": 18.87, + "learning_rate": 7.369826435246995e-05, + "loss": 0.0235, + "step": 2208 + }, + { + "epoch": 18.91, + "learning_rate": 7.383177570093458e-05, + "loss": 0.0313, + "step": 2212 + }, + { + "epoch": 18.94, + "learning_rate": 7.39652870493992e-05, + "loss": 0.0292, + "step": 2216 + }, + { + "epoch": 18.97, + "learning_rate": 7.409879839786382e-05, + "loss": 0.0295, + "step": 2220 + }, + { + "epoch": 19.01, + "learning_rate": 7.423230974632844e-05, + "loss": 0.0251, + "step": 2224 + }, + { + "epoch": 19.04, + "learning_rate": 7.436582109479307e-05, + "loss": 0.0245, + "step": 2228 + }, + { + "epoch": 19.08, + "learning_rate": 7.449933244325769e-05, + "loss": 0.0205, + "step": 2232 + }, + { + "epoch": 19.11, + "learning_rate": 7.46328437917223e-05, + "loss": 0.0219, + "step": 2236 + }, + { + "epoch": 19.15, + "learning_rate": 7.476635514018692e-05, + "loss": 0.0262, + "step": 2240 + }, + { + "epoch": 19.18, + "learning_rate": 7.489986648865154e-05, + "loss": 0.0237, + "step": 2244 + }, + { + "epoch": 19.21, + "learning_rate": 7.503337783711615e-05, + "loss": 0.024, + "step": 2248 + }, + { + "epoch": 19.25, + "learning_rate": 7.516688918558078e-05, + "loss": 0.0244, + "step": 2252 + }, + { + "epoch": 19.28, + "learning_rate": 7.53004005340454e-05, + "loss": 0.0257, + "step": 2256 + }, + { + "epoch": 19.32, + "learning_rate": 7.543391188251002e-05, + "loss": 0.0238, + "step": 2260 + }, + { + "epoch": 19.35, + "learning_rate": 7.556742323097463e-05, + "loss": 0.0306, + "step": 2264 + }, + { + "epoch": 19.38, + "learning_rate": 7.570093457943925e-05, + "loss": 0.0274, + "step": 2268 + }, + { + "epoch": 19.42, + "learning_rate": 7.583444592790388e-05, + "loss": 0.0236, + "step": 2272 + }, + { + "epoch": 19.45, + "learning_rate": 7.59679572763685e-05, + "loss": 0.0282, + "step": 2276 + }, + { + "epoch": 19.49, + "learning_rate": 7.610146862483312e-05, + "loss": 0.0274, + "step": 2280 + }, + { + "epoch": 19.52, + "learning_rate": 7.623497997329774e-05, + "loss": 0.0228, + "step": 2284 + }, + { + "epoch": 19.56, + "learning_rate": 7.636849132176235e-05, + "loss": 0.0242, + "step": 2288 + }, + { + "epoch": 19.59, + "learning_rate": 7.650200267022698e-05, + "loss": 0.0209, + "step": 2292 + }, + { + "epoch": 19.62, + "learning_rate": 7.663551401869158e-05, + "loss": 0.0224, + "step": 2296 + }, + { + "epoch": 19.66, + "learning_rate": 7.676902536715621e-05, + "loss": 0.0221, + "step": 2300 + }, + { + "epoch": 19.69, + "learning_rate": 7.690253671562083e-05, + "loss": 0.0209, + "step": 2304 + }, + { + "epoch": 19.73, + "learning_rate": 7.703604806408545e-05, + "loss": 0.021, + "step": 2308 + }, + { + "epoch": 19.76, + "learning_rate": 7.716955941255008e-05, + "loss": 0.023, + "step": 2312 + }, + { + "epoch": 19.79, + "learning_rate": 7.73030707610147e-05, + "loss": 0.0221, + "step": 2316 + }, + { + "epoch": 19.83, + "learning_rate": 7.743658210947931e-05, + "loss": 0.0222, + "step": 2320 + }, + { + "epoch": 19.86, + "learning_rate": 7.757009345794393e-05, + "loss": 0.0233, + "step": 2324 + }, + { + "epoch": 19.9, + "learning_rate": 7.770360480640855e-05, + "loss": 0.0213, + "step": 2328 + }, + { + "epoch": 19.93, + "learning_rate": 7.783711615487318e-05, + "loss": 0.0216, + "step": 2332 + }, + { + "epoch": 19.97, + "learning_rate": 7.797062750333778e-05, + "loss": 0.0225, + "step": 2336 + }, + { + "epoch": 20.0, + "learning_rate": 7.810413885180241e-05, + "loss": 0.0263, + "step": 2340 + }, + { + "epoch": 20.03, + "learning_rate": 7.823765020026703e-05, + "loss": 0.019, + "step": 2344 + }, + { + "epoch": 20.07, + "learning_rate": 7.837116154873164e-05, + "loss": 0.0232, + "step": 2348 + }, + { + "epoch": 20.1, + "learning_rate": 7.850467289719626e-05, + "loss": 0.0274, + "step": 2352 + }, + { + "epoch": 20.14, + "learning_rate": 7.863818424566088e-05, + "loss": 0.0227, + "step": 2356 + }, + { + "epoch": 20.17, + "learning_rate": 7.877169559412551e-05, + "loss": 0.0229, + "step": 2360 + }, + { + "epoch": 20.21, + "learning_rate": 7.890520694259013e-05, + "loss": 0.0222, + "step": 2364 + }, + { + "epoch": 20.24, + "learning_rate": 7.903871829105475e-05, + "loss": 0.0241, + "step": 2368 + }, + { + "epoch": 20.27, + "learning_rate": 7.917222963951936e-05, + "loss": 0.0236, + "step": 2372 + }, + { + "epoch": 20.31, + "learning_rate": 7.930574098798398e-05, + "loss": 0.0204, + "step": 2376 + }, + { + "epoch": 20.34, + "learning_rate": 7.94392523364486e-05, + "loss": 0.0221, + "step": 2380 + }, + { + "epoch": 20.38, + "learning_rate": 7.957276368491322e-05, + "loss": 0.0165, + "step": 2384 + }, + { + "epoch": 20.41, + "learning_rate": 7.970627503337784e-05, + "loss": 0.0171, + "step": 2388 + }, + { + "epoch": 20.44, + "learning_rate": 7.983978638184246e-05, + "loss": 0.0206, + "step": 2392 + }, + { + "epoch": 20.48, + "learning_rate": 7.997329773030708e-05, + "loss": 0.0207, + "step": 2396 + }, + { + "epoch": 20.51, + "learning_rate": 8.01068090787717e-05, + "loss": 0.0176, + "step": 2400 + }, + { + "epoch": 20.55, + "learning_rate": 8.024032042723632e-05, + "loss": 0.0198, + "step": 2404 + }, + { + "epoch": 20.58, + "learning_rate": 8.037383177570094e-05, + "loss": 0.0249, + "step": 2408 + }, + { + "epoch": 20.62, + "learning_rate": 8.050734312416556e-05, + "loss": 0.02, + "step": 2412 + }, + { + "epoch": 20.65, + "learning_rate": 8.064085447263018e-05, + "loss": 0.0216, + "step": 2416 + }, + { + "epoch": 20.68, + "learning_rate": 8.07743658210948e-05, + "loss": 0.0259, + "step": 2420 + }, + { + "epoch": 20.72, + "learning_rate": 8.090787716955942e-05, + "loss": 0.0226, + "step": 2424 + }, + { + "epoch": 20.75, + "learning_rate": 8.104138851802403e-05, + "loss": 0.025, + "step": 2428 + }, + { + "epoch": 20.79, + "learning_rate": 8.117489986648865e-05, + "loss": 0.0201, + "step": 2432 + }, + { + "epoch": 20.82, + "learning_rate": 8.130841121495327e-05, + "loss": 0.0225, + "step": 2436 + }, + { + "epoch": 20.85, + "learning_rate": 8.14419225634179e-05, + "loss": 0.0217, + "step": 2440 + }, + { + "epoch": 20.89, + "learning_rate": 8.157543391188252e-05, + "loss": 0.0217, + "step": 2444 + }, + { + "epoch": 20.92, + "learning_rate": 8.170894526034714e-05, + "loss": 0.0199, + "step": 2448 + }, + { + "epoch": 20.96, + "learning_rate": 8.184245660881176e-05, + "loss": 0.0194, + "step": 2452 + }, + { + "epoch": 20.99, + "learning_rate": 8.197596795727637e-05, + "loss": 0.0223, + "step": 2456 + }, + { + "epoch": 21.03, + "learning_rate": 8.2109479305741e-05, + "loss": 0.0181, + "step": 2460 + }, + { + "epoch": 21.06, + "learning_rate": 8.22429906542056e-05, + "loss": 0.0187, + "step": 2464 + }, + { + "epoch": 21.09, + "learning_rate": 8.237650200267023e-05, + "loss": 0.0177, + "step": 2468 + }, + { + "epoch": 21.13, + "learning_rate": 8.251001335113485e-05, + "loss": 0.0204, + "step": 2472 + }, + { + "epoch": 21.16, + "learning_rate": 8.264352469959947e-05, + "loss": 0.0186, + "step": 2476 + }, + { + "epoch": 21.2, + "learning_rate": 8.27770360480641e-05, + "loss": 0.02, + "step": 2480 + }, + { + "epoch": 21.23, + "learning_rate": 8.29105473965287e-05, + "loss": 0.0203, + "step": 2484 + }, + { + "epoch": 21.26, + "learning_rate": 8.304405874499333e-05, + "loss": 0.0219, + "step": 2488 + }, + { + "epoch": 21.3, + "learning_rate": 8.317757009345795e-05, + "loss": 0.0197, + "step": 2492 + }, + { + "epoch": 21.33, + "learning_rate": 8.331108144192257e-05, + "loss": 0.0167, + "step": 2496 + }, + { + "epoch": 21.37, + "learning_rate": 8.34445927903872e-05, + "loss": 0.0163, + "step": 2500 + }, + { + "epoch": 21.4, + "learning_rate": 8.35781041388518e-05, + "loss": 0.0157, + "step": 2504 + }, + { + "epoch": 21.44, + "learning_rate": 8.371161548731643e-05, + "loss": 0.0235, + "step": 2508 + }, + { + "epoch": 21.47, + "learning_rate": 8.384512683578104e-05, + "loss": 0.0212, + "step": 2512 + }, + { + "epoch": 21.5, + "learning_rate": 8.397863818424566e-05, + "loss": 0.0187, + "step": 2516 + }, + { + "epoch": 21.54, + "learning_rate": 8.411214953271028e-05, + "loss": 0.015, + "step": 2520 + }, + { + "epoch": 21.57, + "learning_rate": 8.42456608811749e-05, + "loss": 0.0163, + "step": 2524 + }, + { + "epoch": 21.61, + "learning_rate": 8.437917222963953e-05, + "loss": 0.0167, + "step": 2528 + }, + { + "epoch": 21.64, + "learning_rate": 8.451268357810415e-05, + "loss": 0.019, + "step": 2532 + }, + { + "epoch": 21.68, + "learning_rate": 8.464619492656877e-05, + "loss": 0.017, + "step": 2536 + }, + { + "epoch": 21.71, + "learning_rate": 8.477970627503338e-05, + "loss": 0.0211, + "step": 2540 + }, + { + "epoch": 21.74, + "learning_rate": 8.4913217623498e-05, + "loss": 0.0154, + "step": 2544 + }, + { + "epoch": 21.78, + "learning_rate": 8.504672897196261e-05, + "loss": 0.0236, + "step": 2548 + }, + { + "epoch": 21.81, + "learning_rate": 8.518024032042724e-05, + "loss": 0.0182, + "step": 2552 + }, + { + "epoch": 21.85, + "learning_rate": 8.531375166889186e-05, + "loss": 0.0207, + "step": 2556 + }, + { + "epoch": 21.88, + "learning_rate": 8.544726301735648e-05, + "loss": 0.0171, + "step": 2560 + }, + { + "epoch": 21.91, + "learning_rate": 8.55807743658211e-05, + "loss": 0.0187, + "step": 2564 + }, + { + "epoch": 21.95, + "learning_rate": 8.571428571428571e-05, + "loss": 0.0175, + "step": 2568 + }, + { + "epoch": 21.98, + "learning_rate": 8.584779706275034e-05, + "loss": 0.0176, + "step": 2572 + }, + { + "epoch": 22.02, + "learning_rate": 8.598130841121496e-05, + "loss": 0.0159, + "step": 2576 + }, + { + "epoch": 22.05, + "learning_rate": 8.611481975967958e-05, + "loss": 0.0172, + "step": 2580 + }, + { + "epoch": 22.09, + "learning_rate": 8.62483311081442e-05, + "loss": 0.017, + "step": 2584 + }, + { + "epoch": 22.12, + "learning_rate": 8.638184245660881e-05, + "loss": 0.0187, + "step": 2588 + }, + { + "epoch": 22.15, + "learning_rate": 8.651535380507344e-05, + "loss": 0.0171, + "step": 2592 + }, + { + "epoch": 22.19, + "learning_rate": 8.664886515353804e-05, + "loss": 0.0175, + "step": 2596 + }, + { + "epoch": 22.22, + "learning_rate": 8.678237650200267e-05, + "loss": 0.0168, + "step": 2600 + }, + { + "epoch": 22.26, + "learning_rate": 8.691588785046729e-05, + "loss": 0.0182, + "step": 2604 + }, + { + "epoch": 22.29, + "learning_rate": 8.704939919893191e-05, + "loss": 0.0211, + "step": 2608 + }, + { + "epoch": 22.32, + "learning_rate": 8.718291054739654e-05, + "loss": 0.0226, + "step": 2612 + }, + { + "epoch": 22.36, + "learning_rate": 8.731642189586116e-05, + "loss": 0.0118, + "step": 2616 + }, + { + "epoch": 22.39, + "learning_rate": 8.744993324432578e-05, + "loss": 0.0179, + "step": 2620 + }, + { + "epoch": 22.43, + "learning_rate": 8.758344459279039e-05, + "loss": 0.0161, + "step": 2624 + }, + { + "epoch": 22.46, + "learning_rate": 8.771695594125501e-05, + "loss": 0.016, + "step": 2628 + }, + { + "epoch": 22.5, + "learning_rate": 8.785046728971964e-05, + "loss": 0.0194, + "step": 2632 + }, + { + "epoch": 22.53, + "learning_rate": 8.798397863818424e-05, + "loss": 0.0161, + "step": 2636 + }, + { + "epoch": 22.56, + "learning_rate": 8.811748998664887e-05, + "loss": 0.0177, + "step": 2640 + }, + { + "epoch": 22.6, + "learning_rate": 8.825100133511349e-05, + "loss": 0.0165, + "step": 2644 + }, + { + "epoch": 22.63, + "learning_rate": 8.838451268357811e-05, + "loss": 0.0147, + "step": 2648 + }, + { + "epoch": 22.67, + "learning_rate": 8.851802403204272e-05, + "loss": 0.0148, + "step": 2652 + }, + { + "epoch": 22.7, + "learning_rate": 8.865153538050734e-05, + "loss": 0.0209, + "step": 2656 + }, + { + "epoch": 22.74, + "learning_rate": 8.878504672897197e-05, + "loss": 0.0135, + "step": 2660 + }, + { + "epoch": 22.77, + "learning_rate": 8.891855807743659e-05, + "loss": 0.0192, + "step": 2664 + }, + { + "epoch": 22.8, + "learning_rate": 8.905206942590121e-05, + "loss": 0.0183, + "step": 2668 + }, + { + "epoch": 22.84, + "learning_rate": 8.918558077436584e-05, + "loss": 0.019, + "step": 2672 + }, + { + "epoch": 22.87, + "learning_rate": 8.931909212283044e-05, + "loss": 0.0151, + "step": 2676 + }, + { + "epoch": 22.91, + "learning_rate": 8.945260347129505e-05, + "loss": 0.0127, + "step": 2680 + }, + { + "epoch": 22.94, + "learning_rate": 8.958611481975968e-05, + "loss": 0.0169, + "step": 2684 + }, + { + "epoch": 22.97, + "learning_rate": 8.97196261682243e-05, + "loss": 0.0143, + "step": 2688 + }, + { + "epoch": 23.01, + "learning_rate": 8.985313751668892e-05, + "loss": 0.0195, + "step": 2692 + }, + { + "epoch": 23.04, + "learning_rate": 8.998664886515354e-05, + "loss": 0.0151, + "step": 2696 + }, + { + "epoch": 23.08, + "learning_rate": 9.012016021361817e-05, + "loss": 0.0124, + "step": 2700 + }, + { + "epoch": 23.11, + "learning_rate": 9.025367156208279e-05, + "loss": 0.0156, + "step": 2704 + }, + { + "epoch": 23.15, + "learning_rate": 9.03871829105474e-05, + "loss": 0.0157, + "step": 2708 + }, + { + "epoch": 23.18, + "learning_rate": 9.052069425901202e-05, + "loss": 0.0169, + "step": 2712 + }, + { + "epoch": 23.21, + "learning_rate": 9.065420560747664e-05, + "loss": 0.018, + "step": 2716 + }, + { + "epoch": 23.25, + "learning_rate": 9.078771695594125e-05, + "loss": 0.0156, + "step": 2720 + }, + { + "epoch": 23.28, + "learning_rate": 9.092122830440588e-05, + "loss": 0.0147, + "step": 2724 + }, + { + "epoch": 23.32, + "learning_rate": 9.10547396528705e-05, + "loss": 0.0145, + "step": 2728 + }, + { + "epoch": 23.35, + "learning_rate": 9.118825100133512e-05, + "loss": 0.0122, + "step": 2732 + }, + { + "epoch": 23.38, + "learning_rate": 9.132176234979973e-05, + "loss": 0.0126, + "step": 2736 + }, + { + "epoch": 23.42, + "learning_rate": 9.145527369826435e-05, + "loss": 0.0147, + "step": 2740 + }, + { + "epoch": 23.45, + "learning_rate": 9.158878504672898e-05, + "loss": 0.0209, + "step": 2744 + }, + { + "epoch": 23.49, + "learning_rate": 9.17222963951936e-05, + "loss": 0.0159, + "step": 2748 + }, + { + "epoch": 23.52, + "learning_rate": 9.185580774365822e-05, + "loss": 0.0116, + "step": 2752 + }, + { + "epoch": 23.56, + "learning_rate": 9.198931909212284e-05, + "loss": 0.0136, + "step": 2756 + }, + { + "epoch": 23.59, + "learning_rate": 9.212283044058745e-05, + "loss": 0.016, + "step": 2760 + }, + { + "epoch": 23.62, + "learning_rate": 9.225634178905206e-05, + "loss": 0.0152, + "step": 2764 + }, + { + "epoch": 23.66, + "learning_rate": 9.238985313751669e-05, + "loss": 0.0125, + "step": 2768 + }, + { + "epoch": 23.69, + "learning_rate": 9.252336448598131e-05, + "loss": 0.0166, + "step": 2772 + }, + { + "epoch": 23.73, + "learning_rate": 9.265687583444593e-05, + "loss": 0.013, + "step": 2776 + }, + { + "epoch": 23.76, + "learning_rate": 9.279038718291055e-05, + "loss": 0.0162, + "step": 2780 + }, + { + "epoch": 23.79, + "learning_rate": 9.292389853137518e-05, + "loss": 0.0128, + "step": 2784 + }, + { + "epoch": 23.83, + "learning_rate": 9.305740987983979e-05, + "loss": 0.0104, + "step": 2788 + }, + { + "epoch": 23.86, + "learning_rate": 9.319092122830441e-05, + "loss": 0.0143, + "step": 2792 + }, + { + "epoch": 23.9, + "learning_rate": 9.332443257676903e-05, + "loss": 0.0143, + "step": 2796 + }, + { + "epoch": 23.93, + "learning_rate": 9.345794392523365e-05, + "loss": 0.0162, + "step": 2800 + }, + { + "epoch": 23.97, + "learning_rate": 9.359145527369826e-05, + "loss": 0.0167, + "step": 2804 + }, + { + "epoch": 24.0, + "learning_rate": 9.372496662216289e-05, + "loss": 0.0143, + "step": 2808 + }, + { + "epoch": 24.03, + "learning_rate": 9.385847797062751e-05, + "loss": 0.015, + "step": 2812 + }, + { + "epoch": 24.07, + "learning_rate": 9.399198931909212e-05, + "loss": 0.012, + "step": 2816 + }, + { + "epoch": 24.1, + "learning_rate": 9.412550066755674e-05, + "loss": 0.0107, + "step": 2820 + }, + { + "epoch": 24.14, + "learning_rate": 9.425901201602136e-05, + "loss": 0.0114, + "step": 2824 + }, + { + "epoch": 24.17, + "learning_rate": 9.439252336448599e-05, + "loss": 0.0133, + "step": 2828 + }, + { + "epoch": 24.21, + "learning_rate": 9.452603471295061e-05, + "loss": 0.0135, + "step": 2832 + }, + { + "epoch": 24.24, + "learning_rate": 9.465954606141523e-05, + "loss": 0.0157, + "step": 2836 + }, + { + "epoch": 24.27, + "learning_rate": 9.479305740987985e-05, + "loss": 0.0134, + "step": 2840 + }, + { + "epoch": 24.31, + "learning_rate": 9.492656875834446e-05, + "loss": 0.0155, + "step": 2844 + }, + { + "epoch": 24.34, + "learning_rate": 9.506008010680909e-05, + "loss": 0.0098, + "step": 2848 + }, + { + "epoch": 24.38, + "learning_rate": 9.51935914552737e-05, + "loss": 0.0121, + "step": 2852 + }, + { + "epoch": 24.41, + "learning_rate": 9.532710280373832e-05, + "loss": 0.0147, + "step": 2856 + }, + { + "epoch": 24.44, + "learning_rate": 9.546061415220294e-05, + "loss": 0.0128, + "step": 2860 + }, + { + "epoch": 24.48, + "learning_rate": 9.559412550066756e-05, + "loss": 0.0153, + "step": 2864 + }, + { + "epoch": 24.51, + "learning_rate": 9.572763684913219e-05, + "loss": 0.0118, + "step": 2868 + }, + { + "epoch": 24.55, + "learning_rate": 9.58611481975968e-05, + "loss": 0.0139, + "step": 2872 + }, + { + "epoch": 24.58, + "learning_rate": 9.599465954606142e-05, + "loss": 0.0115, + "step": 2876 + }, + { + "epoch": 24.62, + "learning_rate": 9.612817089452604e-05, + "loss": 0.0127, + "step": 2880 + }, + { + "epoch": 24.65, + "learning_rate": 9.626168224299066e-05, + "loss": 0.0127, + "step": 2884 + }, + { + "epoch": 24.68, + "learning_rate": 9.639519359145529e-05, + "loss": 0.0121, + "step": 2888 + }, + { + "epoch": 24.72, + "learning_rate": 9.65287049399199e-05, + "loss": 0.011, + "step": 2892 + }, + { + "epoch": 24.75, + "learning_rate": 9.666221628838452e-05, + "loss": 0.0126, + "step": 2896 + }, + { + "epoch": 24.79, + "learning_rate": 9.679572763684913e-05, + "loss": 0.0112, + "step": 2900 + }, + { + "epoch": 24.82, + "learning_rate": 9.692923898531375e-05, + "loss": 0.0152, + "step": 2904 + }, + { + "epoch": 24.85, + "learning_rate": 9.706275033377837e-05, + "loss": 0.014, + "step": 2908 + }, + { + "epoch": 24.89, + "learning_rate": 9.7196261682243e-05, + "loss": 0.013, + "step": 2912 + }, + { + "epoch": 24.92, + "learning_rate": 9.732977303070762e-05, + "loss": 0.0113, + "step": 2916 + }, + { + "epoch": 24.96, + "learning_rate": 9.746328437917224e-05, + "loss": 0.0125, + "step": 2920 + }, + { + "epoch": 24.99, + "learning_rate": 9.759679572763686e-05, + "loss": 0.012, + "step": 2924 + }, + { + "epoch": 25.03, + "learning_rate": 9.773030707610147e-05, + "loss": 0.0127, + "step": 2928 + }, + { + "epoch": 25.06, + "learning_rate": 9.78638184245661e-05, + "loss": 0.0107, + "step": 2932 + }, + { + "epoch": 25.09, + "learning_rate": 9.79973297730307e-05, + "loss": 0.0094, + "step": 2936 + }, + { + "epoch": 25.13, + "learning_rate": 9.813084112149533e-05, + "loss": 0.0148, + "step": 2940 + }, + { + "epoch": 25.16, + "learning_rate": 9.826435246995995e-05, + "loss": 0.0133, + "step": 2944 + }, + { + "epoch": 25.2, + "learning_rate": 9.839786381842457e-05, + "loss": 0.0105, + "step": 2948 + }, + { + "epoch": 25.23, + "learning_rate": 9.85313751668892e-05, + "loss": 0.0114, + "step": 2952 + }, + { + "epoch": 25.26, + "learning_rate": 9.86648865153538e-05, + "loss": 0.0133, + "step": 2956 + }, + { + "epoch": 25.3, + "learning_rate": 9.879839786381843e-05, + "loss": 0.01, + "step": 2960 + }, + { + "epoch": 25.33, + "learning_rate": 9.893190921228305e-05, + "loss": 0.0107, + "step": 2964 + }, + { + "epoch": 25.37, + "learning_rate": 9.906542056074767e-05, + "loss": 0.0138, + "step": 2968 + }, + { + "epoch": 25.4, + "learning_rate": 9.91989319092123e-05, + "loss": 0.0114, + "step": 2972 + }, + { + "epoch": 25.44, + "learning_rate": 9.93324432576769e-05, + "loss": 0.0128, + "step": 2976 + }, + { + "epoch": 25.47, + "learning_rate": 9.946595460614153e-05, + "loss": 0.0095, + "step": 2980 + }, + { + "epoch": 25.5, + "learning_rate": 9.959946595460614e-05, + "loss": 0.0113, + "step": 2984 + }, + { + "epoch": 25.54, + "learning_rate": 9.973297730307076e-05, + "loss": 0.0093, + "step": 2988 + }, + { + "epoch": 25.57, + "learning_rate": 9.986648865153538e-05, + "loss": 0.0118, + "step": 2992 + }, + { + "epoch": 25.61, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 2996 + }, + { + "epoch": 25.64, + "learning_rate": 9.99999945668858e-05, + "loss": 0.0147, + "step": 3000 + }, + { + "epoch": 25.64, + "eval_exact_match": 0.4625779625779626, + "eval_loss": 0.6119692325592041, + "eval_runtime": 85.6703, + "eval_samples_per_second": 11.229, + "step": 3000 + }, + { + "epoch": 25.68, + "learning_rate": 9.99999782675444e-05, + "loss": 0.0108, + "step": 3004 + }, + { + "epoch": 25.71, + "learning_rate": 9.999995110197932e-05, + "loss": 0.0127, + "step": 3008 + }, + { + "epoch": 25.74, + "learning_rate": 9.999991307019647e-05, + "loss": 0.0102, + "step": 3012 + }, + { + "epoch": 25.78, + "learning_rate": 9.999986417220411e-05, + "loss": 0.0167, + "step": 3016 + }, + { + "epoch": 25.81, + "learning_rate": 9.999980440801289e-05, + "loss": 0.0135, + "step": 3020 + }, + { + "epoch": 25.85, + "learning_rate": 9.999973377763576e-05, + "loss": 0.0114, + "step": 3024 + }, + { + "epoch": 25.88, + "learning_rate": 9.999965228108811e-05, + "loss": 0.0118, + "step": 3028 + }, + { + "epoch": 25.91, + "learning_rate": 9.999955991838763e-05, + "loss": 0.0123, + "step": 3032 + }, + { + "epoch": 25.95, + "learning_rate": 9.99994566895544e-05, + "loss": 0.0136, + "step": 3036 + }, + { + "epoch": 25.98, + "learning_rate": 9.999934259461086e-05, + "loss": 0.0105, + "step": 3040 + }, + { + "epoch": 26.02, + "learning_rate": 9.999921763358177e-05, + "loss": 0.0088, + "step": 3044 + }, + { + "epoch": 26.05, + "learning_rate": 9.999908180649433e-05, + "loss": 0.0111, + "step": 3048 + }, + { + "epoch": 26.09, + "learning_rate": 9.999893511337803e-05, + "loss": 0.0089, + "step": 3052 + }, + { + "epoch": 26.12, + "learning_rate": 9.999877755426476e-05, + "loss": 0.0081, + "step": 3056 + }, + { + "epoch": 26.15, + "learning_rate": 9.99986091291888e-05, + "loss": 0.0105, + "step": 3060 + }, + { + "epoch": 26.19, + "learning_rate": 9.999842983818668e-05, + "loss": 0.0115, + "step": 3064 + }, + { + "epoch": 26.22, + "learning_rate": 9.99982396812974e-05, + "loss": 0.0082, + "step": 3068 + }, + { + "epoch": 26.26, + "learning_rate": 9.99980386585623e-05, + "loss": 0.0141, + "step": 3072 + }, + { + "epoch": 26.29, + "learning_rate": 9.999782677002505e-05, + "loss": 0.0092, + "step": 3076 + }, + { + "epoch": 26.32, + "learning_rate": 9.999760401573169e-05, + "loss": 0.0117, + "step": 3080 + }, + { + "epoch": 26.36, + "learning_rate": 9.999737039573065e-05, + "loss": 0.0092, + "step": 3084 + }, + { + "epoch": 26.39, + "learning_rate": 9.99971259100727e-05, + "loss": 0.011, + "step": 3088 + }, + { + "epoch": 26.43, + "learning_rate": 9.999687055881095e-05, + "loss": 0.0115, + "step": 3092 + }, + { + "epoch": 26.46, + "learning_rate": 9.999660434200093e-05, + "loss": 0.007, + "step": 3096 + }, + { + "epoch": 26.5, + "learning_rate": 9.999632725970047e-05, + "loss": 0.0127, + "step": 3100 + }, + { + "epoch": 26.53, + "learning_rate": 9.999603931196979e-05, + "loss": 0.0122, + "step": 3104 + }, + { + "epoch": 26.56, + "learning_rate": 9.999574049887146e-05, + "loss": 0.0111, + "step": 3108 + }, + { + "epoch": 26.6, + "learning_rate": 9.999543082047044e-05, + "loss": 0.0138, + "step": 3112 + }, + { + "epoch": 26.63, + "learning_rate": 9.999511027683402e-05, + "loss": 0.0079, + "step": 3116 + }, + { + "epoch": 26.67, + "learning_rate": 9.999477886803186e-05, + "loss": 0.0105, + "step": 3120 + }, + { + "epoch": 26.7, + "learning_rate": 9.999443659413598e-05, + "loss": 0.0112, + "step": 3124 + }, + { + "epoch": 26.74, + "learning_rate": 9.99940834552208e-05, + "loss": 0.0124, + "step": 3128 + }, + { + "epoch": 26.77, + "learning_rate": 9.9993719451363e-05, + "loss": 0.0089, + "step": 3132 + }, + { + "epoch": 26.8, + "learning_rate": 9.999334458264173e-05, + "loss": 0.0087, + "step": 3136 + }, + { + "epoch": 26.84, + "learning_rate": 9.999295884913846e-05, + "loss": 0.0126, + "step": 3140 + }, + { + "epoch": 26.87, + "learning_rate": 9.9992562250937e-05, + "loss": 0.009, + "step": 3144 + }, + { + "epoch": 26.91, + "learning_rate": 9.999215478812358e-05, + "loss": 0.011, + "step": 3148 + }, + { + "epoch": 26.94, + "learning_rate": 9.99917364607867e-05, + "loss": 0.0114, + "step": 3152 + }, + { + "epoch": 26.97, + "learning_rate": 9.999130726901729e-05, + "loss": 0.0098, + "step": 3156 + }, + { + "epoch": 27.01, + "learning_rate": 9.999086721290864e-05, + "loss": 0.0096, + "step": 3160 + }, + { + "epoch": 27.04, + "learning_rate": 9.999041629255637e-05, + "loss": 0.0122, + "step": 3164 + }, + { + "epoch": 27.08, + "learning_rate": 9.99899545080585e-05, + "loss": 0.0073, + "step": 3168 + }, + { + "epoch": 27.11, + "learning_rate": 9.998948185951535e-05, + "loss": 0.0094, + "step": 3172 + }, + { + "epoch": 27.15, + "learning_rate": 9.998899834702964e-05, + "loss": 0.008, + "step": 3176 + }, + { + "epoch": 27.18, + "learning_rate": 9.99885039707065e-05, + "loss": 0.0086, + "step": 3180 + }, + { + "epoch": 27.21, + "learning_rate": 9.99879987306533e-05, + "loss": 0.0122, + "step": 3184 + }, + { + "epoch": 27.25, + "learning_rate": 9.99874826269799e-05, + "loss": 0.0103, + "step": 3188 + }, + { + "epoch": 27.28, + "learning_rate": 9.998695565979844e-05, + "loss": 0.0085, + "step": 3192 + }, + { + "epoch": 27.32, + "learning_rate": 9.998641782922342e-05, + "loss": 0.0083, + "step": 3196 + }, + { + "epoch": 27.35, + "learning_rate": 9.998586913537177e-05, + "loss": 0.0111, + "step": 3200 + }, + { + "epoch": 27.38, + "learning_rate": 9.99853095783627e-05, + "loss": 0.0077, + "step": 3204 + }, + { + "epoch": 27.42, + "learning_rate": 9.99847391583178e-05, + "loss": 0.012, + "step": 3208 + }, + { + "epoch": 27.45, + "learning_rate": 9.998415787536111e-05, + "loss": 0.0117, + "step": 3212 + }, + { + "epoch": 27.49, + "learning_rate": 9.998356572961887e-05, + "loss": 0.0099, + "step": 3216 + }, + { + "epoch": 27.52, + "learning_rate": 9.998296272121983e-05, + "loss": 0.0098, + "step": 3220 + }, + { + "epoch": 27.56, + "learning_rate": 9.998234885029501e-05, + "loss": 0.0066, + "step": 3224 + }, + { + "epoch": 27.59, + "learning_rate": 9.998172411697781e-05, + "loss": 0.0109, + "step": 3228 + }, + { + "epoch": 27.62, + "learning_rate": 9.998108852140402e-05, + "loss": 0.0069, + "step": 3232 + }, + { + "epoch": 27.66, + "learning_rate": 9.998044206371177e-05, + "loss": 0.0079, + "step": 3236 + }, + { + "epoch": 27.69, + "learning_rate": 9.997978474404154e-05, + "loss": 0.0117, + "step": 3240 + }, + { + "epoch": 27.73, + "learning_rate": 9.997911656253618e-05, + "loss": 0.009, + "step": 3244 + }, + { + "epoch": 27.76, + "learning_rate": 9.997843751934093e-05, + "loss": 0.0104, + "step": 3248 + }, + { + "epoch": 27.79, + "learning_rate": 9.997774761460332e-05, + "loss": 0.0116, + "step": 3252 + }, + { + "epoch": 27.83, + "learning_rate": 9.997704684847332e-05, + "loss": 0.0079, + "step": 3256 + }, + { + "epoch": 27.86, + "learning_rate": 9.997633522110322e-05, + "loss": 0.0105, + "step": 3260 + }, + { + "epoch": 27.9, + "learning_rate": 9.997561273264764e-05, + "loss": 0.0091, + "step": 3264 + }, + { + "epoch": 27.93, + "learning_rate": 9.997487938326362e-05, + "loss": 0.0069, + "step": 3268 + }, + { + "epoch": 27.97, + "learning_rate": 9.997413517311055e-05, + "loss": 0.0114, + "step": 3272 + }, + { + "epoch": 28.0, + "learning_rate": 9.997338010235013e-05, + "loss": 0.0084, + "step": 3276 + }, + { + "epoch": 28.03, + "learning_rate": 9.99726141711465e-05, + "loss": 0.0074, + "step": 3280 + }, + { + "epoch": 28.07, + "learning_rate": 9.997183737966606e-05, + "loss": 0.0082, + "step": 3284 + }, + { + "epoch": 28.1, + "learning_rate": 9.997104972807768e-05, + "loss": 0.0112, + "step": 3288 + }, + { + "epoch": 28.14, + "learning_rate": 9.997025121655248e-05, + "loss": 0.0125, + "step": 3292 + }, + { + "epoch": 28.17, + "learning_rate": 9.996944184526405e-05, + "loss": 0.0116, + "step": 3296 + }, + { + "epoch": 28.21, + "learning_rate": 9.996862161438825e-05, + "loss": 0.0083, + "step": 3300 + }, + { + "epoch": 28.24, + "learning_rate": 9.996779052410337e-05, + "loss": 0.0099, + "step": 3304 + }, + { + "epoch": 28.27, + "learning_rate": 9.996694857459e-05, + "loss": 0.0095, + "step": 3308 + }, + { + "epoch": 28.31, + "learning_rate": 9.99660957660311e-05, + "loss": 0.0057, + "step": 3312 + }, + { + "epoch": 28.34, + "learning_rate": 9.996523209861204e-05, + "loss": 0.0047, + "step": 3316 + }, + { + "epoch": 28.38, + "learning_rate": 9.996435757252052e-05, + "loss": 0.0108, + "step": 3320 + }, + { + "epoch": 28.41, + "learning_rate": 9.996347218794656e-05, + "loss": 0.0081, + "step": 3324 + }, + { + "epoch": 28.44, + "learning_rate": 9.99625759450826e-05, + "loss": 0.0095, + "step": 3328 + }, + { + "epoch": 28.48, + "learning_rate": 9.996166884412342e-05, + "loss": 0.0069, + "step": 3332 + }, + { + "epoch": 28.51, + "learning_rate": 9.996075088526615e-05, + "loss": 0.0106, + "step": 3336 + }, + { + "epoch": 28.55, + "learning_rate": 9.995982206871029e-05, + "loss": 0.009, + "step": 3340 + }, + { + "epoch": 28.58, + "learning_rate": 9.995888239465768e-05, + "loss": 0.0081, + "step": 3344 + }, + { + "epoch": 28.62, + "learning_rate": 9.995793186331253e-05, + "loss": 0.0075, + "step": 3348 + }, + { + "epoch": 28.65, + "learning_rate": 9.995697047488142e-05, + "loss": 0.0068, + "step": 3352 + }, + { + "epoch": 28.68, + "learning_rate": 9.99559982295733e-05, + "loss": 0.0093, + "step": 3356 + }, + { + "epoch": 28.72, + "learning_rate": 9.995501512759946e-05, + "loss": 0.0068, + "step": 3360 + }, + { + "epoch": 28.75, + "learning_rate": 9.995402116917352e-05, + "loss": 0.0098, + "step": 3364 + }, + { + "epoch": 28.79, + "learning_rate": 9.995301635451155e-05, + "loss": 0.0096, + "step": 3368 + }, + { + "epoch": 28.82, + "learning_rate": 9.995200068383186e-05, + "loss": 0.0102, + "step": 3372 + }, + { + "epoch": 28.85, + "learning_rate": 9.995097415735521e-05, + "loss": 0.0081, + "step": 3376 + }, + { + "epoch": 28.89, + "learning_rate": 9.994993677530468e-05, + "loss": 0.0099, + "step": 3380 + }, + { + "epoch": 28.92, + "learning_rate": 9.994888853790574e-05, + "loss": 0.0122, + "step": 3384 + }, + { + "epoch": 28.96, + "learning_rate": 9.994782944538618e-05, + "loss": 0.0105, + "step": 3388 + }, + { + "epoch": 28.99, + "learning_rate": 9.994675949797616e-05, + "loss": 0.007, + "step": 3392 + }, + { + "epoch": 29.03, + "learning_rate": 9.994567869590822e-05, + "loss": 0.0078, + "step": 3396 + }, + { + "epoch": 29.06, + "learning_rate": 9.994458703941723e-05, + "loss": 0.0104, + "step": 3400 + }, + { + "epoch": 29.09, + "learning_rate": 9.994348452874046e-05, + "loss": 0.0082, + "step": 3404 + }, + { + "epoch": 29.13, + "learning_rate": 9.99423711641175e-05, + "loss": 0.006, + "step": 3408 + }, + { + "epoch": 29.16, + "learning_rate": 9.994124694579028e-05, + "loss": 0.0098, + "step": 3412 + }, + { + "epoch": 29.2, + "learning_rate": 9.994011187400317e-05, + "loss": 0.0094, + "step": 3416 + }, + { + "epoch": 29.23, + "learning_rate": 9.993896594900281e-05, + "loss": 0.0062, + "step": 3420 + }, + { + "epoch": 29.26, + "learning_rate": 9.993780917103825e-05, + "loss": 0.0067, + "step": 3424 + }, + { + "epoch": 29.3, + "learning_rate": 9.993664154036091e-05, + "loss": 0.0069, + "step": 3428 + }, + { + "epoch": 29.33, + "learning_rate": 9.993546305722452e-05, + "loss": 0.0079, + "step": 3432 + }, + { + "epoch": 29.37, + "learning_rate": 9.99342737218852e-05, + "loss": 0.0086, + "step": 3436 + }, + { + "epoch": 29.4, + "learning_rate": 9.993307353460142e-05, + "loss": 0.0068, + "step": 3440 + }, + { + "epoch": 29.44, + "learning_rate": 9.993186249563401e-05, + "loss": 0.0079, + "step": 3444 + }, + { + "epoch": 29.47, + "learning_rate": 9.993064060524615e-05, + "loss": 0.0081, + "step": 3448 + }, + { + "epoch": 29.5, + "learning_rate": 9.992940786370342e-05, + "loss": 0.0072, + "step": 3452 + }, + { + "epoch": 29.54, + "learning_rate": 9.992816427127368e-05, + "loss": 0.006, + "step": 3456 + }, + { + "epoch": 29.57, + "learning_rate": 9.992690982822721e-05, + "loss": 0.0094, + "step": 3460 + }, + { + "epoch": 29.61, + "learning_rate": 9.992564453483667e-05, + "loss": 0.0078, + "step": 3464 + }, + { + "epoch": 29.64, + "learning_rate": 9.992436839137696e-05, + "loss": 0.0045, + "step": 3468 + }, + { + "epoch": 29.68, + "learning_rate": 9.992308139812549e-05, + "loss": 0.0055, + "step": 3472 + }, + { + "epoch": 29.71, + "learning_rate": 9.992178355536192e-05, + "loss": 0.0059, + "step": 3476 + }, + { + "epoch": 29.74, + "learning_rate": 9.99204748633683e-05, + "loss": 0.0087, + "step": 3480 + }, + { + "epoch": 29.78, + "learning_rate": 9.991915532242908e-05, + "loss": 0.0082, + "step": 3484 + }, + { + "epoch": 29.81, + "learning_rate": 9.991782493283097e-05, + "loss": 0.0094, + "step": 3488 + }, + { + "epoch": 29.85, + "learning_rate": 9.991648369486314e-05, + "loss": 0.0077, + "step": 3492 + }, + { + "epoch": 29.88, + "learning_rate": 9.991513160881706e-05, + "loss": 0.008, + "step": 3496 + }, + { + "epoch": 29.91, + "learning_rate": 9.991376867498658e-05, + "loss": 0.0089, + "step": 3500 + }, + { + "epoch": 29.95, + "learning_rate": 9.991239489366788e-05, + "loss": 0.0089, + "step": 3504 + }, + { + "epoch": 29.98, + "learning_rate": 9.991101026515952e-05, + "loss": 0.0076, + "step": 3508 + }, + { + "epoch": 30.02, + "learning_rate": 9.990961478976244e-05, + "loss": 0.0082, + "step": 3512 + }, + { + "epoch": 30.05, + "learning_rate": 9.990820846777988e-05, + "loss": 0.0099, + "step": 3516 + }, + { + "epoch": 30.09, + "learning_rate": 9.990679129951749e-05, + "loss": 0.0075, + "step": 3520 + }, + { + "epoch": 30.12, + "learning_rate": 9.990536328528324e-05, + "loss": 0.0061, + "step": 3524 + }, + { + "epoch": 30.15, + "learning_rate": 9.990392442538747e-05, + "loss": 0.0067, + "step": 3528 + }, + { + "epoch": 30.19, + "learning_rate": 9.990247472014291e-05, + "loss": 0.0074, + "step": 3532 + }, + { + "epoch": 30.22, + "learning_rate": 9.990101416986457e-05, + "loss": 0.0051, + "step": 3536 + }, + { + "epoch": 30.26, + "learning_rate": 9.98995427748699e-05, + "loss": 0.0063, + "step": 3540 + }, + { + "epoch": 30.29, + "learning_rate": 9.989806053547866e-05, + "loss": 0.0062, + "step": 3544 + }, + { + "epoch": 30.32, + "learning_rate": 9.989656745201298e-05, + "loss": 0.0071, + "step": 3548 + }, + { + "epoch": 30.36, + "learning_rate": 9.989506352479734e-05, + "loss": 0.0062, + "step": 3552 + }, + { + "epoch": 30.39, + "learning_rate": 9.989354875415857e-05, + "loss": 0.0066, + "step": 3556 + }, + { + "epoch": 30.43, + "learning_rate": 9.989202314042588e-05, + "loss": 0.0071, + "step": 3560 + }, + { + "epoch": 30.46, + "learning_rate": 9.989048668393082e-05, + "loss": 0.0087, + "step": 3564 + }, + { + "epoch": 30.5, + "learning_rate": 9.988893938500731e-05, + "loss": 0.0075, + "step": 3568 + }, + { + "epoch": 30.53, + "learning_rate": 9.988738124399159e-05, + "loss": 0.0102, + "step": 3572 + }, + { + "epoch": 30.56, + "learning_rate": 9.988581226122231e-05, + "loss": 0.0075, + "step": 3576 + }, + { + "epoch": 30.6, + "learning_rate": 9.988423243704042e-05, + "loss": 0.0095, + "step": 3580 + }, + { + "epoch": 30.63, + "learning_rate": 9.988264177178929e-05, + "loss": 0.0061, + "step": 3584 + }, + { + "epoch": 30.67, + "learning_rate": 9.988104026581458e-05, + "loss": 0.0063, + "step": 3588 + }, + { + "epoch": 30.7, + "learning_rate": 9.987942791946436e-05, + "loss": 0.0057, + "step": 3592 + }, + { + "epoch": 30.74, + "learning_rate": 9.987780473308901e-05, + "loss": 0.0054, + "step": 3596 + }, + { + "epoch": 30.77, + "learning_rate": 9.98761707070413e-05, + "loss": 0.0053, + "step": 3600 + }, + { + "epoch": 30.8, + "learning_rate": 9.987452584167635e-05, + "loss": 0.0096, + "step": 3604 + }, + { + "epoch": 30.84, + "learning_rate": 9.987287013735161e-05, + "loss": 0.0066, + "step": 3608 + }, + { + "epoch": 30.87, + "learning_rate": 9.987120359442693e-05, + "loss": 0.0088, + "step": 3612 + }, + { + "epoch": 30.91, + "learning_rate": 9.986952621326448e-05, + "loss": 0.0064, + "step": 3616 + }, + { + "epoch": 30.94, + "learning_rate": 9.986783799422879e-05, + "loss": 0.0091, + "step": 3620 + }, + { + "epoch": 30.97, + "learning_rate": 9.986613893768675e-05, + "loss": 0.0064, + "step": 3624 + }, + { + "epoch": 31.01, + "learning_rate": 9.986442904400763e-05, + "loss": 0.0065, + "step": 3628 + }, + { + "epoch": 31.04, + "learning_rate": 9.9862708313563e-05, + "loss": 0.0055, + "step": 3632 + }, + { + "epoch": 31.08, + "learning_rate": 9.986097674672683e-05, + "loss": 0.0041, + "step": 3636 + }, + { + "epoch": 31.11, + "learning_rate": 9.985923434387545e-05, + "loss": 0.0072, + "step": 3640 + }, + { + "epoch": 31.15, + "learning_rate": 9.98574811053875e-05, + "loss": 0.0084, + "step": 3644 + }, + { + "epoch": 31.18, + "learning_rate": 9.985571703164402e-05, + "loss": 0.0074, + "step": 3648 + }, + { + "epoch": 31.21, + "learning_rate": 9.985394212302836e-05, + "loss": 0.0073, + "step": 3652 + }, + { + "epoch": 31.25, + "learning_rate": 9.98521563799263e-05, + "loss": 0.0059, + "step": 3656 + }, + { + "epoch": 31.28, + "learning_rate": 9.985035980272588e-05, + "loss": 0.0065, + "step": 3660 + }, + { + "epoch": 31.32, + "learning_rate": 9.984855239181755e-05, + "loss": 0.0058, + "step": 3664 + }, + { + "epoch": 31.35, + "learning_rate": 9.984673414759411e-05, + "loss": 0.0069, + "step": 3668 + }, + { + "epoch": 31.38, + "learning_rate": 9.984490507045073e-05, + "loss": 0.0068, + "step": 3672 + }, + { + "epoch": 31.42, + "learning_rate": 9.984306516078488e-05, + "loss": 0.006, + "step": 3676 + }, + { + "epoch": 31.45, + "learning_rate": 9.984121441899644e-05, + "loss": 0.0071, + "step": 3680 + }, + { + "epoch": 31.49, + "learning_rate": 9.983935284548761e-05, + "loss": 0.0052, + "step": 3684 + }, + { + "epoch": 31.52, + "learning_rate": 9.983748044066296e-05, + "loss": 0.0055, + "step": 3688 + }, + { + "epoch": 31.56, + "learning_rate": 9.983559720492941e-05, + "loss": 0.0065, + "step": 3692 + }, + { + "epoch": 31.59, + "learning_rate": 9.983370313869622e-05, + "loss": 0.0063, + "step": 3696 + }, + { + "epoch": 31.62, + "learning_rate": 9.983179824237505e-05, + "loss": 0.0045, + "step": 3700 + }, + { + "epoch": 31.66, + "learning_rate": 9.982988251637983e-05, + "loss": 0.0054, + "step": 3704 + }, + { + "epoch": 31.69, + "learning_rate": 9.982795596112695e-05, + "loss": 0.0062, + "step": 3708 + }, + { + "epoch": 31.73, + "learning_rate": 9.982601857703507e-05, + "loss": 0.009, + "step": 3712 + }, + { + "epoch": 31.76, + "learning_rate": 9.982407036452523e-05, + "loss": 0.0089, + "step": 3716 + }, + { + "epoch": 31.79, + "learning_rate": 9.982211132402082e-05, + "loss": 0.0065, + "step": 3720 + }, + { + "epoch": 31.83, + "learning_rate": 9.98201414559476e-05, + "loss": 0.0053, + "step": 3724 + }, + { + "epoch": 31.86, + "learning_rate": 9.981816076073368e-05, + "loss": 0.0073, + "step": 3728 + }, + { + "epoch": 31.9, + "learning_rate": 9.981616923880947e-05, + "loss": 0.0063, + "step": 3732 + }, + { + "epoch": 31.93, + "learning_rate": 9.981416689060784e-05, + "loss": 0.0052, + "step": 3736 + }, + { + "epoch": 31.97, + "learning_rate": 9.981215371656388e-05, + "loss": 0.0062, + "step": 3740 + }, + { + "epoch": 32.0, + "learning_rate": 9.981012971711516e-05, + "loss": 0.0058, + "step": 3744 + }, + { + "epoch": 32.03, + "learning_rate": 9.980809489270152e-05, + "loss": 0.0065, + "step": 3748 + }, + { + "epoch": 32.07, + "learning_rate": 9.980604924376518e-05, + "loss": 0.0044, + "step": 3752 + }, + { + "epoch": 32.1, + "learning_rate": 9.98039927707507e-05, + "loss": 0.0059, + "step": 3756 + }, + { + "epoch": 32.14, + "learning_rate": 9.980192547410503e-05, + "loss": 0.0069, + "step": 3760 + }, + { + "epoch": 32.17, + "learning_rate": 9.979984735427741e-05, + "loss": 0.006, + "step": 3764 + }, + { + "epoch": 32.21, + "learning_rate": 9.979775841171949e-05, + "loss": 0.0053, + "step": 3768 + }, + { + "epoch": 32.24, + "learning_rate": 9.979565864688524e-05, + "loss": 0.005, + "step": 3772 + }, + { + "epoch": 32.27, + "learning_rate": 9.9793548060231e-05, + "loss": 0.0054, + "step": 3776 + }, + { + "epoch": 32.31, + "learning_rate": 9.979142665221544e-05, + "loss": 0.0042, + "step": 3780 + }, + { + "epoch": 32.34, + "learning_rate": 9.978929442329959e-05, + "loss": 0.0063, + "step": 3784 + }, + { + "epoch": 32.38, + "learning_rate": 9.978715137394685e-05, + "loss": 0.0053, + "step": 3788 + }, + { + "epoch": 32.41, + "learning_rate": 9.978499750462295e-05, + "loss": 0.0052, + "step": 3792 + }, + { + "epoch": 32.44, + "learning_rate": 9.978283281579598e-05, + "loss": 0.005, + "step": 3796 + }, + { + "epoch": 32.48, + "learning_rate": 9.978065730793638e-05, + "loss": 0.0068, + "step": 3800 + }, + { + "epoch": 32.51, + "learning_rate": 9.977847098151694e-05, + "loss": 0.0057, + "step": 3804 + }, + { + "epoch": 32.55, + "learning_rate": 9.97762738370128e-05, + "loss": 0.0074, + "step": 3808 + }, + { + "epoch": 32.58, + "learning_rate": 9.977406587490146e-05, + "loss": 0.005, + "step": 3812 + }, + { + "epoch": 32.62, + "learning_rate": 9.977184709566277e-05, + "loss": 0.0055, + "step": 3816 + }, + { + "epoch": 32.65, + "learning_rate": 9.976961749977892e-05, + "loss": 0.0075, + "step": 3820 + }, + { + "epoch": 32.68, + "learning_rate": 9.976737708773445e-05, + "loss": 0.0045, + "step": 3824 + }, + { + "epoch": 32.72, + "learning_rate": 9.976512586001625e-05, + "loss": 0.0066, + "step": 3828 + }, + { + "epoch": 32.75, + "learning_rate": 9.976286381711357e-05, + "loss": 0.0064, + "step": 3832 + }, + { + "epoch": 32.79, + "learning_rate": 9.976059095951804e-05, + "loss": 0.0086, + "step": 3836 + }, + { + "epoch": 32.82, + "learning_rate": 9.975830728772355e-05, + "loss": 0.0069, + "step": 3840 + }, + { + "epoch": 32.85, + "learning_rate": 9.975601280222647e-05, + "loss": 0.006, + "step": 3844 + }, + { + "epoch": 32.89, + "learning_rate": 9.975370750352538e-05, + "loss": 0.0046, + "step": 3848 + }, + { + "epoch": 32.92, + "learning_rate": 9.975139139212131e-05, + "loss": 0.0065, + "step": 3852 + }, + { + "epoch": 32.96, + "learning_rate": 9.97490644685176e-05, + "loss": 0.007, + "step": 3856 + }, + { + "epoch": 32.99, + "learning_rate": 9.974672673321995e-05, + "loss": 0.0056, + "step": 3860 + }, + { + "epoch": 33.03, + "learning_rate": 9.974437818673642e-05, + "loss": 0.0051, + "step": 3864 + }, + { + "epoch": 33.06, + "learning_rate": 9.974201882957739e-05, + "loss": 0.0041, + "step": 3868 + }, + { + "epoch": 33.09, + "learning_rate": 9.97396486622556e-05, + "loss": 0.0049, + "step": 3872 + }, + { + "epoch": 33.13, + "learning_rate": 9.973726768528618e-05, + "loss": 0.006, + "step": 3876 + }, + { + "epoch": 33.16, + "learning_rate": 9.973487589918655e-05, + "loss": 0.0065, + "step": 3880 + }, + { + "epoch": 33.2, + "learning_rate": 9.973247330447649e-05, + "loss": 0.0057, + "step": 3884 + }, + { + "epoch": 33.23, + "learning_rate": 9.973005990167816e-05, + "loss": 0.0059, + "step": 3888 + }, + { + "epoch": 33.26, + "learning_rate": 9.972763569131606e-05, + "loss": 0.0059, + "step": 3892 + }, + { + "epoch": 33.3, + "learning_rate": 9.972520067391702e-05, + "loss": 0.0051, + "step": 3896 + }, + { + "epoch": 33.33, + "learning_rate": 9.972275485001024e-05, + "loss": 0.0061, + "step": 3900 + }, + { + "epoch": 33.37, + "learning_rate": 9.972029822012722e-05, + "loss": 0.006, + "step": 3904 + }, + { + "epoch": 33.4, + "learning_rate": 9.971783078480191e-05, + "loss": 0.0049, + "step": 3908 + }, + { + "epoch": 33.44, + "learning_rate": 9.97153525445705e-05, + "loss": 0.004, + "step": 3912 + }, + { + "epoch": 33.47, + "learning_rate": 9.971286349997156e-05, + "loss": 0.0072, + "step": 3916 + }, + { + "epoch": 33.5, + "learning_rate": 9.971036365154604e-05, + "loss": 0.0062, + "step": 3920 + }, + { + "epoch": 33.54, + "learning_rate": 9.970785299983725e-05, + "loss": 0.0049, + "step": 3924 + }, + { + "epoch": 33.57, + "learning_rate": 9.970533154539077e-05, + "loss": 0.0048, + "step": 3928 + }, + { + "epoch": 33.61, + "learning_rate": 9.970279928875458e-05, + "loss": 0.0048, + "step": 3932 + }, + { + "epoch": 33.64, + "learning_rate": 9.970025623047902e-05, + "loss": 0.0066, + "step": 3936 + }, + { + "epoch": 33.68, + "learning_rate": 9.969770237111676e-05, + "loss": 0.0068, + "step": 3940 + }, + { + "epoch": 33.71, + "learning_rate": 9.969513771122278e-05, + "loss": 0.0052, + "step": 3944 + }, + { + "epoch": 33.74, + "learning_rate": 9.96925622513545e-05, + "loss": 0.0049, + "step": 3948 + }, + { + "epoch": 33.78, + "learning_rate": 9.96899759920716e-05, + "loss": 0.0068, + "step": 3952 + }, + { + "epoch": 33.81, + "learning_rate": 9.968737893393612e-05, + "loss": 0.0072, + "step": 3956 + }, + { + "epoch": 33.85, + "learning_rate": 9.96847710775125e-05, + "loss": 0.0118, + "step": 3960 + }, + { + "epoch": 33.88, + "learning_rate": 9.968215242336746e-05, + "loss": 0.0051, + "step": 3964 + }, + { + "epoch": 33.91, + "learning_rate": 9.967952297207013e-05, + "loss": 0.0039, + "step": 3968 + }, + { + "epoch": 33.95, + "learning_rate": 9.967688272419193e-05, + "loss": 0.0049, + "step": 3972 + }, + { + "epoch": 33.98, + "learning_rate": 9.967423168030665e-05, + "loss": 0.0058, + "step": 3976 + }, + { + "epoch": 34.02, + "learning_rate": 9.967156984099044e-05, + "loss": 0.0049, + "step": 3980 + }, + { + "epoch": 34.05, + "learning_rate": 9.966889720682177e-05, + "loss": 0.005, + "step": 3984 + }, + { + "epoch": 34.09, + "learning_rate": 9.96662137783815e-05, + "loss": 0.0055, + "step": 3988 + }, + { + "epoch": 34.12, + "learning_rate": 9.966351955625277e-05, + "loss": 0.0038, + "step": 3992 + }, + { + "epoch": 34.15, + "learning_rate": 9.96608145410211e-05, + "loss": 0.0038, + "step": 3996 + }, + { + "epoch": 34.19, + "learning_rate": 9.965809873327439e-05, + "loss": 0.0054, + "step": 4000 + }, + { + "epoch": 34.19, + "eval_exact_match": 0.4896049896049896, + "eval_loss": 0.668160617351532, + "eval_runtime": 89.1426, + "eval_samples_per_second": 10.792, + "step": 4000 + }, + { + "epoch": 34.22, + "learning_rate": 9.965537213360282e-05, + "loss": 0.0065, + "step": 4004 + }, + { + "epoch": 34.26, + "learning_rate": 9.965263474259896e-05, + "loss": 0.006, + "step": 4008 + }, + { + "epoch": 34.29, + "learning_rate": 9.964988656085771e-05, + "loss": 0.0042, + "step": 4012 + }, + { + "epoch": 34.32, + "learning_rate": 9.964712758897632e-05, + "loss": 0.0054, + "step": 4016 + }, + { + "epoch": 34.36, + "learning_rate": 9.964435782755437e-05, + "loss": 0.0047, + "step": 4020 + }, + { + "epoch": 34.39, + "learning_rate": 9.964157727719381e-05, + "loss": 0.0032, + "step": 4024 + }, + { + "epoch": 34.43, + "learning_rate": 9.963878593849893e-05, + "loss": 0.0052, + "step": 4028 + }, + { + "epoch": 34.46, + "learning_rate": 9.963598381207632e-05, + "loss": 0.0039, + "step": 4032 + }, + { + "epoch": 34.5, + "learning_rate": 9.963317089853502e-05, + "loss": 0.0038, + "step": 4036 + }, + { + "epoch": 34.53, + "learning_rate": 9.963034719848626e-05, + "loss": 0.0049, + "step": 4040 + }, + { + "epoch": 34.56, + "learning_rate": 9.962751271254375e-05, + "loss": 0.0105, + "step": 4044 + }, + { + "epoch": 34.6, + "learning_rate": 9.96246674413235e-05, + "loss": 0.0042, + "step": 4048 + }, + { + "epoch": 34.63, + "learning_rate": 9.962181138544383e-05, + "loss": 0.0053, + "step": 4052 + }, + { + "epoch": 34.67, + "learning_rate": 9.961894454552545e-05, + "loss": 0.0056, + "step": 4056 + }, + { + "epoch": 34.7, + "learning_rate": 9.961606692219137e-05, + "loss": 0.0054, + "step": 4060 + }, + { + "epoch": 34.74, + "learning_rate": 9.961317851606701e-05, + "loss": 0.0051, + "step": 4064 + }, + { + "epoch": 34.77, + "learning_rate": 9.961027932778005e-05, + "loss": 0.0037, + "step": 4068 + }, + { + "epoch": 34.8, + "learning_rate": 9.960736935796058e-05, + "loss": 0.004, + "step": 4072 + }, + { + "epoch": 34.84, + "learning_rate": 9.9604448607241e-05, + "loss": 0.0058, + "step": 4076 + }, + { + "epoch": 34.87, + "learning_rate": 9.960151707625605e-05, + "loss": 0.0044, + "step": 4080 + }, + { + "epoch": 34.91, + "learning_rate": 9.959857476564285e-05, + "loss": 0.0056, + "step": 4084 + }, + { + "epoch": 34.94, + "learning_rate": 9.959562167604082e-05, + "loss": 0.0066, + "step": 4088 + }, + { + "epoch": 34.97, + "learning_rate": 9.959265780809172e-05, + "loss": 0.0058, + "step": 4092 + }, + { + "epoch": 35.01, + "learning_rate": 9.958968316243972e-05, + "loss": 0.0067, + "step": 4096 + }, + { + "epoch": 35.04, + "learning_rate": 9.958669773973123e-05, + "loss": 0.0067, + "step": 4100 + }, + { + "epoch": 35.08, + "learning_rate": 9.958370154061511e-05, + "loss": 0.0054, + "step": 4104 + }, + { + "epoch": 35.11, + "learning_rate": 9.958069456574246e-05, + "loss": 0.0042, + "step": 4108 + }, + { + "epoch": 35.15, + "learning_rate": 9.957767681576679e-05, + "loss": 0.0037, + "step": 4112 + }, + { + "epoch": 35.18, + "learning_rate": 9.957464829134391e-05, + "loss": 0.0043, + "step": 4116 + }, + { + "epoch": 35.21, + "learning_rate": 9.957160899313205e-05, + "loss": 0.0041, + "step": 4120 + }, + { + "epoch": 35.25, + "learning_rate": 9.956855892179167e-05, + "loss": 0.0047, + "step": 4124 + }, + { + "epoch": 35.28, + "learning_rate": 9.956549807798563e-05, + "loss": 0.0033, + "step": 4128 + }, + { + "epoch": 35.32, + "learning_rate": 9.956242646237914e-05, + "loss": 0.0049, + "step": 4132 + }, + { + "epoch": 35.35, + "learning_rate": 9.955934407563974e-05, + "loss": 0.0048, + "step": 4136 + }, + { + "epoch": 35.38, + "learning_rate": 9.95562509184373e-05, + "loss": 0.005, + "step": 4140 + }, + { + "epoch": 35.42, + "learning_rate": 9.955314699144406e-05, + "loss": 0.0036, + "step": 4144 + }, + { + "epoch": 35.45, + "learning_rate": 9.955003229533455e-05, + "loss": 0.0044, + "step": 4148 + }, + { + "epoch": 35.49, + "learning_rate": 9.954690683078569e-05, + "loss": 0.0039, + "step": 4152 + }, + { + "epoch": 35.52, + "learning_rate": 9.954377059847669e-05, + "loss": 0.004, + "step": 4156 + }, + { + "epoch": 35.56, + "learning_rate": 9.954062359908918e-05, + "loss": 0.0055, + "step": 4160 + }, + { + "epoch": 35.59, + "learning_rate": 9.953746583330703e-05, + "loss": 0.0043, + "step": 4164 + }, + { + "epoch": 35.62, + "learning_rate": 9.953429730181653e-05, + "loss": 0.0056, + "step": 4168 + }, + { + "epoch": 35.66, + "learning_rate": 9.953111800530628e-05, + "loss": 0.0053, + "step": 4172 + }, + { + "epoch": 35.69, + "learning_rate": 9.952792794446722e-05, + "loss": 0.003, + "step": 4176 + }, + { + "epoch": 35.73, + "learning_rate": 9.952472711999261e-05, + "loss": 0.0057, + "step": 4180 + }, + { + "epoch": 35.76, + "learning_rate": 9.952151553257809e-05, + "loss": 0.0052, + "step": 4184 + }, + { + "epoch": 35.79, + "learning_rate": 9.951829318292159e-05, + "loss": 0.0046, + "step": 4188 + }, + { + "epoch": 35.83, + "learning_rate": 9.951506007172343e-05, + "loss": 0.0028, + "step": 4192 + }, + { + "epoch": 35.86, + "learning_rate": 9.951181619968624e-05, + "loss": 0.0056, + "step": 4196 + }, + { + "epoch": 35.9, + "learning_rate": 9.950856156751498e-05, + "loss": 0.0029, + "step": 4200 + }, + { + "epoch": 35.93, + "learning_rate": 9.950529617591698e-05, + "loss": 0.0046, + "step": 4204 + }, + { + "epoch": 35.97, + "learning_rate": 9.950202002560187e-05, + "loss": 0.0031, + "step": 4208 + }, + { + "epoch": 36.0, + "learning_rate": 9.949873311728166e-05, + "loss": 0.0036, + "step": 4212 + }, + { + "epoch": 36.03, + "learning_rate": 9.949543545167066e-05, + "loss": 0.0046, + "step": 4216 + }, + { + "epoch": 36.07, + "learning_rate": 9.949212702948552e-05, + "loss": 0.0026, + "step": 4220 + }, + { + "epoch": 36.1, + "learning_rate": 9.948880785144528e-05, + "loss": 0.0036, + "step": 4224 + }, + { + "epoch": 36.14, + "learning_rate": 9.948547791827126e-05, + "loss": 0.0046, + "step": 4228 + }, + { + "epoch": 36.17, + "learning_rate": 9.948213723068712e-05, + "loss": 0.0025, + "step": 4232 + }, + { + "epoch": 36.21, + "learning_rate": 9.94787857894189e-05, + "loss": 0.0043, + "step": 4236 + }, + { + "epoch": 36.24, + "learning_rate": 9.947542359519492e-05, + "loss": 0.0045, + "step": 4240 + }, + { + "epoch": 36.27, + "learning_rate": 9.947205064874591e-05, + "loss": 0.0057, + "step": 4244 + }, + { + "epoch": 36.31, + "learning_rate": 9.946866695080486e-05, + "loss": 0.0023, + "step": 4248 + }, + { + "epoch": 36.34, + "learning_rate": 9.946527250210715e-05, + "loss": 0.0033, + "step": 4252 + }, + { + "epoch": 36.38, + "learning_rate": 9.946186730339047e-05, + "loss": 0.0053, + "step": 4256 + }, + { + "epoch": 36.41, + "learning_rate": 9.945845135539483e-05, + "loss": 0.0028, + "step": 4260 + }, + { + "epoch": 36.44, + "learning_rate": 9.945502465886263e-05, + "loss": 0.0037, + "step": 4264 + }, + { + "epoch": 36.48, + "learning_rate": 9.945158721453857e-05, + "loss": 0.007, + "step": 4268 + }, + { + "epoch": 36.51, + "learning_rate": 9.94481390231697e-05, + "loss": 0.0027, + "step": 4272 + }, + { + "epoch": 36.55, + "learning_rate": 9.944468008550536e-05, + "loss": 0.0021, + "step": 4276 + }, + { + "epoch": 36.58, + "learning_rate": 9.94412104022973e-05, + "loss": 0.0047, + "step": 4280 + }, + { + "epoch": 36.62, + "learning_rate": 9.943772997429954e-05, + "loss": 0.0056, + "step": 4284 + }, + { + "epoch": 36.65, + "learning_rate": 9.943423880226848e-05, + "loss": 0.0046, + "step": 4288 + }, + { + "epoch": 36.68, + "learning_rate": 9.943073688696285e-05, + "loss": 0.0053, + "step": 4292 + }, + { + "epoch": 36.72, + "learning_rate": 9.942722422914367e-05, + "loss": 0.0032, + "step": 4296 + }, + { + "epoch": 36.75, + "learning_rate": 9.942370082957435e-05, + "loss": 0.0034, + "step": 4300 + }, + { + "epoch": 36.79, + "learning_rate": 9.942016668902058e-05, + "loss": 0.0041, + "step": 4304 + }, + { + "epoch": 36.82, + "learning_rate": 9.941662180825048e-05, + "loss": 0.004, + "step": 4308 + }, + { + "epoch": 36.85, + "learning_rate": 9.941306618803436e-05, + "loss": 0.0058, + "step": 4312 + }, + { + "epoch": 36.89, + "learning_rate": 9.940949982914502e-05, + "loss": 0.0031, + "step": 4316 + }, + { + "epoch": 36.92, + "learning_rate": 9.940592273235744e-05, + "loss": 0.0065, + "step": 4320 + }, + { + "epoch": 36.96, + "learning_rate": 9.94023348984491e-05, + "loss": 0.0041, + "step": 4324 + }, + { + "epoch": 36.99, + "learning_rate": 9.939873632819964e-05, + "loss": 0.0054, + "step": 4328 + }, + { + "epoch": 37.03, + "learning_rate": 9.939512702239116e-05, + "loss": 0.0051, + "step": 4332 + }, + { + "epoch": 37.06, + "learning_rate": 9.939150698180804e-05, + "loss": 0.0032, + "step": 4336 + }, + { + "epoch": 37.09, + "learning_rate": 9.938787620723702e-05, + "loss": 0.0047, + "step": 4340 + }, + { + "epoch": 37.13, + "learning_rate": 9.938423469946713e-05, + "loss": 0.006, + "step": 4344 + }, + { + "epoch": 37.16, + "learning_rate": 9.938058245928978e-05, + "loss": 0.0044, + "step": 4348 + }, + { + "epoch": 37.2, + "learning_rate": 9.937691948749869e-05, + "loss": 0.0051, + "step": 4352 + }, + { + "epoch": 37.23, + "learning_rate": 9.93732457848899e-05, + "loss": 0.0065, + "step": 4356 + }, + { + "epoch": 37.26, + "learning_rate": 9.936956135226181e-05, + "loss": 0.0022, + "step": 4360 + }, + { + "epoch": 37.3, + "learning_rate": 9.936586619041514e-05, + "loss": 0.0027, + "step": 4364 + }, + { + "epoch": 37.33, + "learning_rate": 9.936216030015291e-05, + "loss": 0.0039, + "step": 4368 + }, + { + "epoch": 37.37, + "learning_rate": 9.935844368228054e-05, + "loss": 0.0032, + "step": 4372 + }, + { + "epoch": 37.4, + "learning_rate": 9.935471633760573e-05, + "loss": 0.0056, + "step": 4376 + }, + { + "epoch": 37.44, + "learning_rate": 9.935097826693851e-05, + "loss": 0.0026, + "step": 4380 + }, + { + "epoch": 37.47, + "learning_rate": 9.934722947109125e-05, + "loss": 0.003, + "step": 4384 + }, + { + "epoch": 37.5, + "learning_rate": 9.934346995087868e-05, + "loss": 0.0032, + "step": 4388 + }, + { + "epoch": 37.54, + "learning_rate": 9.933969970711783e-05, + "loss": 0.0028, + "step": 4392 + }, + { + "epoch": 37.57, + "learning_rate": 9.933591874062806e-05, + "loss": 0.004, + "step": 4396 + }, + { + "epoch": 37.61, + "learning_rate": 9.933212705223107e-05, + "loss": 0.0041, + "step": 4400 + }, + { + "epoch": 37.64, + "learning_rate": 9.932832464275088e-05, + "loss": 0.004, + "step": 4404 + }, + { + "epoch": 37.68, + "learning_rate": 9.932451151301386e-05, + "loss": 0.0051, + "step": 4408 + }, + { + "epoch": 37.71, + "learning_rate": 9.932068766384868e-05, + "loss": 0.0033, + "step": 4412 + }, + { + "epoch": 37.74, + "learning_rate": 9.931685309608636e-05, + "loss": 0.003, + "step": 4416 + }, + { + "epoch": 37.78, + "learning_rate": 9.931300781056027e-05, + "loss": 0.0048, + "step": 4420 + }, + { + "epoch": 37.81, + "learning_rate": 9.930915180810605e-05, + "loss": 0.0047, + "step": 4424 + }, + { + "epoch": 37.85, + "learning_rate": 9.930528508956172e-05, + "loss": 0.0037, + "step": 4428 + }, + { + "epoch": 37.88, + "learning_rate": 9.930140765576763e-05, + "loss": 0.0055, + "step": 4432 + }, + { + "epoch": 37.91, + "learning_rate": 9.929751950756641e-05, + "loss": 0.0041, + "step": 4436 + }, + { + "epoch": 37.95, + "learning_rate": 9.929362064580307e-05, + "loss": 0.004, + "step": 4440 + }, + { + "epoch": 37.98, + "learning_rate": 9.928971107132493e-05, + "loss": 0.0047, + "step": 4444 + }, + { + "epoch": 38.02, + "learning_rate": 9.928579078498161e-05, + "loss": 0.0033, + "step": 4448 + }, + { + "epoch": 38.05, + "learning_rate": 9.928185978762512e-05, + "loss": 0.003, + "step": 4452 + }, + { + "epoch": 38.09, + "learning_rate": 9.927791808010976e-05, + "loss": 0.004, + "step": 4456 + }, + { + "epoch": 38.12, + "learning_rate": 9.927396566329212e-05, + "loss": 0.0041, + "step": 4460 + }, + { + "epoch": 38.15, + "learning_rate": 9.92700025380312e-05, + "loss": 0.0055, + "step": 4464 + }, + { + "epoch": 38.19, + "learning_rate": 9.926602870518826e-05, + "loss": 0.0038, + "step": 4468 + }, + { + "epoch": 38.22, + "learning_rate": 9.926204416562692e-05, + "loss": 0.005, + "step": 4472 + }, + { + "epoch": 38.26, + "learning_rate": 9.925804892021313e-05, + "loss": 0.0057, + "step": 4476 + }, + { + "epoch": 38.29, + "learning_rate": 9.925404296981513e-05, + "loss": 0.0034, + "step": 4480 + }, + { + "epoch": 38.32, + "learning_rate": 9.925002631530353e-05, + "loss": 0.0039, + "step": 4484 + }, + { + "epoch": 38.36, + "learning_rate": 9.924599895755126e-05, + "loss": 0.0037, + "step": 4488 + }, + { + "epoch": 38.39, + "learning_rate": 9.924196089743352e-05, + "loss": 0.0038, + "step": 4492 + }, + { + "epoch": 38.43, + "learning_rate": 9.923791213582791e-05, + "loss": 0.0027, + "step": 4496 + }, + { + "epoch": 38.46, + "learning_rate": 9.923385267361434e-05, + "loss": 0.0042, + "step": 4500 + }, + { + "epoch": 38.5, + "learning_rate": 9.9229782511675e-05, + "loss": 0.0054, + "step": 4504 + }, + { + "epoch": 38.53, + "learning_rate": 9.922570165089445e-05, + "loss": 0.0038, + "step": 4508 + }, + { + "epoch": 38.56, + "learning_rate": 9.922161009215956e-05, + "loss": 0.004, + "step": 4512 + }, + { + "epoch": 38.6, + "learning_rate": 9.921750783635952e-05, + "loss": 0.003, + "step": 4516 + }, + { + "epoch": 38.63, + "learning_rate": 9.921339488438585e-05, + "loss": 0.0033, + "step": 4520 + }, + { + "epoch": 38.67, + "learning_rate": 9.920927123713242e-05, + "loss": 0.0029, + "step": 4524 + }, + { + "epoch": 38.7, + "learning_rate": 9.920513689549537e-05, + "loss": 0.0035, + "step": 4528 + }, + { + "epoch": 38.74, + "learning_rate": 9.920099186037321e-05, + "loss": 0.0051, + "step": 4532 + }, + { + "epoch": 38.77, + "learning_rate": 9.919683613266677e-05, + "loss": 0.0037, + "step": 4536 + }, + { + "epoch": 38.8, + "learning_rate": 9.919266971327916e-05, + "loss": 0.0032, + "step": 4540 + }, + { + "epoch": 38.84, + "learning_rate": 9.918849260311586e-05, + "loss": 0.0044, + "step": 4544 + }, + { + "epoch": 38.87, + "learning_rate": 9.918430480308466e-05, + "loss": 0.0022, + "step": 4548 + }, + { + "epoch": 38.91, + "learning_rate": 9.918010631409568e-05, + "loss": 0.0035, + "step": 4552 + }, + { + "epoch": 38.94, + "learning_rate": 9.917589713706135e-05, + "loss": 0.0048, + "step": 4556 + }, + { + "epoch": 38.97, + "learning_rate": 9.91716772728964e-05, + "loss": 0.0029, + "step": 4560 + }, + { + "epoch": 39.01, + "learning_rate": 9.916744672251795e-05, + "loss": 0.0055, + "step": 4564 + }, + { + "epoch": 39.04, + "learning_rate": 9.91632054868454e-05, + "loss": 0.0036, + "step": 4568 + }, + { + "epoch": 39.08, + "learning_rate": 9.915895356680043e-05, + "loss": 0.0035, + "step": 4572 + }, + { + "epoch": 39.11, + "learning_rate": 9.915469096330714e-05, + "loss": 0.0036, + "step": 4576 + }, + { + "epoch": 39.15, + "learning_rate": 9.915041767729188e-05, + "loss": 0.002, + "step": 4580 + }, + { + "epoch": 39.18, + "learning_rate": 9.914613370968333e-05, + "loss": 0.0031, + "step": 4584 + }, + { + "epoch": 39.21, + "learning_rate": 9.914183906141252e-05, + "loss": 0.0043, + "step": 4588 + }, + { + "epoch": 39.25, + "learning_rate": 9.913753373341274e-05, + "loss": 0.0044, + "step": 4592 + }, + { + "epoch": 39.28, + "learning_rate": 9.91332177266197e-05, + "loss": 0.0039, + "step": 4596 + }, + { + "epoch": 39.32, + "learning_rate": 9.912889104197134e-05, + "loss": 0.004, + "step": 4600 + }, + { + "epoch": 39.35, + "learning_rate": 9.912455368040797e-05, + "loss": 0.0034, + "step": 4604 + }, + { + "epoch": 39.38, + "learning_rate": 9.912020564287218e-05, + "loss": 0.0032, + "step": 4608 + }, + { + "epoch": 39.42, + "learning_rate": 9.911584693030895e-05, + "loss": 0.004, + "step": 4612 + }, + { + "epoch": 39.45, + "learning_rate": 9.91114775436655e-05, + "loss": 0.0057, + "step": 4616 + }, + { + "epoch": 39.49, + "learning_rate": 9.910709748389141e-05, + "loss": 0.0039, + "step": 4620 + }, + { + "epoch": 39.52, + "learning_rate": 9.910270675193858e-05, + "loss": 0.0025, + "step": 4624 + }, + { + "epoch": 39.56, + "learning_rate": 9.909830534876123e-05, + "loss": 0.0036, + "step": 4628 + }, + { + "epoch": 39.59, + "learning_rate": 9.909389327531588e-05, + "loss": 0.0035, + "step": 4632 + }, + { + "epoch": 39.62, + "learning_rate": 9.908947053256139e-05, + "loss": 0.0049, + "step": 4636 + }, + { + "epoch": 39.66, + "learning_rate": 9.908503712145892e-05, + "loss": 0.0037, + "step": 4640 + }, + { + "epoch": 39.69, + "learning_rate": 9.908059304297198e-05, + "loss": 0.0029, + "step": 4644 + }, + { + "epoch": 39.73, + "learning_rate": 9.907613829806637e-05, + "loss": 0.0036, + "step": 4648 + }, + { + "epoch": 39.76, + "learning_rate": 9.907167288771019e-05, + "loss": 0.0044, + "step": 4652 + }, + { + "epoch": 39.79, + "learning_rate": 9.90671968128739e-05, + "loss": 0.0043, + "step": 4656 + }, + { + "epoch": 39.83, + "learning_rate": 9.90627100745303e-05, + "loss": 0.0073, + "step": 4660 + }, + { + "epoch": 39.86, + "learning_rate": 9.90582126736544e-05, + "loss": 0.003, + "step": 4664 + }, + { + "epoch": 39.9, + "learning_rate": 9.905370461122366e-05, + "loss": 0.0029, + "step": 4668 + }, + { + "epoch": 39.93, + "learning_rate": 9.904918588821775e-05, + "loss": 0.0045, + "step": 4672 + }, + { + "epoch": 39.97, + "learning_rate": 9.904465650561869e-05, + "loss": 0.0047, + "step": 4676 + }, + { + "epoch": 40.0, + "learning_rate": 9.904011646441087e-05, + "loss": 0.0046, + "step": 4680 + }, + { + "epoch": 40.03, + "learning_rate": 9.903556576558093e-05, + "loss": 0.0058, + "step": 4684 + }, + { + "epoch": 40.07, + "learning_rate": 9.903100441011783e-05, + "loss": 0.0035, + "step": 4688 + }, + { + "epoch": 40.1, + "learning_rate": 9.90264323990129e-05, + "loss": 0.0069, + "step": 4692 + }, + { + "epoch": 40.14, + "learning_rate": 9.902184973325975e-05, + "loss": 0.0032, + "step": 4696 + }, + { + "epoch": 40.17, + "learning_rate": 9.901725641385425e-05, + "loss": 0.0029, + "step": 4700 + }, + { + "epoch": 40.21, + "learning_rate": 9.90126524417947e-05, + "loss": 0.0033, + "step": 4704 + }, + { + "epoch": 40.24, + "learning_rate": 9.900803781808164e-05, + "loss": 0.0023, + "step": 4708 + }, + { + "epoch": 40.27, + "learning_rate": 9.900341254371794e-05, + "loss": 0.0029, + "step": 4712 + }, + { + "epoch": 40.31, + "learning_rate": 9.899877661970877e-05, + "loss": 0.0033, + "step": 4716 + }, + { + "epoch": 40.34, + "learning_rate": 9.899413004706166e-05, + "loss": 0.0025, + "step": 4720 + }, + { + "epoch": 40.38, + "learning_rate": 9.898947282678642e-05, + "loss": 0.0042, + "step": 4724 + }, + { + "epoch": 40.41, + "learning_rate": 9.898480495989514e-05, + "loss": 0.0042, + "step": 4728 + }, + { + "epoch": 40.44, + "learning_rate": 9.898012644740229e-05, + "loss": 0.0021, + "step": 4732 + }, + { + "epoch": 40.48, + "learning_rate": 9.897543729032463e-05, + "loss": 0.0033, + "step": 4736 + }, + { + "epoch": 40.51, + "learning_rate": 9.897073748968124e-05, + "loss": 0.005, + "step": 4740 + }, + { + "epoch": 40.55, + "learning_rate": 9.896602704649346e-05, + "loss": 0.0048, + "step": 4744 + }, + { + "epoch": 40.58, + "learning_rate": 9.896130596178503e-05, + "loss": 0.0023, + "step": 4748 + }, + { + "epoch": 40.62, + "learning_rate": 9.895657423658193e-05, + "loss": 0.0027, + "step": 4752 + }, + { + "epoch": 40.65, + "learning_rate": 9.895183187191251e-05, + "loss": 0.0038, + "step": 4756 + }, + { + "epoch": 40.68, + "learning_rate": 9.894707886880735e-05, + "loss": 0.0034, + "step": 4760 + }, + { + "epoch": 40.72, + "learning_rate": 9.894231522829944e-05, + "loss": 0.0034, + "step": 4764 + }, + { + "epoch": 40.75, + "learning_rate": 9.893754095142403e-05, + "loss": 0.0039, + "step": 4768 + }, + { + "epoch": 40.79, + "learning_rate": 9.893275603921865e-05, + "loss": 0.0024, + "step": 4772 + }, + { + "epoch": 40.82, + "learning_rate": 9.892796049272324e-05, + "loss": 0.002, + "step": 4776 + }, + { + "epoch": 40.85, + "learning_rate": 9.892315431297994e-05, + "loss": 0.0036, + "step": 4780 + }, + { + "epoch": 40.89, + "learning_rate": 9.891833750103328e-05, + "loss": 0.0037, + "step": 4784 + }, + { + "epoch": 40.92, + "learning_rate": 9.891351005793005e-05, + "loss": 0.004, + "step": 4788 + }, + { + "epoch": 40.96, + "learning_rate": 9.890867198471938e-05, + "loss": 0.0036, + "step": 4792 + }, + { + "epoch": 40.99, + "learning_rate": 9.890382328245271e-05, + "loss": 0.0057, + "step": 4796 + }, + { + "epoch": 41.03, + "learning_rate": 9.889896395218378e-05, + "loss": 0.0028, + "step": 4800 + }, + { + "epoch": 41.06, + "learning_rate": 9.889409399496865e-05, + "loss": 0.0018, + "step": 4804 + }, + { + "epoch": 41.09, + "learning_rate": 9.888921341186566e-05, + "loss": 0.0027, + "step": 4808 + }, + { + "epoch": 41.13, + "learning_rate": 9.888432220393549e-05, + "loss": 0.0047, + "step": 4812 + }, + { + "epoch": 41.16, + "learning_rate": 9.887942037224111e-05, + "loss": 0.0037, + "step": 4816 + }, + { + "epoch": 41.2, + "learning_rate": 9.887450791784782e-05, + "loss": 0.0034, + "step": 4820 + }, + { + "epoch": 41.23, + "learning_rate": 9.886958484182324e-05, + "loss": 0.0037, + "step": 4824 + }, + { + "epoch": 41.26, + "learning_rate": 9.886465114523724e-05, + "loss": 0.0048, + "step": 4828 + }, + { + "epoch": 41.3, + "learning_rate": 9.885970682916204e-05, + "loss": 0.0028, + "step": 4832 + }, + { + "epoch": 41.33, + "learning_rate": 9.885475189467217e-05, + "loss": 0.0036, + "step": 4836 + }, + { + "epoch": 41.37, + "learning_rate": 9.884978634284445e-05, + "loss": 0.0055, + "step": 4840 + }, + { + "epoch": 41.4, + "learning_rate": 9.884481017475801e-05, + "loss": 0.0049, + "step": 4844 + }, + { + "epoch": 41.44, + "learning_rate": 9.883982339149433e-05, + "loss": 0.0031, + "step": 4848 + }, + { + "epoch": 41.47, + "learning_rate": 9.883482599413712e-05, + "loss": 0.0038, + "step": 4852 + }, + { + "epoch": 41.5, + "learning_rate": 9.882981798377247e-05, + "loss": 0.0017, + "step": 4856 + }, + { + "epoch": 41.54, + "learning_rate": 9.882479936148872e-05, + "loss": 0.004, + "step": 4860 + }, + { + "epoch": 41.57, + "learning_rate": 9.881977012837653e-05, + "loss": 0.0024, + "step": 4864 + }, + { + "epoch": 41.61, + "learning_rate": 9.88147302855289e-05, + "loss": 0.003, + "step": 4868 + }, + { + "epoch": 41.64, + "learning_rate": 9.880967983404111e-05, + "loss": 0.0016, + "step": 4872 + }, + { + "epoch": 41.68, + "learning_rate": 9.880461877501075e-05, + "loss": 0.0047, + "step": 4876 + }, + { + "epoch": 41.71, + "learning_rate": 9.879954710953769e-05, + "loss": 0.0038, + "step": 4880 + }, + { + "epoch": 41.74, + "learning_rate": 9.879446483872414e-05, + "loss": 0.0022, + "step": 4884 + }, + { + "epoch": 41.78, + "learning_rate": 9.878937196367462e-05, + "loss": 0.004, + "step": 4888 + }, + { + "epoch": 41.81, + "learning_rate": 9.878426848549589e-05, + "loss": 0.0031, + "step": 4892 + }, + { + "epoch": 41.85, + "learning_rate": 9.877915440529711e-05, + "loss": 0.0027, + "step": 4896 + }, + { + "epoch": 41.88, + "learning_rate": 9.877402972418968e-05, + "loss": 0.003, + "step": 4900 + }, + { + "epoch": 41.91, + "learning_rate": 9.876889444328731e-05, + "loss": 0.0024, + "step": 4904 + }, + { + "epoch": 41.95, + "learning_rate": 9.876374856370603e-05, + "loss": 0.0033, + "step": 4908 + }, + { + "epoch": 41.98, + "learning_rate": 9.875859208656418e-05, + "loss": 0.003, + "step": 4912 + }, + { + "epoch": 42.02, + "learning_rate": 9.875342501298235e-05, + "loss": 0.003, + "step": 4916 + }, + { + "epoch": 42.05, + "learning_rate": 9.874824734408351e-05, + "loss": 0.0038, + "step": 4920 + }, + { + "epoch": 42.09, + "learning_rate": 9.874305908099288e-05, + "loss": 0.0034, + "step": 4924 + }, + { + "epoch": 42.12, + "learning_rate": 9.8737860224838e-05, + "loss": 0.0038, + "step": 4928 + }, + { + "epoch": 42.15, + "learning_rate": 9.87326507767487e-05, + "loss": 0.0023, + "step": 4932 + }, + { + "epoch": 42.19, + "learning_rate": 9.872743073785713e-05, + "loss": 0.0036, + "step": 4936 + }, + { + "epoch": 42.22, + "learning_rate": 9.872220010929772e-05, + "loss": 0.0052, + "step": 4940 + }, + { + "epoch": 42.26, + "learning_rate": 9.871695889220725e-05, + "loss": 0.0019, + "step": 4944 + }, + { + "epoch": 42.29, + "learning_rate": 9.871170708772471e-05, + "loss": 0.0019, + "step": 4948 + }, + { + "epoch": 42.32, + "learning_rate": 9.87064446969915e-05, + "loss": 0.0055, + "step": 4952 + }, + { + "epoch": 42.36, + "learning_rate": 9.870117172115123e-05, + "loss": 0.0031, + "step": 4956 + }, + { + "epoch": 42.39, + "learning_rate": 9.869588816134987e-05, + "loss": 0.0028, + "step": 4960 + }, + { + "epoch": 42.43, + "learning_rate": 9.869059401873564e-05, + "loss": 0.0028, + "step": 4964 + }, + { + "epoch": 42.46, + "learning_rate": 9.868528929445912e-05, + "loss": 0.0019, + "step": 4968 + }, + { + "epoch": 42.5, + "learning_rate": 9.867997398967313e-05, + "loss": 0.002, + "step": 4972 + }, + { + "epoch": 42.53, + "learning_rate": 9.867464810553283e-05, + "loss": 0.0035, + "step": 4976 + }, + { + "epoch": 42.56, + "learning_rate": 9.866931164319568e-05, + "loss": 0.0044, + "step": 4980 + }, + { + "epoch": 42.6, + "learning_rate": 9.866396460382139e-05, + "loss": 0.0069, + "step": 4984 + }, + { + "epoch": 42.63, + "learning_rate": 9.865860698857204e-05, + "loss": 0.0025, + "step": 4988 + }, + { + "epoch": 42.67, + "learning_rate": 9.865323879861194e-05, + "loss": 0.0032, + "step": 4992 + }, + { + "epoch": 42.7, + "learning_rate": 9.864786003510773e-05, + "loss": 0.0047, + "step": 4996 + }, + { + "epoch": 42.74, + "learning_rate": 9.864247069922838e-05, + "loss": 0.0027, + "step": 5000 + }, + { + "epoch": 42.74, + "eval_exact_match": 0.4875259875259875, + "eval_loss": 0.7059731483459473, + "eval_runtime": 86.6231, + "eval_samples_per_second": 11.106, + "step": 5000 + }, + { + "epoch": 21.38, + "learning_rate": 8.352528793189786e-05, + "loss": 0.0021, + "step": 5004 + }, + { + "epoch": 21.4, + "learning_rate": 8.359205474878986e-05, + "loss": 0.002, + "step": 5008 + }, + { + "epoch": 21.42, + "learning_rate": 8.365882156568185e-05, + "loss": 0.0031, + "step": 5012 + }, + { + "epoch": 21.44, + "learning_rate": 8.372558838257387e-05, + "loss": 0.0031, + "step": 5016 + }, + { + "epoch": 21.45, + "learning_rate": 8.379235519946587e-05, + "loss": 0.0036, + "step": 5020 + }, + { + "epoch": 21.47, + "learning_rate": 8.385912201635787e-05, + "loss": 0.0018, + "step": 5024 + }, + { + "epoch": 21.49, + "learning_rate": 8.392588883324987e-05, + "loss": 0.0016, + "step": 5028 + }, + { + "epoch": 21.5, + "learning_rate": 8.399265565014188e-05, + "loss": 0.0026, + "step": 5032 + }, + { + "epoch": 21.52, + "learning_rate": 8.405942246703389e-05, + "loss": 0.0018, + "step": 5036 + }, + { + "epoch": 21.54, + "learning_rate": 8.41261892839259e-05, + "loss": 0.0022, + "step": 5040 + }, + { + "epoch": 21.56, + "learning_rate": 8.41929561008179e-05, + "loss": 0.0008, + "step": 5044 + }, + { + "epoch": 21.57, + "learning_rate": 8.42597229177099e-05, + "loss": 0.0035, + "step": 5048 + }, + { + "epoch": 21.59, + "learning_rate": 8.43264897346019e-05, + "loss": 0.0017, + "step": 5052 + }, + { + "epoch": 21.61, + "learning_rate": 8.439325655149392e-05, + "loss": 0.004, + "step": 5056 + }, + { + "epoch": 21.62, + "learning_rate": 8.446002336838592e-05, + "loss": 0.0023, + "step": 5060 + }, + { + "epoch": 21.64, + "learning_rate": 8.452679018527791e-05, + "loss": 0.0032, + "step": 5064 + }, + { + "epoch": 21.66, + "learning_rate": 8.459355700216993e-05, + "loss": 0.003, + "step": 5068 + }, + { + "epoch": 21.68, + "learning_rate": 8.466032381906193e-05, + "loss": 0.0038, + "step": 5072 + }, + { + "epoch": 21.69, + "learning_rate": 8.472709063595393e-05, + "loss": 0.0053, + "step": 5076 + }, + { + "epoch": 21.71, + "learning_rate": 8.479385745284594e-05, + "loss": 0.0018, + "step": 5080 + }, + { + "epoch": 21.73, + "learning_rate": 8.486062426973794e-05, + "loss": 0.0018, + "step": 5084 + }, + { + "epoch": 21.74, + "learning_rate": 8.492739108662995e-05, + "loss": 0.0059, + "step": 5088 + }, + { + "epoch": 21.76, + "learning_rate": 8.499415790352196e-05, + "loss": 0.002, + "step": 5092 + }, + { + "epoch": 21.78, + "learning_rate": 8.506092472041396e-05, + "loss": 0.0042, + "step": 5096 + }, + { + "epoch": 21.79, + "learning_rate": 8.512769153730596e-05, + "loss": 0.0048, + "step": 5100 + }, + { + "epoch": 21.81, + "learning_rate": 8.519445835419796e-05, + "loss": 0.002, + "step": 5104 + }, + { + "epoch": 21.83, + "learning_rate": 8.526122517108998e-05, + "loss": 0.0034, + "step": 5108 + }, + { + "epoch": 21.85, + "learning_rate": 8.532799198798198e-05, + "loss": 0.0044, + "step": 5112 + }, + { + "epoch": 21.86, + "learning_rate": 8.539475880487399e-05, + "loss": 0.0045, + "step": 5116 + }, + { + "epoch": 21.88, + "learning_rate": 8.546152562176597e-05, + "loss": 0.002, + "step": 5120 + }, + { + "epoch": 21.9, + "learning_rate": 8.552829243865799e-05, + "loss": 0.0023, + "step": 5124 + }, + { + "epoch": 21.91, + "learning_rate": 8.559505925555e-05, + "loss": 0.0027, + "step": 5128 + }, + { + "epoch": 21.93, + "learning_rate": 8.5661826072442e-05, + "loss": 0.0055, + "step": 5132 + }, + { + "epoch": 21.95, + "learning_rate": 8.5728592889334e-05, + "loss": 0.0035, + "step": 5136 + }, + { + "epoch": 21.97, + "learning_rate": 8.579535970622602e-05, + "loss": 0.0038, + "step": 5140 + }, + { + "epoch": 21.98, + "learning_rate": 8.586212652311802e-05, + "loss": 0.0031, + "step": 5144 + }, + { + "epoch": 22.0, + "learning_rate": 8.592889334001002e-05, + "loss": 0.0043, + "step": 5148 + }, + { + "epoch": 22.02, + "learning_rate": 8.599566015690202e-05, + "loss": 0.0034, + "step": 5152 + }, + { + "epoch": 22.03, + "learning_rate": 8.606242697379403e-05, + "loss": 0.0042, + "step": 5156 + }, + { + "epoch": 22.05, + "learning_rate": 8.612919379068604e-05, + "loss": 0.0022, + "step": 5160 + }, + { + "epoch": 22.07, + "learning_rate": 8.619596060757804e-05, + "loss": 0.0039, + "step": 5164 + }, + { + "epoch": 22.09, + "learning_rate": 8.626272742447005e-05, + "loss": 0.0026, + "step": 5168 + }, + { + "epoch": 22.1, + "learning_rate": 8.632949424136205e-05, + "loss": 0.0022, + "step": 5172 + }, + { + "epoch": 22.12, + "learning_rate": 8.639626105825405e-05, + "loss": 0.0055, + "step": 5176 + }, + { + "epoch": 22.14, + "learning_rate": 8.646302787514605e-05, + "loss": 0.0034, + "step": 5180 + }, + { + "epoch": 22.15, + "learning_rate": 8.652979469203806e-05, + "loss": 0.0043, + "step": 5184 + }, + { + "epoch": 22.17, + "learning_rate": 8.659656150893006e-05, + "loss": 0.0033, + "step": 5188 + }, + { + "epoch": 22.19, + "learning_rate": 8.666332832582206e-05, + "loss": 0.0072, + "step": 5192 + }, + { + "epoch": 22.21, + "learning_rate": 8.673009514271408e-05, + "loss": 0.0042, + "step": 5196 + }, + { + "epoch": 22.22, + "learning_rate": 8.679686195960608e-05, + "loss": 0.0062, + "step": 5200 + }, + { + "epoch": 22.24, + "learning_rate": 8.686362877649808e-05, + "loss": 0.0025, + "step": 5204 + }, + { + "epoch": 22.26, + "learning_rate": 8.693039559339009e-05, + "loss": 0.0044, + "step": 5208 + }, + { + "epoch": 22.27, + "learning_rate": 8.699716241028209e-05, + "loss": 0.0037, + "step": 5212 + }, + { + "epoch": 22.29, + "learning_rate": 8.70639292271741e-05, + "loss": 0.002, + "step": 5216 + }, + { + "epoch": 22.31, + "learning_rate": 8.713069604406611e-05, + "loss": 0.0024, + "step": 5220 + }, + { + "epoch": 22.32, + "learning_rate": 8.719746286095811e-05, + "loss": 0.0018, + "step": 5224 + }, + { + "epoch": 22.34, + "learning_rate": 8.726422967785011e-05, + "loss": 0.0025, + "step": 5228 + }, + { + "epoch": 22.36, + "learning_rate": 8.733099649474212e-05, + "loss": 0.0051, + "step": 5232 + }, + { + "epoch": 22.38, + "learning_rate": 8.739776331163412e-05, + "loss": 0.0027, + "step": 5236 + }, + { + "epoch": 22.39, + "learning_rate": 8.746453012852612e-05, + "loss": 0.0043, + "step": 5240 + }, + { + "epoch": 22.41, + "learning_rate": 8.753129694541812e-05, + "loss": 0.0048, + "step": 5244 + }, + { + "epoch": 22.43, + "learning_rate": 8.759806376231014e-05, + "loss": 0.0014, + "step": 5248 + }, + { + "epoch": 22.44, + "learning_rate": 8.766483057920214e-05, + "loss": 0.0019, + "step": 5252 + }, + { + "epoch": 22.46, + "learning_rate": 8.773159739609415e-05, + "loss": 0.0038, + "step": 5256 + }, + { + "epoch": 22.48, + "learning_rate": 8.779836421298615e-05, + "loss": 0.0026, + "step": 5260 + }, + { + "epoch": 22.5, + "learning_rate": 8.786513102987815e-05, + "loss": 0.0027, + "step": 5264 + }, + { + "epoch": 22.51, + "learning_rate": 8.793189784677017e-05, + "loss": 0.0042, + "step": 5268 + }, + { + "epoch": 22.53, + "learning_rate": 8.799866466366217e-05, + "loss": 0.0026, + "step": 5272 + }, + { + "epoch": 22.55, + "learning_rate": 8.806543148055417e-05, + "loss": 0.0037, + "step": 5276 + }, + { + "epoch": 22.56, + "learning_rate": 8.813219829744617e-05, + "loss": 0.0058, + "step": 5280 + }, + { + "epoch": 22.58, + "learning_rate": 8.819896511433818e-05, + "loss": 0.004, + "step": 5284 + }, + { + "epoch": 22.6, + "learning_rate": 8.826573193123018e-05, + "loss": 0.0026, + "step": 5288 + }, + { + "epoch": 22.62, + "learning_rate": 8.833249874812218e-05, + "loss": 0.0037, + "step": 5292 + }, + { + "epoch": 22.63, + "learning_rate": 8.839926556501418e-05, + "loss": 0.0024, + "step": 5296 + }, + { + "epoch": 22.65, + "learning_rate": 8.84660323819062e-05, + "loss": 0.0026, + "step": 5300 + }, + { + "epoch": 22.67, + "learning_rate": 8.85327991987982e-05, + "loss": 0.0032, + "step": 5304 + }, + { + "epoch": 22.68, + "learning_rate": 8.85995660156902e-05, + "loss": 0.0031, + "step": 5308 + }, + { + "epoch": 22.7, + "learning_rate": 8.866633283258221e-05, + "loss": 0.0068, + "step": 5312 + }, + { + "epoch": 22.72, + "learning_rate": 8.873309964947421e-05, + "loss": 0.0037, + "step": 5316 + }, + { + "epoch": 22.74, + "learning_rate": 8.879986646636623e-05, + "loss": 0.0068, + "step": 5320 + }, + { + "epoch": 22.75, + "learning_rate": 8.886663328325823e-05, + "loss": 0.0025, + "step": 5324 + }, + { + "epoch": 22.77, + "learning_rate": 8.893340010015023e-05, + "loss": 0.0056, + "step": 5328 + }, + { + "epoch": 22.79, + "learning_rate": 8.900016691704224e-05, + "loss": 0.0053, + "step": 5332 + }, + { + "epoch": 22.8, + "learning_rate": 8.906693373393424e-05, + "loss": 0.0053, + "step": 5336 + }, + { + "epoch": 22.82, + "learning_rate": 8.913370055082625e-05, + "loss": 0.003, + "step": 5340 + }, + { + "epoch": 22.84, + "learning_rate": 8.920046736771824e-05, + "loss": 0.0052, + "step": 5344 + }, + { + "epoch": 22.85, + "learning_rate": 8.926723418461025e-05, + "loss": 0.0072, + "step": 5348 + }, + { + "epoch": 22.87, + "learning_rate": 8.933400100150225e-05, + "loss": 0.0034, + "step": 5352 + }, + { + "epoch": 22.89, + "learning_rate": 8.940076781839426e-05, + "loss": 0.0027, + "step": 5356 + }, + { + "epoch": 22.91, + "learning_rate": 8.946753463528627e-05, + "loss": 0.0032, + "step": 5360 + }, + { + "epoch": 22.92, + "learning_rate": 8.953430145217827e-05, + "loss": 0.0038, + "step": 5364 + }, + { + "epoch": 22.94, + "learning_rate": 8.960106826907027e-05, + "loss": 0.0036, + "step": 5368 + }, + { + "epoch": 22.96, + "learning_rate": 8.966783508596228e-05, + "loss": 0.003, + "step": 5372 + }, + { + "epoch": 22.97, + "learning_rate": 8.973460190285429e-05, + "loss": 0.0026, + "step": 5376 + }, + { + "epoch": 22.99, + "learning_rate": 8.98013687197463e-05, + "loss": 0.0054, + "step": 5380 + }, + { + "epoch": 23.01, + "learning_rate": 8.98681355366383e-05, + "loss": 0.0027, + "step": 5384 + }, + { + "epoch": 23.03, + "learning_rate": 8.99349023535303e-05, + "loss": 0.0022, + "step": 5388 + }, + { + "epoch": 23.04, + "learning_rate": 9.000166917042232e-05, + "loss": 0.0048, + "step": 5392 + }, + { + "epoch": 23.06, + "learning_rate": 9.006843598731432e-05, + "loss": 0.0034, + "step": 5396 + }, + { + "epoch": 23.08, + "learning_rate": 9.013520280420631e-05, + "loss": 0.0033, + "step": 5400 + }, + { + "epoch": 23.09, + "learning_rate": 9.020196962109831e-05, + "loss": 0.0023, + "step": 5404 + }, + { + "epoch": 23.11, + "learning_rate": 9.026873643799033e-05, + "loss": 0.0026, + "step": 5408 + }, + { + "epoch": 23.13, + "learning_rate": 9.033550325488233e-05, + "loss": 0.002, + "step": 5412 + }, + { + "epoch": 23.15, + "learning_rate": 9.040227007177433e-05, + "loss": 0.0022, + "step": 5416 + }, + { + "epoch": 23.16, + "learning_rate": 9.046903688866633e-05, + "loss": 0.0075, + "step": 5420 + }, + { + "epoch": 23.18, + "learning_rate": 9.053580370555834e-05, + "loss": 0.0057, + "step": 5424 + }, + { + "epoch": 23.2, + "learning_rate": 9.060257052245035e-05, + "loss": 0.0016, + "step": 5428 + }, + { + "epoch": 23.21, + "learning_rate": 9.066933733934235e-05, + "loss": 0.0063, + "step": 5432 + }, + { + "epoch": 23.23, + "learning_rate": 9.073610415623436e-05, + "loss": 0.003, + "step": 5436 + }, + { + "epoch": 23.25, + "learning_rate": 9.080287097312636e-05, + "loss": 0.004, + "step": 5440 + }, + { + "epoch": 23.26, + "learning_rate": 9.086963779001836e-05, + "loss": 0.0035, + "step": 5444 + }, + { + "epoch": 23.28, + "learning_rate": 9.093640460691038e-05, + "loss": 0.0033, + "step": 5448 + }, + { + "epoch": 23.3, + "learning_rate": 9.100317142380237e-05, + "loss": 0.0062, + "step": 5452 + }, + { + "epoch": 23.32, + "learning_rate": 9.106993824069437e-05, + "loss": 0.0029, + "step": 5456 + }, + { + "epoch": 23.33, + "learning_rate": 9.113670505758639e-05, + "loss": 0.0055, + "step": 5460 + }, + { + "epoch": 23.35, + "learning_rate": 9.120347187447839e-05, + "loss": 0.0037, + "step": 5464 + }, + { + "epoch": 23.37, + "learning_rate": 9.127023869137039e-05, + "loss": 0.0021, + "step": 5468 + }, + { + "epoch": 23.38, + "learning_rate": 9.13370055082624e-05, + "loss": 0.0035, + "step": 5472 + }, + { + "epoch": 23.4, + "learning_rate": 9.14037723251544e-05, + "loss": 0.0022, + "step": 5476 + }, + { + "epoch": 23.42, + "learning_rate": 9.147053914204641e-05, + "loss": 0.0019, + "step": 5480 + }, + { + "epoch": 23.44, + "learning_rate": 9.153730595893842e-05, + "loss": 0.004, + "step": 5484 + }, + { + "epoch": 23.45, + "learning_rate": 9.160407277583042e-05, + "loss": 0.0026, + "step": 5488 + }, + { + "epoch": 23.47, + "learning_rate": 9.167083959272242e-05, + "loss": 0.0039, + "step": 5492 + }, + { + "epoch": 23.49, + "learning_rate": 9.173760640961442e-05, + "loss": 0.0048, + "step": 5496 + }, + { + "epoch": 23.5, + "learning_rate": 9.180437322650644e-05, + "loss": 0.0026, + "step": 5500 + }, + { + "epoch": 23.52, + "learning_rate": 9.187114004339844e-05, + "loss": 0.005, + "step": 5504 + }, + { + "epoch": 23.54, + "learning_rate": 9.193790686029043e-05, + "loss": 0.0035, + "step": 5508 + }, + { + "epoch": 23.56, + "learning_rate": 9.200467367718243e-05, + "loss": 0.0015, + "step": 5512 + }, + { + "epoch": 23.57, + "learning_rate": 9.207144049407445e-05, + "loss": 0.005, + "step": 5516 + }, + { + "epoch": 23.59, + "learning_rate": 9.213820731096645e-05, + "loss": 0.0016, + "step": 5520 + }, + { + "epoch": 23.61, + "learning_rate": 9.220497412785846e-05, + "loss": 0.0032, + "step": 5524 + }, + { + "epoch": 23.62, + "learning_rate": 9.227174094475046e-05, + "loss": 0.0027, + "step": 5528 + }, + { + "epoch": 23.64, + "learning_rate": 9.233850776164246e-05, + "loss": 0.0033, + "step": 5532 + }, + { + "epoch": 23.66, + "learning_rate": 9.240527457853448e-05, + "loss": 0.0021, + "step": 5536 + }, + { + "epoch": 23.68, + "learning_rate": 9.247204139542648e-05, + "loss": 0.0024, + "step": 5540 + }, + { + "epoch": 23.69, + "learning_rate": 9.253880821231848e-05, + "loss": 0.0036, + "step": 5544 + }, + { + "epoch": 23.71, + "learning_rate": 9.260557502921048e-05, + "loss": 0.0051, + "step": 5548 + }, + { + "epoch": 23.73, + "learning_rate": 9.26723418461025e-05, + "loss": 0.0022, + "step": 5552 + }, + { + "epoch": 23.74, + "learning_rate": 9.27391086629945e-05, + "loss": 0.0042, + "step": 5556 + }, + { + "epoch": 23.76, + "learning_rate": 9.28058754798865e-05, + "loss": 0.003, + "step": 5560 + }, + { + "epoch": 23.78, + "learning_rate": 9.28726422967785e-05, + "loss": 0.0024, + "step": 5564 + }, + { + "epoch": 23.79, + "learning_rate": 9.293940911367051e-05, + "loss": 0.0043, + "step": 5568 + }, + { + "epoch": 23.81, + "learning_rate": 9.300617593056251e-05, + "loss": 0.0047, + "step": 5572 + }, + { + "epoch": 23.83, + "learning_rate": 9.307294274745452e-05, + "loss": 0.0029, + "step": 5576 + }, + { + "epoch": 23.85, + "learning_rate": 9.313970956434652e-05, + "loss": 0.0028, + "step": 5580 + }, + { + "epoch": 23.86, + "learning_rate": 9.320647638123852e-05, + "loss": 0.0057, + "step": 5584 + }, + { + "epoch": 23.88, + "learning_rate": 9.327324319813054e-05, + "loss": 0.0042, + "step": 5588 + }, + { + "epoch": 23.9, + "learning_rate": 9.334001001502254e-05, + "loss": 0.0024, + "step": 5592 + }, + { + "epoch": 23.91, + "learning_rate": 9.340677683191454e-05, + "loss": 0.0037, + "step": 5596 + }, + { + "epoch": 23.93, + "learning_rate": 9.347354364880655e-05, + "loss": 0.005, + "step": 5600 + }, + { + "epoch": 23.95, + "learning_rate": 9.354031046569855e-05, + "loss": 0.006, + "step": 5604 + }, + { + "epoch": 23.97, + "learning_rate": 9.360707728259056e-05, + "loss": 0.0077, + "step": 5608 + }, + { + "epoch": 23.98, + "learning_rate": 9.367384409948257e-05, + "loss": 0.0032, + "step": 5612 + }, + { + "epoch": 24.0, + "learning_rate": 9.374061091637457e-05, + "loss": 0.0043, + "step": 5616 + }, + { + "epoch": 24.02, + "learning_rate": 9.380737773326657e-05, + "loss": 0.0035, + "step": 5620 + }, + { + "epoch": 24.03, + "learning_rate": 9.387414455015858e-05, + "loss": 0.0032, + "step": 5624 + }, + { + "epoch": 24.05, + "learning_rate": 9.394091136705058e-05, + "loss": 0.0015, + "step": 5628 + }, + { + "epoch": 24.07, + "learning_rate": 9.400767818394258e-05, + "loss": 0.0036, + "step": 5632 + }, + { + "epoch": 24.09, + "learning_rate": 9.407444500083458e-05, + "loss": 0.0022, + "step": 5636 + }, + { + "epoch": 24.1, + "learning_rate": 9.41412118177266e-05, + "loss": 0.0055, + "step": 5640 + }, + { + "epoch": 24.12, + "learning_rate": 9.42079786346186e-05, + "loss": 0.0033, + "step": 5644 + }, + { + "epoch": 24.14, + "learning_rate": 9.42747454515106e-05, + "loss": 0.0061, + "step": 5648 + }, + { + "epoch": 24.15, + "learning_rate": 9.434151226840261e-05, + "loss": 0.0037, + "step": 5652 + }, + { + "epoch": 24.17, + "learning_rate": 9.440827908529461e-05, + "loss": 0.004, + "step": 5656 + }, + { + "epoch": 24.19, + "learning_rate": 9.447504590218663e-05, + "loss": 0.0035, + "step": 5660 + }, + { + "epoch": 24.21, + "learning_rate": 9.454181271907863e-05, + "loss": 0.005, + "step": 5664 + }, + { + "epoch": 24.22, + "learning_rate": 9.460857953597063e-05, + "loss": 0.002, + "step": 5668 + }, + { + "epoch": 24.24, + "learning_rate": 9.467534635286263e-05, + "loss": 0.0067, + "step": 5672 + }, + { + "epoch": 24.26, + "learning_rate": 9.474211316975464e-05, + "loss": 0.005, + "step": 5676 + }, + { + "epoch": 24.27, + "learning_rate": 9.480887998664664e-05, + "loss": 0.0032, + "step": 5680 + }, + { + "epoch": 24.29, + "learning_rate": 9.487564680353864e-05, + "loss": 0.0039, + "step": 5684 + }, + { + "epoch": 24.31, + "learning_rate": 9.494241362043064e-05, + "loss": 0.0032, + "step": 5688 + }, + { + "epoch": 24.32, + "learning_rate": 9.500918043732265e-05, + "loss": 0.0042, + "step": 5692 + }, + { + "epoch": 24.34, + "learning_rate": 9.507594725421466e-05, + "loss": 0.0044, + "step": 5696 + }, + { + "epoch": 24.36, + "learning_rate": 9.514271407110667e-05, + "loss": 0.0024, + "step": 5700 + }, + { + "epoch": 24.38, + "learning_rate": 9.520948088799867e-05, + "loss": 0.005, + "step": 5704 + }, + { + "epoch": 24.39, + "learning_rate": 9.527624770489067e-05, + "loss": 0.0025, + "step": 5708 + }, + { + "epoch": 24.41, + "learning_rate": 9.534301452178269e-05, + "loss": 0.0047, + "step": 5712 + }, + { + "epoch": 24.43, + "learning_rate": 9.540978133867469e-05, + "loss": 0.003, + "step": 5716 + }, + { + "epoch": 24.44, + "learning_rate": 9.547654815556669e-05, + "loss": 0.0025, + "step": 5720 + }, + { + "epoch": 24.46, + "learning_rate": 9.55433149724587e-05, + "loss": 0.0034, + "step": 5724 + }, + { + "epoch": 24.48, + "learning_rate": 9.56100817893507e-05, + "loss": 0.004, + "step": 5728 + }, + { + "epoch": 24.5, + "learning_rate": 9.56768486062427e-05, + "loss": 0.0024, + "step": 5732 + }, + { + "epoch": 24.51, + "learning_rate": 9.57436154231347e-05, + "loss": 0.0027, + "step": 5736 + }, + { + "epoch": 24.53, + "learning_rate": 9.58103822400267e-05, + "loss": 0.0026, + "step": 5740 + }, + { + "epoch": 24.55, + "learning_rate": 9.587714905691871e-05, + "loss": 0.0017, + "step": 5744 + }, + { + "epoch": 24.56, + "learning_rate": 9.594391587381072e-05, + "loss": 0.0067, + "step": 5748 + }, + { + "epoch": 24.58, + "learning_rate": 9.601068269070273e-05, + "loss": 0.0032, + "step": 5752 + }, + { + "epoch": 24.6, + "learning_rate": 9.607744950759473e-05, + "loss": 0.0078, + "step": 5756 + }, + { + "epoch": 24.62, + "learning_rate": 9.614421632448673e-05, + "loss": 0.0082, + "step": 5760 + }, + { + "epoch": 24.63, + "learning_rate": 9.621098314137873e-05, + "loss": 0.0049, + "step": 5764 + }, + { + "epoch": 24.65, + "learning_rate": 9.627774995827075e-05, + "loss": 0.0036, + "step": 5768 + }, + { + "epoch": 24.67, + "learning_rate": 9.634451677516275e-05, + "loss": 0.0045, + "step": 5772 + }, + { + "epoch": 24.68, + "learning_rate": 9.641128359205476e-05, + "loss": 0.0068, + "step": 5776 + }, + { + "epoch": 24.7, + "learning_rate": 9.647805040894676e-05, + "loss": 0.007, + "step": 5780 + }, + { + "epoch": 24.72, + "learning_rate": 9.654481722583876e-05, + "loss": 0.0026, + "step": 5784 + }, + { + "epoch": 24.74, + "learning_rate": 9.661158404273076e-05, + "loss": 0.0066, + "step": 5788 + }, + { + "epoch": 24.75, + "learning_rate": 9.667835085962277e-05, + "loss": 0.002, + "step": 5792 + }, + { + "epoch": 24.77, + "learning_rate": 9.674511767651477e-05, + "loss": 0.0017, + "step": 5796 + }, + { + "epoch": 24.79, + "learning_rate": 9.681188449340679e-05, + "loss": 0.0021, + "step": 5800 + }, + { + "epoch": 24.8, + "learning_rate": 9.687865131029879e-05, + "loss": 0.0039, + "step": 5804 + }, + { + "epoch": 24.82, + "learning_rate": 9.694541812719079e-05, + "loss": 0.0055, + "step": 5808 + }, + { + "epoch": 24.84, + "learning_rate": 9.701218494408279e-05, + "loss": 0.0042, + "step": 5812 + }, + { + "epoch": 24.85, + "learning_rate": 9.70789517609748e-05, + "loss": 0.0037, + "step": 5816 + }, + { + "epoch": 24.87, + "learning_rate": 9.714571857786681e-05, + "loss": 0.0039, + "step": 5820 + }, + { + "epoch": 24.89, + "learning_rate": 9.721248539475881e-05, + "loss": 0.0026, + "step": 5824 + }, + { + "epoch": 24.91, + "learning_rate": 9.727925221165082e-05, + "loss": 0.0043, + "step": 5828 + }, + { + "epoch": 24.92, + "learning_rate": 9.734601902854282e-05, + "loss": 0.0046, + "step": 5832 + }, + { + "epoch": 24.94, + "learning_rate": 9.741278584543482e-05, + "loss": 0.0033, + "step": 5836 + }, + { + "epoch": 24.96, + "learning_rate": 9.747955266232682e-05, + "loss": 0.004, + "step": 5840 + }, + { + "epoch": 24.97, + "learning_rate": 9.754631947921883e-05, + "loss": 0.0051, + "step": 5844 + }, + { + "epoch": 24.99, + "learning_rate": 9.761308629611083e-05, + "loss": 0.0041, + "step": 5848 + }, + { + "epoch": 25.01, + "learning_rate": 9.767985311300283e-05, + "loss": 0.0039, + "step": 5852 + }, + { + "epoch": 25.03, + "learning_rate": 9.774661992989485e-05, + "loss": 0.0039, + "step": 5856 + }, + { + "epoch": 25.04, + "learning_rate": 9.781338674678685e-05, + "loss": 0.0026, + "step": 5860 + }, + { + "epoch": 25.06, + "learning_rate": 9.788015356367885e-05, + "loss": 0.0023, + "step": 5864 + }, + { + "epoch": 25.08, + "learning_rate": 9.794692038057086e-05, + "loss": 0.0033, + "step": 5868 + }, + { + "epoch": 25.09, + "learning_rate": 9.801368719746287e-05, + "loss": 0.0032, + "step": 5872 + }, + { + "epoch": 25.11, + "learning_rate": 9.808045401435488e-05, + "loss": 0.0048, + "step": 5876 + }, + { + "epoch": 25.13, + "learning_rate": 9.814722083124688e-05, + "loss": 0.0074, + "step": 5880 + }, + { + "epoch": 25.15, + "learning_rate": 9.821398764813888e-05, + "loss": 0.0021, + "step": 5884 + }, + { + "epoch": 25.16, + "learning_rate": 9.828075446503088e-05, + "loss": 0.0019, + "step": 5888 + }, + { + "epoch": 25.18, + "learning_rate": 9.83475212819229e-05, + "loss": 0.0014, + "step": 5892 + }, + { + "epoch": 25.2, + "learning_rate": 9.841428809881489e-05, + "loss": 0.0038, + "step": 5896 + }, + { + "epoch": 25.21, + "learning_rate": 9.848105491570689e-05, + "loss": 0.0046, + "step": 5900 + }, + { + "epoch": 25.23, + "learning_rate": 9.85478217325989e-05, + "loss": 0.0021, + "step": 5904 + }, + { + "epoch": 25.25, + "learning_rate": 9.861458854949091e-05, + "loss": 0.0037, + "step": 5908 + }, + { + "epoch": 25.26, + "learning_rate": 9.868135536638291e-05, + "loss": 0.0019, + "step": 5912 + }, + { + "epoch": 25.28, + "learning_rate": 9.874812218327492e-05, + "loss": 0.0013, + "step": 5916 + }, + { + "epoch": 25.3, + "learning_rate": 9.881488900016692e-05, + "loss": 0.0026, + "step": 5920 + }, + { + "epoch": 25.32, + "learning_rate": 9.888165581705892e-05, + "loss": 0.0034, + "step": 5924 + }, + { + "epoch": 25.33, + "learning_rate": 9.894842263395094e-05, + "loss": 0.0028, + "step": 5928 + }, + { + "epoch": 25.35, + "learning_rate": 9.901518945084294e-05, + "loss": 0.0069, + "step": 5932 + }, + { + "epoch": 25.37, + "learning_rate": 9.908195626773494e-05, + "loss": 0.0031, + "step": 5936 + }, + { + "epoch": 25.38, + "learning_rate": 9.914872308462694e-05, + "loss": 0.0047, + "step": 5940 + }, + { + "epoch": 25.4, + "learning_rate": 9.921548990151896e-05, + "loss": 0.0029, + "step": 5944 + }, + { + "epoch": 25.42, + "learning_rate": 9.928225671841096e-05, + "loss": 0.0026, + "step": 5948 + }, + { + "epoch": 25.44, + "learning_rate": 9.934902353530295e-05, + "loss": 0.002, + "step": 5952 + }, + { + "epoch": 25.45, + "learning_rate": 9.941579035219495e-05, + "loss": 0.0019, + "step": 5956 + }, + { + "epoch": 25.47, + "learning_rate": 9.948255716908697e-05, + "loss": 0.0021, + "step": 5960 + }, + { + "epoch": 25.49, + "learning_rate": 9.954932398597897e-05, + "loss": 0.0015, + "step": 5964 + }, + { + "epoch": 25.5, + "learning_rate": 9.961609080287098e-05, + "loss": 0.0042, + "step": 5968 + }, + { + "epoch": 25.52, + "learning_rate": 9.968285761976298e-05, + "loss": 0.0036, + "step": 5972 + }, + { + "epoch": 25.54, + "learning_rate": 9.974962443665498e-05, + "loss": 0.005, + "step": 5976 + }, + { + "epoch": 25.56, + "learning_rate": 9.9816391253547e-05, + "loss": 0.0019, + "step": 5980 + }, + { + "epoch": 25.57, + "learning_rate": 9.9883158070439e-05, + "loss": 0.0046, + "step": 5984 + }, + { + "epoch": 25.59, + "learning_rate": 9.9949924887331e-05, + "loss": 0.0044, + "step": 5988 + }, + { + "epoch": 25.61, + "learning_rate": 9.999999991511074e-05, + "loss": 0.002, + "step": 5992 + }, + { + "epoch": 25.62, + "learning_rate": 9.999999787776848e-05, + "loss": 0.004, + "step": 5996 + }, + { + "epoch": 25.64, + "learning_rate": 9.999999312396997e-05, + "loss": 0.0056, + "step": 6000 + }, + { + "epoch": 25.64, + "eval_exact_match": 0.5, + "eval_loss": 0.6754465699195862, + "eval_runtime": 141.2031, + "eval_samples_per_second": 6.813, + "step": 6000 + }, + { + "epoch": 25.66, + "learning_rate": 9.999998565371547e-05, + "loss": 0.0071, + "step": 6004 + }, + { + "epoch": 25.68, + "learning_rate": 9.999997546700539e-05, + "loss": 0.0036, + "step": 6008 + }, + { + "epoch": 25.69, + "learning_rate": 9.999996256384028e-05, + "loss": 0.0021, + "step": 6012 + }, + { + "epoch": 25.71, + "learning_rate": 9.999994694422086e-05, + "loss": 0.0037, + "step": 6016 + }, + { + "epoch": 25.73, + "learning_rate": 9.999992860814793e-05, + "loss": 0.0028, + "step": 6020 + }, + { + "epoch": 25.74, + "learning_rate": 9.999990755562253e-05, + "loss": 0.0073, + "step": 6024 + }, + { + "epoch": 25.76, + "learning_rate": 9.999988378664581e-05, + "loss": 0.0037, + "step": 6028 + }, + { + "epoch": 25.78, + "learning_rate": 9.999985730121903e-05, + "loss": 0.0035, + "step": 6032 + }, + { + "epoch": 25.79, + "learning_rate": 9.999982809934364e-05, + "loss": 0.004, + "step": 6036 + }, + { + "epoch": 25.81, + "learning_rate": 9.999979618102123e-05, + "loss": 0.0018, + "step": 6040 + }, + { + "epoch": 25.83, + "learning_rate": 9.999976154625354e-05, + "loss": 0.0028, + "step": 6044 + }, + { + "epoch": 25.85, + "learning_rate": 9.999972419504244e-05, + "loss": 0.003, + "step": 6048 + }, + { + "epoch": 25.86, + "learning_rate": 9.999968412738995e-05, + "loss": 0.0025, + "step": 6052 + }, + { + "epoch": 25.88, + "learning_rate": 9.999964134329827e-05, + "loss": 0.0024, + "step": 6056 + }, + { + "epoch": 25.9, + "learning_rate": 9.999959584276973e-05, + "loss": 0.0038, + "step": 6060 + }, + { + "epoch": 25.91, + "learning_rate": 9.999954762580675e-05, + "loss": 0.0027, + "step": 6064 + }, + { + "epoch": 25.93, + "learning_rate": 9.999949669241202e-05, + "loss": 0.0019, + "step": 6068 + }, + { + "epoch": 25.95, + "learning_rate": 9.999944304258826e-05, + "loss": 0.0034, + "step": 6072 + }, + { + "epoch": 25.97, + "learning_rate": 9.999938667633839e-05, + "loss": 0.0026, + "step": 6076 + }, + { + "epoch": 25.98, + "learning_rate": 9.999932759366551e-05, + "loss": 0.0041, + "step": 6080 + }, + { + "epoch": 26.0, + "learning_rate": 9.999926579457277e-05, + "loss": 0.0026, + "step": 6084 + }, + { + "epoch": 26.02, + "learning_rate": 9.999920127906357e-05, + "loss": 0.0028, + "step": 6088 + }, + { + "epoch": 26.03, + "learning_rate": 9.999913404714139e-05, + "loss": 0.0047, + "step": 6092 + }, + { + "epoch": 26.05, + "learning_rate": 9.999906409880991e-05, + "loss": 0.0028, + "step": 6096 + }, + { + "epoch": 26.07, + "learning_rate": 9.999899143407291e-05, + "loss": 0.0046, + "step": 6100 + }, + { + "epoch": 26.09, + "learning_rate": 9.999891605293434e-05, + "loss": 0.0027, + "step": 6104 + }, + { + "epoch": 26.1, + "learning_rate": 9.999883795539832e-05, + "loss": 0.0024, + "step": 6108 + }, + { + "epoch": 26.12, + "learning_rate": 9.999875714146904e-05, + "loss": 0.0074, + "step": 6112 + }, + { + "epoch": 26.14, + "learning_rate": 9.999867361115096e-05, + "loss": 0.003, + "step": 6116 + }, + { + "epoch": 26.15, + "learning_rate": 9.999858736444856e-05, + "loss": 0.0034, + "step": 6120 + }, + { + "epoch": 26.17, + "learning_rate": 9.999849840136655e-05, + "loss": 0.0035, + "step": 6124 + }, + { + "epoch": 26.19, + "learning_rate": 9.999840672190975e-05, + "loss": 0.0023, + "step": 6128 + }, + { + "epoch": 26.21, + "learning_rate": 9.999831232608316e-05, + "loss": 0.0037, + "step": 6132 + }, + { + "epoch": 26.22, + "learning_rate": 9.99982152138919e-05, + "loss": 0.0031, + "step": 6136 + }, + { + "epoch": 26.24, + "learning_rate": 9.999811538534123e-05, + "loss": 0.0053, + "step": 6140 + }, + { + "epoch": 26.26, + "learning_rate": 9.99980128404366e-05, + "loss": 0.0042, + "step": 6144 + }, + { + "epoch": 26.27, + "learning_rate": 9.999790757918357e-05, + "loss": 0.0031, + "step": 6148 + }, + { + "epoch": 26.29, + "learning_rate": 9.999779960158786e-05, + "loss": 0.0015, + "step": 6152 + }, + { + "epoch": 26.31, + "learning_rate": 9.999768890765533e-05, + "loss": 0.0042, + "step": 6156 + }, + { + "epoch": 26.32, + "learning_rate": 9.999757549739201e-05, + "loss": 0.0029, + "step": 6160 + }, + { + "epoch": 26.34, + "learning_rate": 9.999745937080403e-05, + "loss": 0.0037, + "step": 6164 + }, + { + "epoch": 26.36, + "learning_rate": 9.999734052789775e-05, + "loss": 0.0027, + "step": 6168 + }, + { + "epoch": 26.38, + "learning_rate": 9.999721896867957e-05, + "loss": 0.0019, + "step": 6172 + }, + { + "epoch": 26.39, + "learning_rate": 9.999709469315612e-05, + "loss": 0.0031, + "step": 6176 + }, + { + "epoch": 26.41, + "learning_rate": 9.999696770133417e-05, + "loss": 0.0037, + "step": 6180 + }, + { + "epoch": 26.43, + "learning_rate": 9.999683799322059e-05, + "loss": 0.0018, + "step": 6184 + }, + { + "epoch": 26.44, + "learning_rate": 9.999670556882244e-05, + "loss": 0.0025, + "step": 6188 + }, + { + "epoch": 26.46, + "learning_rate": 9.99965704281469e-05, + "loss": 0.0022, + "step": 6192 + }, + { + "epoch": 26.48, + "learning_rate": 9.999643257120135e-05, + "loss": 0.0022, + "step": 6196 + }, + { + "epoch": 26.5, + "learning_rate": 9.999629199799323e-05, + "loss": 0.0032, + "step": 6200 + }, + { + "epoch": 26.51, + "learning_rate": 9.999614870853023e-05, + "loss": 0.0041, + "step": 6204 + }, + { + "epoch": 26.53, + "learning_rate": 9.999600270282007e-05, + "loss": 0.005, + "step": 6208 + }, + { + "epoch": 26.55, + "learning_rate": 9.999585398087074e-05, + "loss": 0.0037, + "step": 6212 + }, + { + "epoch": 26.56, + "learning_rate": 9.99957025426903e-05, + "loss": 0.005, + "step": 6216 + }, + { + "epoch": 26.58, + "learning_rate": 9.999554838828698e-05, + "loss": 0.0039, + "step": 6220 + }, + { + "epoch": 26.6, + "learning_rate": 9.999539151766915e-05, + "loss": 0.003, + "step": 6224 + }, + { + "epoch": 26.62, + "learning_rate": 9.999523193084532e-05, + "loss": 0.002, + "step": 6228 + }, + { + "epoch": 26.63, + "learning_rate": 9.999506962782418e-05, + "loss": 0.0018, + "step": 6232 + }, + { + "epoch": 26.65, + "learning_rate": 9.999490460861454e-05, + "loss": 0.0031, + "step": 6236 + }, + { + "epoch": 26.67, + "learning_rate": 9.999473687322538e-05, + "loss": 0.0029, + "step": 6240 + }, + { + "epoch": 26.68, + "learning_rate": 9.999456642166577e-05, + "loss": 0.0034, + "step": 6244 + }, + { + "epoch": 26.7, + "learning_rate": 9.999439325394504e-05, + "loss": 0.002, + "step": 6248 + }, + { + "epoch": 26.72, + "learning_rate": 9.999421737007254e-05, + "loss": 0.0023, + "step": 6252 + }, + { + "epoch": 26.74, + "learning_rate": 9.999403877005783e-05, + "loss": 0.003, + "step": 6256 + }, + { + "epoch": 26.75, + "learning_rate": 9.999385745391065e-05, + "loss": 0.0034, + "step": 6260 + }, + { + "epoch": 26.77, + "learning_rate": 9.999367342164083e-05, + "loss": 0.0033, + "step": 6264 + }, + { + "epoch": 26.79, + "learning_rate": 9.999348667325836e-05, + "loss": 0.0037, + "step": 6268 + }, + { + "epoch": 26.8, + "learning_rate": 9.99932972087734e-05, + "loss": 0.0022, + "step": 6272 + }, + { + "epoch": 26.82, + "learning_rate": 9.999310502819623e-05, + "loss": 0.0036, + "step": 6276 + }, + { + "epoch": 26.84, + "learning_rate": 9.999291013153729e-05, + "loss": 0.0065, + "step": 6280 + }, + { + "epoch": 26.85, + "learning_rate": 9.999271251880718e-05, + "loss": 0.0047, + "step": 6284 + }, + { + "epoch": 26.87, + "learning_rate": 9.999251219001662e-05, + "loss": 0.0015, + "step": 6288 + }, + { + "epoch": 26.89, + "learning_rate": 9.999230914517653e-05, + "loss": 0.0022, + "step": 6292 + }, + { + "epoch": 26.91, + "learning_rate": 9.99921033842979e-05, + "loss": 0.0016, + "step": 6296 + }, + { + "epoch": 26.92, + "learning_rate": 9.999189490739191e-05, + "loss": 0.0035, + "step": 6300 + }, + { + "epoch": 26.94, + "learning_rate": 9.999168371446993e-05, + "loss": 0.0022, + "step": 6304 + }, + { + "epoch": 26.96, + "learning_rate": 9.99914698055434e-05, + "loss": 0.0029, + "step": 6308 + }, + { + "epoch": 26.97, + "learning_rate": 9.999125318062394e-05, + "loss": 0.004, + "step": 6312 + }, + { + "epoch": 26.99, + "learning_rate": 9.999103383972333e-05, + "loss": 0.0027, + "step": 6316 + }, + { + "epoch": 27.01, + "learning_rate": 9.999081178285347e-05, + "loss": 0.0031, + "step": 6320 + }, + { + "epoch": 27.03, + "learning_rate": 9.999058701002645e-05, + "loss": 0.0044, + "step": 6324 + }, + { + "epoch": 27.04, + "learning_rate": 9.999035952125448e-05, + "loss": 0.0014, + "step": 6328 + }, + { + "epoch": 27.06, + "learning_rate": 9.99901293165499e-05, + "loss": 0.0071, + "step": 6332 + }, + { + "epoch": 27.08, + "learning_rate": 9.998989639592521e-05, + "loss": 0.0028, + "step": 6336 + }, + { + "epoch": 27.09, + "learning_rate": 9.998966075939309e-05, + "loss": 0.0018, + "step": 6340 + }, + { + "epoch": 27.11, + "learning_rate": 9.998942240696633e-05, + "loss": 0.0015, + "step": 6344 + }, + { + "epoch": 27.13, + "learning_rate": 9.998918133865788e-05, + "loss": 0.0018, + "step": 6348 + }, + { + "epoch": 27.15, + "learning_rate": 9.998893755448085e-05, + "loss": 0.0031, + "step": 6352 + }, + { + "epoch": 27.16, + "learning_rate": 9.998869105444845e-05, + "loss": 0.0053, + "step": 6356 + }, + { + "epoch": 27.18, + "learning_rate": 9.998844183857412e-05, + "loss": 0.0033, + "step": 6360 + }, + { + "epoch": 27.2, + "learning_rate": 9.998818990687135e-05, + "loss": 0.0021, + "step": 6364 + }, + { + "epoch": 27.21, + "learning_rate": 9.998793525935387e-05, + "loss": 0.002, + "step": 6368 + }, + { + "epoch": 27.23, + "learning_rate": 9.998767789603547e-05, + "loss": 0.0011, + "step": 6372 + }, + { + "epoch": 27.25, + "learning_rate": 9.998741781693017e-05, + "loss": 0.0034, + "step": 6376 + }, + { + "epoch": 27.26, + "learning_rate": 9.99871550220521e-05, + "loss": 0.0034, + "step": 6380 + }, + { + "epoch": 27.28, + "learning_rate": 9.998688951141552e-05, + "loss": 0.0018, + "step": 6384 + }, + { + "epoch": 27.3, + "learning_rate": 9.998662128503487e-05, + "loss": 0.0038, + "step": 6388 + }, + { + "epoch": 27.32, + "learning_rate": 9.998635034292469e-05, + "loss": 0.0009, + "step": 6392 + }, + { + "epoch": 27.33, + "learning_rate": 9.998607668509975e-05, + "loss": 0.0019, + "step": 6396 + }, + { + "epoch": 27.35, + "learning_rate": 9.998580031157486e-05, + "loss": 0.0036, + "step": 6400 + }, + { + "epoch": 27.37, + "learning_rate": 9.998552122236509e-05, + "loss": 0.0019, + "step": 6404 + }, + { + "epoch": 27.38, + "learning_rate": 9.998523941748556e-05, + "loss": 0.0038, + "step": 6408 + }, + { + "epoch": 27.4, + "learning_rate": 9.998495489695161e-05, + "loss": 0.0042, + "step": 6412 + }, + { + "epoch": 27.42, + "learning_rate": 9.998466766077868e-05, + "loss": 0.0027, + "step": 6416 + }, + { + "epoch": 27.44, + "learning_rate": 9.998437770898239e-05, + "loss": 0.0026, + "step": 6420 + }, + { + "epoch": 27.45, + "learning_rate": 9.998408504157849e-05, + "loss": 0.0027, + "step": 6424 + }, + { + "epoch": 27.47, + "learning_rate": 9.998378965858286e-05, + "loss": 0.0043, + "step": 6428 + }, + { + "epoch": 27.49, + "learning_rate": 9.998349156001156e-05, + "loss": 0.0031, + "step": 6432 + }, + { + "epoch": 27.5, + "learning_rate": 9.998319074588081e-05, + "loss": 0.0018, + "step": 6436 + }, + { + "epoch": 27.52, + "learning_rate": 9.998288721620692e-05, + "loss": 0.0033, + "step": 6440 + }, + { + "epoch": 27.54, + "learning_rate": 9.998258097100639e-05, + "loss": 0.0009, + "step": 6444 + }, + { + "epoch": 27.56, + "learning_rate": 9.998227201029586e-05, + "loss": 0.0035, + "step": 6448 + }, + { + "epoch": 27.57, + "learning_rate": 9.998196033409212e-05, + "loss": 0.0034, + "step": 6452 + }, + { + "epoch": 27.59, + "learning_rate": 9.998164594241208e-05, + "loss": 0.0023, + "step": 6456 + }, + { + "epoch": 27.61, + "learning_rate": 9.998132883527287e-05, + "loss": 0.0042, + "step": 6460 + }, + { + "epoch": 27.62, + "learning_rate": 9.998100901269167e-05, + "loss": 0.0011, + "step": 6464 + }, + { + "epoch": 27.64, + "learning_rate": 9.998068647468587e-05, + "loss": 0.0047, + "step": 6468 + }, + { + "epoch": 27.66, + "learning_rate": 9.9980361221273e-05, + "loss": 0.0023, + "step": 6472 + }, + { + "epoch": 27.68, + "learning_rate": 9.998003325247071e-05, + "loss": 0.0025, + "step": 6476 + }, + { + "epoch": 27.69, + "learning_rate": 9.997970256829687e-05, + "loss": 0.003, + "step": 6480 + }, + { + "epoch": 27.71, + "learning_rate": 9.997936916876939e-05, + "loss": 0.0016, + "step": 6484 + }, + { + "epoch": 27.73, + "learning_rate": 9.997903305390641e-05, + "loss": 0.0027, + "step": 6488 + }, + { + "epoch": 27.74, + "learning_rate": 9.997869422372618e-05, + "loss": 0.0038, + "step": 6492 + }, + { + "epoch": 27.76, + "learning_rate": 9.997835267824712e-05, + "loss": 0.0047, + "step": 6496 + }, + { + "epoch": 27.78, + "learning_rate": 9.997800841748777e-05, + "loss": 0.002, + "step": 6500 + }, + { + "epoch": 27.79, + "learning_rate": 9.997766144146685e-05, + "loss": 0.0065, + "step": 6504 + }, + { + "epoch": 27.81, + "learning_rate": 9.997731175020319e-05, + "loss": 0.0062, + "step": 6508 + }, + { + "epoch": 27.83, + "learning_rate": 9.997695934371583e-05, + "loss": 0.0043, + "step": 6512 + }, + { + "epoch": 27.85, + "learning_rate": 9.997660422202386e-05, + "loss": 0.006, + "step": 6516 + }, + { + "epoch": 27.86, + "learning_rate": 9.997624638514661e-05, + "loss": 0.0015, + "step": 6520 + }, + { + "epoch": 27.88, + "learning_rate": 9.997588583310352e-05, + "loss": 0.0029, + "step": 6524 + }, + { + "epoch": 27.9, + "learning_rate": 9.997552256591416e-05, + "loss": 0.0052, + "step": 6528 + }, + { + "epoch": 27.91, + "learning_rate": 9.997515658359828e-05, + "loss": 0.0014, + "step": 6532 + }, + { + "epoch": 27.93, + "learning_rate": 9.997478788617576e-05, + "loss": 0.0018, + "step": 6536 + }, + { + "epoch": 27.95, + "learning_rate": 9.997441647366663e-05, + "loss": 0.0025, + "step": 6540 + }, + { + "epoch": 27.97, + "learning_rate": 9.997404234609107e-05, + "loss": 0.0022, + "step": 6544 + }, + { + "epoch": 27.98, + "learning_rate": 9.997366550346943e-05, + "loss": 0.0025, + "step": 6548 + }, + { + "epoch": 28.0, + "learning_rate": 9.997328594582213e-05, + "loss": 0.0033, + "step": 6552 + }, + { + "epoch": 28.02, + "learning_rate": 9.997290367316982e-05, + "loss": 0.0018, + "step": 6556 + }, + { + "epoch": 28.03, + "learning_rate": 9.997251868553328e-05, + "loss": 0.0025, + "step": 6560 + }, + { + "epoch": 28.05, + "learning_rate": 9.997213098293341e-05, + "loss": 0.0038, + "step": 6564 + }, + { + "epoch": 28.07, + "learning_rate": 9.997174056539129e-05, + "loss": 0.0038, + "step": 6568 + }, + { + "epoch": 28.09, + "learning_rate": 9.997134743292811e-05, + "loss": 0.0027, + "step": 6572 + }, + { + "epoch": 28.1, + "learning_rate": 9.997095158556525e-05, + "loss": 0.0028, + "step": 6576 + }, + { + "epoch": 28.12, + "learning_rate": 9.997055302332419e-05, + "loss": 0.003, + "step": 6580 + }, + { + "epoch": 28.14, + "learning_rate": 9.99701517462266e-05, + "loss": 0.0031, + "step": 6584 + }, + { + "epoch": 28.15, + "learning_rate": 9.996974775429427e-05, + "loss": 0.0022, + "step": 6588 + }, + { + "epoch": 28.17, + "learning_rate": 9.996934104754916e-05, + "loss": 0.0034, + "step": 6592 + }, + { + "epoch": 28.19, + "learning_rate": 9.996893162601337e-05, + "loss": 0.0033, + "step": 6596 + }, + { + "epoch": 28.21, + "learning_rate": 9.996851948970915e-05, + "loss": 0.0024, + "step": 6600 + }, + { + "epoch": 28.22, + "learning_rate": 9.996810463865886e-05, + "loss": 0.0016, + "step": 6604 + }, + { + "epoch": 28.24, + "learning_rate": 9.996768707288506e-05, + "loss": 0.0049, + "step": 6608 + }, + { + "epoch": 28.26, + "learning_rate": 9.996726679241043e-05, + "loss": 0.0062, + "step": 6612 + }, + { + "epoch": 28.27, + "learning_rate": 9.99668437972578e-05, + "loss": 0.0058, + "step": 6616 + }, + { + "epoch": 28.29, + "learning_rate": 9.996641808745017e-05, + "loss": 0.0044, + "step": 6620 + }, + { + "epoch": 28.31, + "learning_rate": 9.996598966301065e-05, + "loss": 0.0026, + "step": 6624 + }, + { + "epoch": 28.32, + "learning_rate": 9.996555852396252e-05, + "loss": 0.002, + "step": 6628 + }, + { + "epoch": 28.34, + "learning_rate": 9.996512467032922e-05, + "loss": 0.0022, + "step": 6632 + }, + { + "epoch": 28.36, + "learning_rate": 9.996468810213428e-05, + "loss": 0.0025, + "step": 6636 + }, + { + "epoch": 28.38, + "learning_rate": 9.996424881940146e-05, + "loss": 0.0037, + "step": 6640 + }, + { + "epoch": 28.39, + "learning_rate": 9.996380682215461e-05, + "loss": 0.0019, + "step": 6644 + }, + { + "epoch": 28.41, + "learning_rate": 9.996336211041773e-05, + "loss": 0.0028, + "step": 6648 + }, + { + "epoch": 28.43, + "learning_rate": 9.996291468421501e-05, + "loss": 0.0028, + "step": 6652 + }, + { + "epoch": 28.44, + "learning_rate": 9.996246454357073e-05, + "loss": 0.0025, + "step": 6656 + }, + { + "epoch": 28.46, + "learning_rate": 9.996201168850936e-05, + "loss": 0.0007, + "step": 6660 + }, + { + "epoch": 28.48, + "learning_rate": 9.996155611905551e-05, + "loss": 0.002, + "step": 6664 + }, + { + "epoch": 28.5, + "learning_rate": 9.99610978352339e-05, + "loss": 0.0035, + "step": 6668 + }, + { + "epoch": 28.51, + "learning_rate": 9.996063683706946e-05, + "loss": 0.0032, + "step": 6672 + }, + { + "epoch": 28.53, + "learning_rate": 9.996017312458724e-05, + "loss": 0.0017, + "step": 6676 + }, + { + "epoch": 28.55, + "learning_rate": 9.995970669781241e-05, + "loss": 0.0016, + "step": 6680 + }, + { + "epoch": 28.56, + "learning_rate": 9.995923755677033e-05, + "loss": 0.0049, + "step": 6684 + }, + { + "epoch": 28.58, + "learning_rate": 9.995876570148646e-05, + "loss": 0.0024, + "step": 6688 + }, + { + "epoch": 28.6, + "learning_rate": 9.995829113198645e-05, + "loss": 0.0041, + "step": 6692 + }, + { + "epoch": 28.62, + "learning_rate": 9.995781384829609e-05, + "loss": 0.0042, + "step": 6696 + }, + { + "epoch": 28.63, + "learning_rate": 9.99573338504413e-05, + "loss": 0.0036, + "step": 6700 + }, + { + "epoch": 28.65, + "learning_rate": 9.995685113844818e-05, + "loss": 0.0018, + "step": 6704 + }, + { + "epoch": 28.67, + "learning_rate": 9.995636571234293e-05, + "loss": 0.0019, + "step": 6708 + }, + { + "epoch": 28.68, + "learning_rate": 9.995587757215194e-05, + "loss": 0.0012, + "step": 6712 + }, + { + "epoch": 28.7, + "learning_rate": 9.995538671790171e-05, + "loss": 0.0021, + "step": 6716 + }, + { + "epoch": 28.72, + "learning_rate": 9.995489314961892e-05, + "loss": 0.0041, + "step": 6720 + }, + { + "epoch": 28.74, + "learning_rate": 9.995439686733039e-05, + "loss": 0.0026, + "step": 6724 + }, + { + "epoch": 28.75, + "learning_rate": 9.995389787106309e-05, + "loss": 0.0012, + "step": 6728 + }, + { + "epoch": 28.77, + "learning_rate": 9.995339616084409e-05, + "loss": 0.0023, + "step": 6732 + }, + { + "epoch": 28.79, + "learning_rate": 9.995289173670071e-05, + "loss": 0.002, + "step": 6736 + }, + { + "epoch": 28.8, + "learning_rate": 9.995238459866029e-05, + "loss": 0.0035, + "step": 6740 + }, + { + "epoch": 28.82, + "learning_rate": 9.995187474675042e-05, + "loss": 0.0012, + "step": 6744 + }, + { + "epoch": 28.84, + "learning_rate": 9.995136218099878e-05, + "loss": 0.0016, + "step": 6748 + }, + { + "epoch": 28.85, + "learning_rate": 9.995084690143326e-05, + "loss": 0.0047, + "step": 6752 + }, + { + "epoch": 28.87, + "learning_rate": 9.99503289080818e-05, + "loss": 0.0015, + "step": 6756 + }, + { + "epoch": 28.89, + "learning_rate": 9.994980820097258e-05, + "loss": 0.0055, + "step": 6760 + }, + { + "epoch": 28.91, + "learning_rate": 9.994928478013387e-05, + "loss": 0.0013, + "step": 6764 + }, + { + "epoch": 28.92, + "learning_rate": 9.994875864559413e-05, + "loss": 0.0028, + "step": 6768 + }, + { + "epoch": 28.94, + "learning_rate": 9.99482297973819e-05, + "loss": 0.0032, + "step": 6772 + }, + { + "epoch": 28.96, + "learning_rate": 9.994769823552594e-05, + "loss": 0.0039, + "step": 6776 + }, + { + "epoch": 28.97, + "learning_rate": 9.994716396005515e-05, + "loss": 0.002, + "step": 6780 + }, + { + "epoch": 28.99, + "learning_rate": 9.994662697099851e-05, + "loss": 0.0041, + "step": 6784 + }, + { + "epoch": 29.01, + "learning_rate": 9.994608726838523e-05, + "loss": 0.0013, + "step": 6788 + }, + { + "epoch": 29.03, + "learning_rate": 9.994554485224463e-05, + "loss": 0.001, + "step": 6792 + }, + { + "epoch": 29.04, + "learning_rate": 9.994499972260616e-05, + "loss": 0.0017, + "step": 6796 + }, + { + "epoch": 29.06, + "learning_rate": 9.994445187949944e-05, + "loss": 0.0041, + "step": 6800 + }, + { + "epoch": 29.08, + "learning_rate": 9.994390132295426e-05, + "loss": 0.0044, + "step": 6804 + }, + { + "epoch": 29.09, + "learning_rate": 9.99433480530005e-05, + "loss": 0.0037, + "step": 6808 + }, + { + "epoch": 29.11, + "learning_rate": 9.994279206966824e-05, + "loss": 0.0013, + "step": 6812 + }, + { + "epoch": 29.13, + "learning_rate": 9.994223337298764e-05, + "loss": 0.0019, + "step": 6816 + }, + { + "epoch": 29.15, + "learning_rate": 9.994167196298914e-05, + "loss": 0.0017, + "step": 6820 + }, + { + "epoch": 29.16, + "learning_rate": 9.994110783970316e-05, + "loss": 0.0024, + "step": 6824 + }, + { + "epoch": 29.18, + "learning_rate": 9.994054100316038e-05, + "loss": 0.0036, + "step": 6828 + }, + { + "epoch": 29.2, + "learning_rate": 9.99399714533916e-05, + "loss": 0.0047, + "step": 6832 + }, + { + "epoch": 29.21, + "learning_rate": 9.993939919042776e-05, + "loss": 0.0036, + "step": 6836 + }, + { + "epoch": 29.23, + "learning_rate": 9.993882421429994e-05, + "loss": 0.0024, + "step": 6840 + }, + { + "epoch": 29.25, + "learning_rate": 9.993824652503939e-05, + "loss": 0.0018, + "step": 6844 + }, + { + "epoch": 29.26, + "learning_rate": 9.99376661226775e-05, + "loss": 0.0035, + "step": 6848 + }, + { + "epoch": 29.28, + "learning_rate": 9.993708300724577e-05, + "loss": 0.0021, + "step": 6852 + }, + { + "epoch": 29.3, + "learning_rate": 9.993649717877593e-05, + "loss": 0.0027, + "step": 6856 + }, + { + "epoch": 29.32, + "learning_rate": 9.993590863729978e-05, + "loss": 0.0031, + "step": 6860 + }, + { + "epoch": 29.33, + "learning_rate": 9.993531738284929e-05, + "loss": 0.0026, + "step": 6864 + }, + { + "epoch": 29.35, + "learning_rate": 9.99347234154566e-05, + "loss": 0.0019, + "step": 6868 + }, + { + "epoch": 29.37, + "learning_rate": 9.993412673515396e-05, + "loss": 0.0034, + "step": 6872 + }, + { + "epoch": 29.38, + "learning_rate": 9.993352734197379e-05, + "loss": 0.0022, + "step": 6876 + }, + { + "epoch": 29.4, + "learning_rate": 9.993292523594867e-05, + "loss": 0.0054, + "step": 6880 + }, + { + "epoch": 29.42, + "learning_rate": 9.993232041711129e-05, + "loss": 0.0025, + "step": 6884 + }, + { + "epoch": 29.44, + "learning_rate": 9.993171288549454e-05, + "loss": 0.004, + "step": 6888 + }, + { + "epoch": 29.45, + "learning_rate": 9.99311026411314e-05, + "loss": 0.003, + "step": 6892 + }, + { + "epoch": 29.47, + "learning_rate": 9.993048968405502e-05, + "loss": 0.0017, + "step": 6896 + }, + { + "epoch": 29.49, + "learning_rate": 9.992987401429873e-05, + "loss": 0.0017, + "step": 6900 + }, + { + "epoch": 29.5, + "learning_rate": 9.992925563189595e-05, + "loss": 0.0028, + "step": 6904 + }, + { + "epoch": 29.52, + "learning_rate": 9.992863453688031e-05, + "loss": 0.0049, + "step": 6908 + }, + { + "epoch": 29.54, + "learning_rate": 9.992801072928551e-05, + "loss": 0.0042, + "step": 6912 + }, + { + "epoch": 29.56, + "learning_rate": 9.992738420914546e-05, + "loss": 0.0019, + "step": 6916 + }, + { + "epoch": 29.57, + "learning_rate": 9.992675497649424e-05, + "loss": 0.0044, + "step": 6920 + }, + { + "epoch": 29.59, + "learning_rate": 9.992612303136597e-05, + "loss": 0.0049, + "step": 6924 + }, + { + "epoch": 29.61, + "learning_rate": 9.992548837379501e-05, + "loss": 0.0028, + "step": 6928 + }, + { + "epoch": 29.62, + "learning_rate": 9.992485100381584e-05, + "loss": 0.0036, + "step": 6932 + }, + { + "epoch": 29.64, + "learning_rate": 9.992421092146309e-05, + "loss": 0.003, + "step": 6936 + }, + { + "epoch": 29.66, + "learning_rate": 9.992356812677152e-05, + "loss": 0.0064, + "step": 6940 + }, + { + "epoch": 29.68, + "learning_rate": 9.992292261977609e-05, + "loss": 0.0013, + "step": 6944 + }, + { + "epoch": 29.69, + "learning_rate": 9.992227440051184e-05, + "loss": 0.0043, + "step": 6948 + }, + { + "epoch": 29.71, + "learning_rate": 9.992162346901399e-05, + "loss": 0.0018, + "step": 6952 + }, + { + "epoch": 29.73, + "learning_rate": 9.99209698253179e-05, + "loss": 0.0048, + "step": 6956 + }, + { + "epoch": 29.74, + "learning_rate": 9.992031346945911e-05, + "loss": 0.0025, + "step": 6960 + }, + { + "epoch": 29.76, + "learning_rate": 9.991965440147324e-05, + "loss": 0.0017, + "step": 6964 + }, + { + "epoch": 29.78, + "learning_rate": 9.991899262139612e-05, + "loss": 0.0022, + "step": 6968 + }, + { + "epoch": 29.79, + "learning_rate": 9.991832812926371e-05, + "loss": 0.0033, + "step": 6972 + }, + { + "epoch": 29.81, + "learning_rate": 9.99176609251121e-05, + "loss": 0.0036, + "step": 6976 + }, + { + "epoch": 29.83, + "learning_rate": 9.991699100897753e-05, + "loss": 0.0047, + "step": 6980 + }, + { + "epoch": 29.85, + "learning_rate": 9.99163183808964e-05, + "loss": 0.0021, + "step": 6984 + }, + { + "epoch": 29.86, + "learning_rate": 9.991564304090525e-05, + "loss": 0.002, + "step": 6988 + }, + { + "epoch": 29.88, + "learning_rate": 9.99149649890408e-05, + "loss": 0.0044, + "step": 6992 + }, + { + "epoch": 29.9, + "learning_rate": 9.991428422533987e-05, + "loss": 0.0025, + "step": 6996 + }, + { + "epoch": 29.91, + "learning_rate": 9.991360074983943e-05, + "loss": 0.006, + "step": 7000 + }, + { + "epoch": 29.91, + "eval_exact_match": 0.502079002079002, + "eval_loss": 0.7090530395507812, + "eval_runtime": 143.1626, + "eval_samples_per_second": 6.72, + "step": 7000 + }, + { + "epoch": 29.93, + "learning_rate": 9.991291456257663e-05, + "loss": 0.0016, + "step": 7004 + }, + { + "epoch": 29.95, + "learning_rate": 9.991222566358874e-05, + "loss": 0.0021, + "step": 7008 + }, + { + "epoch": 29.97, + "learning_rate": 9.99115340529132e-05, + "loss": 0.0012, + "step": 7012 + }, + { + "epoch": 29.98, + "learning_rate": 9.991083973058757e-05, + "loss": 0.0042, + "step": 7016 + }, + { + "epoch": 30.0, + "learning_rate": 9.991014269664958e-05, + "loss": 0.0028, + "step": 7020 + }, + { + "epoch": 30.02, + "learning_rate": 9.990944295113711e-05, + "loss": 0.0038, + "step": 7024 + }, + { + "epoch": 30.03, + "learning_rate": 9.990874049408815e-05, + "loss": 0.0027, + "step": 7028 + }, + { + "epoch": 30.05, + "learning_rate": 9.990803532554087e-05, + "loss": 0.0031, + "step": 7032 + }, + { + "epoch": 30.07, + "learning_rate": 9.990732744553361e-05, + "loss": 0.0041, + "step": 7036 + }, + { + "epoch": 30.09, + "learning_rate": 9.990661685410481e-05, + "loss": 0.002, + "step": 7040 + }, + { + "epoch": 30.1, + "learning_rate": 9.990590355129306e-05, + "loss": 0.0017, + "step": 7044 + }, + { + "epoch": 30.12, + "learning_rate": 9.990518753713712e-05, + "loss": 0.0021, + "step": 7048 + }, + { + "epoch": 30.14, + "learning_rate": 9.990446881167592e-05, + "loss": 0.0021, + "step": 7052 + }, + { + "epoch": 30.15, + "learning_rate": 9.990374737494848e-05, + "loss": 0.0017, + "step": 7056 + }, + { + "epoch": 30.17, + "learning_rate": 9.990302322699399e-05, + "loss": 0.0041, + "step": 7060 + }, + { + "epoch": 30.19, + "learning_rate": 9.99022963678518e-05, + "loss": 0.0013, + "step": 7064 + }, + { + "epoch": 30.21, + "learning_rate": 9.99015667975614e-05, + "loss": 0.0035, + "step": 7068 + }, + { + "epoch": 30.22, + "learning_rate": 9.990083451616245e-05, + "loss": 0.0028, + "step": 7072 + }, + { + "epoch": 30.24, + "learning_rate": 9.990009952369471e-05, + "loss": 0.0013, + "step": 7076 + }, + { + "epoch": 30.26, + "learning_rate": 9.98993618201981e-05, + "loss": 0.0031, + "step": 7080 + }, + { + "epoch": 30.27, + "learning_rate": 9.98986214057127e-05, + "loss": 0.0015, + "step": 7084 + }, + { + "epoch": 30.29, + "learning_rate": 9.98978782802788e-05, + "loss": 0.004, + "step": 7088 + }, + { + "epoch": 30.31, + "learning_rate": 9.989713244393669e-05, + "loss": 0.002, + "step": 7092 + }, + { + "epoch": 30.32, + "learning_rate": 9.989638389672691e-05, + "loss": 0.0028, + "step": 7096 + }, + { + "epoch": 30.34, + "learning_rate": 9.989563263869016e-05, + "loss": 0.0037, + "step": 7100 + }, + { + "epoch": 30.36, + "learning_rate": 9.989487866986724e-05, + "loss": 0.0011, + "step": 7104 + }, + { + "epoch": 30.38, + "learning_rate": 9.98941219902991e-05, + "loss": 0.0018, + "step": 7108 + }, + { + "epoch": 30.39, + "learning_rate": 9.989336260002687e-05, + "loss": 0.0015, + "step": 7112 + }, + { + "epoch": 30.41, + "learning_rate": 9.989260049909179e-05, + "loss": 0.0031, + "step": 7116 + }, + { + "epoch": 30.43, + "learning_rate": 9.989183568753527e-05, + "loss": 0.0039, + "step": 7120 + }, + { + "epoch": 30.44, + "learning_rate": 9.989106816539885e-05, + "loss": 0.0032, + "step": 7124 + }, + { + "epoch": 30.46, + "learning_rate": 9.989029793272425e-05, + "loss": 0.0017, + "step": 7128 + }, + { + "epoch": 30.48, + "learning_rate": 9.988952498955331e-05, + "loss": 0.0011, + "step": 7132 + }, + { + "epoch": 30.5, + "learning_rate": 9.988874933592803e-05, + "loss": 0.0035, + "step": 7136 + }, + { + "epoch": 30.51, + "learning_rate": 9.988797097189052e-05, + "loss": 0.0041, + "step": 7140 + }, + { + "epoch": 30.53, + "learning_rate": 9.988718989748309e-05, + "loss": 0.0044, + "step": 7144 + }, + { + "epoch": 30.55, + "learning_rate": 9.988640611274819e-05, + "loss": 0.0022, + "step": 7148 + }, + { + "epoch": 30.56, + "learning_rate": 9.988561961772838e-05, + "loss": 0.0027, + "step": 7152 + }, + { + "epoch": 30.58, + "learning_rate": 9.988483041246639e-05, + "loss": 0.0018, + "step": 7156 + }, + { + "epoch": 30.6, + "learning_rate": 9.988403849700511e-05, + "loss": 0.0016, + "step": 7160 + }, + { + "epoch": 30.62, + "learning_rate": 9.988324387138754e-05, + "loss": 0.0048, + "step": 7164 + }, + { + "epoch": 30.63, + "learning_rate": 9.988244653565688e-05, + "loss": 0.001, + "step": 7168 + }, + { + "epoch": 30.65, + "learning_rate": 9.988164648985644e-05, + "loss": 0.004, + "step": 7172 + }, + { + "epoch": 30.67, + "learning_rate": 9.988084373402968e-05, + "loss": 0.002, + "step": 7176 + }, + { + "epoch": 30.68, + "learning_rate": 9.988003826822022e-05, + "loss": 0.0025, + "step": 7180 + }, + { + "epoch": 30.7, + "learning_rate": 9.987923009247179e-05, + "loss": 0.0029, + "step": 7184 + }, + { + "epoch": 30.72, + "learning_rate": 9.987841920682834e-05, + "loss": 0.0022, + "step": 7188 + }, + { + "epoch": 30.74, + "learning_rate": 9.987760561133391e-05, + "loss": 0.0023, + "step": 7192 + }, + { + "epoch": 30.75, + "learning_rate": 9.98767893060327e-05, + "loss": 0.0011, + "step": 7196 + }, + { + "epoch": 30.77, + "learning_rate": 9.987597029096907e-05, + "loss": 0.002, + "step": 7200 + }, + { + "epoch": 30.79, + "learning_rate": 9.987514856618748e-05, + "loss": 0.0014, + "step": 7204 + }, + { + "epoch": 30.8, + "learning_rate": 9.987432413173262e-05, + "loss": 0.0025, + "step": 7208 + }, + { + "epoch": 30.82, + "learning_rate": 9.987349698764923e-05, + "loss": 0.0033, + "step": 7212 + }, + { + "epoch": 30.84, + "learning_rate": 9.987266713398232e-05, + "loss": 0.0023, + "step": 7216 + }, + { + "epoch": 30.85, + "learning_rate": 9.987183457077689e-05, + "loss": 0.0026, + "step": 7220 + }, + { + "epoch": 30.87, + "learning_rate": 9.987099929807824e-05, + "loss": 0.0027, + "step": 7224 + }, + { + "epoch": 30.89, + "learning_rate": 9.987016131593173e-05, + "loss": 0.0017, + "step": 7228 + }, + { + "epoch": 30.91, + "learning_rate": 9.986932062438285e-05, + "loss": 0.0015, + "step": 7232 + }, + { + "epoch": 30.92, + "learning_rate": 9.986847722347734e-05, + "loss": 0.0025, + "step": 7236 + }, + { + "epoch": 30.94, + "learning_rate": 9.986763111326096e-05, + "loss": 0.0048, + "step": 7240 + }, + { + "epoch": 30.96, + "learning_rate": 9.986678229377973e-05, + "loss": 0.0034, + "step": 7244 + }, + { + "epoch": 30.97, + "learning_rate": 9.986593076507974e-05, + "loss": 0.004, + "step": 7248 + }, + { + "epoch": 30.99, + "learning_rate": 9.986507652720723e-05, + "loss": 0.0027, + "step": 7252 + }, + { + "epoch": 31.01, + "learning_rate": 9.986421958020863e-05, + "loss": 0.004, + "step": 7256 + }, + { + "epoch": 31.03, + "learning_rate": 9.986335992413054e-05, + "loss": 0.0015, + "step": 7260 + }, + { + "epoch": 31.04, + "learning_rate": 9.98624975590196e-05, + "loss": 0.0023, + "step": 7264 + }, + { + "epoch": 31.06, + "learning_rate": 9.986163248492268e-05, + "loss": 0.0063, + "step": 7268 + }, + { + "epoch": 31.08, + "learning_rate": 9.98607647018868e-05, + "loss": 0.0012, + "step": 7272 + }, + { + "epoch": 31.09, + "learning_rate": 9.98598942099591e-05, + "loss": 0.0052, + "step": 7276 + }, + { + "epoch": 31.11, + "learning_rate": 9.985902100918684e-05, + "loss": 0.0011, + "step": 7280 + }, + { + "epoch": 31.13, + "learning_rate": 9.985814509961748e-05, + "loss": 0.0039, + "step": 7284 + }, + { + "epoch": 31.15, + "learning_rate": 9.985726648129863e-05, + "loss": 0.0085, + "step": 7288 + }, + { + "epoch": 31.16, + "learning_rate": 9.9856385154278e-05, + "loss": 0.0036, + "step": 7292 + }, + { + "epoch": 31.18, + "learning_rate": 9.98555011186035e-05, + "loss": 0.0015, + "step": 7296 + }, + { + "epoch": 31.2, + "learning_rate": 9.985461437432311e-05, + "loss": 0.0014, + "step": 7300 + }, + { + "epoch": 31.21, + "learning_rate": 9.985372492148504e-05, + "loss": 0.003, + "step": 7304 + }, + { + "epoch": 31.23, + "learning_rate": 9.985283276013761e-05, + "loss": 0.0022, + "step": 7308 + }, + { + "epoch": 31.25, + "learning_rate": 9.98519378903293e-05, + "loss": 0.0007, + "step": 7312 + }, + { + "epoch": 31.26, + "learning_rate": 9.985104031210869e-05, + "loss": 0.0033, + "step": 7316 + }, + { + "epoch": 31.28, + "learning_rate": 9.985014002552457e-05, + "loss": 0.0025, + "step": 7320 + }, + { + "epoch": 31.3, + "learning_rate": 9.984923703062588e-05, + "loss": 0.004, + "step": 7324 + }, + { + "epoch": 31.32, + "learning_rate": 9.984833132746163e-05, + "loss": 0.0019, + "step": 7328 + }, + { + "epoch": 31.33, + "learning_rate": 9.984742291608104e-05, + "loss": 0.002, + "step": 7332 + }, + { + "epoch": 31.35, + "learning_rate": 9.984651179653348e-05, + "loss": 0.0011, + "step": 7336 + }, + { + "epoch": 31.37, + "learning_rate": 9.984559796886844e-05, + "loss": 0.0012, + "step": 7340 + }, + { + "epoch": 31.38, + "learning_rate": 9.984468143313555e-05, + "loss": 0.0013, + "step": 7344 + }, + { + "epoch": 31.4, + "learning_rate": 9.984376218938463e-05, + "loss": 0.0024, + "step": 7348 + }, + { + "epoch": 31.42, + "learning_rate": 9.984284023766562e-05, + "loss": 0.0057, + "step": 7352 + }, + { + "epoch": 31.44, + "learning_rate": 9.98419155780286e-05, + "loss": 0.0017, + "step": 7356 + }, + { + "epoch": 31.45, + "learning_rate": 9.98409882105238e-05, + "loss": 0.0012, + "step": 7360 + }, + { + "epoch": 31.47, + "learning_rate": 9.984005813520162e-05, + "loss": 0.0014, + "step": 7364 + }, + { + "epoch": 31.49, + "learning_rate": 9.983912535211258e-05, + "loss": 0.0015, + "step": 7368 + }, + { + "epoch": 31.5, + "learning_rate": 9.983818986130736e-05, + "loss": 0.0021, + "step": 7372 + }, + { + "epoch": 31.52, + "learning_rate": 9.983725166283676e-05, + "loss": 0.001, + "step": 7376 + }, + { + "epoch": 31.54, + "learning_rate": 9.983631075675179e-05, + "loss": 0.0026, + "step": 7380 + }, + { + "epoch": 31.56, + "learning_rate": 9.983536714310355e-05, + "loss": 0.0015, + "step": 7384 + }, + { + "epoch": 31.57, + "learning_rate": 9.983442082194333e-05, + "loss": 0.0016, + "step": 7388 + }, + { + "epoch": 31.59, + "learning_rate": 9.98334717933225e-05, + "loss": 0.0013, + "step": 7392 + }, + { + "epoch": 31.61, + "learning_rate": 9.983252005729264e-05, + "loss": 0.0035, + "step": 7396 + }, + { + "epoch": 31.62, + "learning_rate": 9.983156561390547e-05, + "loss": 0.0008, + "step": 7400 + }, + { + "epoch": 31.64, + "learning_rate": 9.983060846321282e-05, + "loss": 0.0021, + "step": 7404 + }, + { + "epoch": 31.66, + "learning_rate": 9.982964860526671e-05, + "loss": 0.0017, + "step": 7408 + }, + { + "epoch": 31.68, + "learning_rate": 9.982868604011929e-05, + "loss": 0.0025, + "step": 7412 + }, + { + "epoch": 31.69, + "learning_rate": 9.982772076782283e-05, + "loss": 0.0009, + "step": 7416 + }, + { + "epoch": 31.71, + "learning_rate": 9.982675278842981e-05, + "loss": 0.0026, + "step": 7420 + }, + { + "epoch": 31.73, + "learning_rate": 9.982578210199279e-05, + "loss": 0.0022, + "step": 7424 + }, + { + "epoch": 31.74, + "learning_rate": 9.98248087085645e-05, + "loss": 0.002, + "step": 7428 + }, + { + "epoch": 31.76, + "learning_rate": 9.982383260819785e-05, + "loss": 0.002, + "step": 7432 + }, + { + "epoch": 31.78, + "learning_rate": 9.982285380094587e-05, + "loss": 0.0018, + "step": 7436 + }, + { + "epoch": 31.79, + "learning_rate": 9.982187228686172e-05, + "loss": 0.0017, + "step": 7440 + }, + { + "epoch": 31.81, + "learning_rate": 9.982088806599874e-05, + "loss": 0.0035, + "step": 7444 + }, + { + "epoch": 31.83, + "learning_rate": 9.981990113841038e-05, + "loss": 0.0027, + "step": 7448 + }, + { + "epoch": 31.85, + "learning_rate": 9.981891150415029e-05, + "loss": 0.0017, + "step": 7452 + }, + { + "epoch": 31.86, + "learning_rate": 9.981791916327218e-05, + "loss": 0.0029, + "step": 7456 + }, + { + "epoch": 31.88, + "learning_rate": 9.981692411583005e-05, + "loss": 0.0012, + "step": 7460 + }, + { + "epoch": 31.9, + "learning_rate": 9.981592636187789e-05, + "loss": 0.0023, + "step": 7464 + }, + { + "epoch": 31.91, + "learning_rate": 9.981492590146992e-05, + "loss": 0.0023, + "step": 7468 + }, + { + "epoch": 31.93, + "learning_rate": 9.981392273466053e-05, + "loss": 0.0024, + "step": 7472 + }, + { + "epoch": 31.95, + "learning_rate": 9.981291686150418e-05, + "loss": 0.0035, + "step": 7476 + }, + { + "epoch": 31.97, + "learning_rate": 9.981190828205553e-05, + "loss": 0.0029, + "step": 7480 + }, + { + "epoch": 31.98, + "learning_rate": 9.981089699636939e-05, + "loss": 0.0013, + "step": 7484 + }, + { + "epoch": 32.0, + "learning_rate": 9.980988300450067e-05, + "loss": 0.0011, + "step": 7488 + }, + { + "epoch": 32.02, + "learning_rate": 9.98088663065045e-05, + "loss": 0.0022, + "step": 7492 + }, + { + "epoch": 32.03, + "learning_rate": 9.980784690243608e-05, + "loss": 0.0028, + "step": 7496 + }, + { + "epoch": 32.05, + "learning_rate": 9.980682479235082e-05, + "loss": 0.002, + "step": 7500 + }, + { + "epoch": 32.07, + "learning_rate": 9.980579997630423e-05, + "loss": 0.0013, + "step": 7504 + }, + { + "epoch": 32.09, + "learning_rate": 9.9804772454352e-05, + "loss": 0.0044, + "step": 7508 + }, + { + "epoch": 32.1, + "learning_rate": 9.980374222654996e-05, + "loss": 0.002, + "step": 7512 + }, + { + "epoch": 32.12, + "learning_rate": 9.980270929295406e-05, + "loss": 0.0019, + "step": 7516 + }, + { + "epoch": 32.14, + "learning_rate": 9.980167365362044e-05, + "loss": 0.0014, + "step": 7520 + }, + { + "epoch": 32.15, + "learning_rate": 9.980063530860535e-05, + "loss": 0.0061, + "step": 7524 + }, + { + "epoch": 32.17, + "learning_rate": 9.979959425796521e-05, + "loss": 0.0006, + "step": 7528 + }, + { + "epoch": 32.19, + "learning_rate": 9.979855050175659e-05, + "loss": 0.0022, + "step": 7532 + }, + { + "epoch": 32.21, + "learning_rate": 9.979750404003617e-05, + "loss": 0.004, + "step": 7536 + }, + { + "epoch": 32.22, + "learning_rate": 9.979645487286082e-05, + "loss": 0.0024, + "step": 7540 + }, + { + "epoch": 32.24, + "learning_rate": 9.979540300028755e-05, + "loss": 0.001, + "step": 7544 + }, + { + "epoch": 32.26, + "learning_rate": 9.979434842237347e-05, + "loss": 0.0016, + "step": 7548 + }, + { + "epoch": 32.27, + "learning_rate": 9.979329113917593e-05, + "loss": 0.0019, + "step": 7552 + }, + { + "epoch": 32.29, + "learning_rate": 9.979223115075233e-05, + "loss": 0.008, + "step": 7556 + }, + { + "epoch": 32.31, + "learning_rate": 9.979116845716026e-05, + "loss": 0.0041, + "step": 7560 + }, + { + "epoch": 32.32, + "learning_rate": 9.979010305845747e-05, + "loss": 0.0019, + "step": 7564 + }, + { + "epoch": 32.34, + "learning_rate": 9.978903495470184e-05, + "loss": 0.0021, + "step": 7568 + }, + { + "epoch": 32.36, + "learning_rate": 9.97879641459514e-05, + "loss": 0.0012, + "step": 7572 + }, + { + "epoch": 32.38, + "learning_rate": 9.978689063226432e-05, + "loss": 0.0005, + "step": 7576 + }, + { + "epoch": 32.39, + "learning_rate": 9.978581441369892e-05, + "loss": 0.0009, + "step": 7580 + }, + { + "epoch": 32.41, + "learning_rate": 9.978473549031368e-05, + "loss": 0.0023, + "step": 7584 + }, + { + "epoch": 32.43, + "learning_rate": 9.97836538621672e-05, + "loss": 0.0013, + "step": 7588 + }, + { + "epoch": 32.44, + "learning_rate": 9.978256952931827e-05, + "loss": 0.0018, + "step": 7592 + }, + { + "epoch": 32.46, + "learning_rate": 9.978148249182578e-05, + "loss": 0.0034, + "step": 7596 + }, + { + "epoch": 32.48, + "learning_rate": 9.97803927497488e-05, + "loss": 0.0006, + "step": 7600 + }, + { + "epoch": 32.5, + "learning_rate": 9.977930030314653e-05, + "loss": 0.0017, + "step": 7604 + }, + { + "epoch": 32.51, + "learning_rate": 9.977820515207831e-05, + "loss": 0.0005, + "step": 7608 + }, + { + "epoch": 32.53, + "learning_rate": 9.977710729660365e-05, + "loss": 0.0008, + "step": 7612 + }, + { + "epoch": 32.55, + "learning_rate": 9.97760067367822e-05, + "loss": 0.0013, + "step": 7616 + }, + { + "epoch": 32.56, + "learning_rate": 9.977490347267375e-05, + "loss": 0.0021, + "step": 7620 + }, + { + "epoch": 32.58, + "learning_rate": 9.977379750433824e-05, + "loss": 0.0037, + "step": 7624 + }, + { + "epoch": 32.6, + "learning_rate": 9.977268883183575e-05, + "loss": 0.0007, + "step": 7628 + }, + { + "epoch": 32.62, + "learning_rate": 9.977157745522652e-05, + "loss": 0.0025, + "step": 7632 + }, + { + "epoch": 32.63, + "learning_rate": 9.977046337457092e-05, + "loss": 0.0029, + "step": 7636 + }, + { + "epoch": 32.65, + "learning_rate": 9.976934658992949e-05, + "loss": 0.0022, + "step": 7640 + }, + { + "epoch": 32.67, + "learning_rate": 9.97682271013629e-05, + "loss": 0.002, + "step": 7644 + }, + { + "epoch": 32.68, + "learning_rate": 9.976710490893195e-05, + "loss": 0.0019, + "step": 7648 + }, + { + "epoch": 32.7, + "learning_rate": 9.976598001269765e-05, + "loss": 0.002, + "step": 7652 + }, + { + "epoch": 32.72, + "learning_rate": 9.976485241272107e-05, + "loss": 0.0009, + "step": 7656 + }, + { + "epoch": 32.74, + "learning_rate": 9.97637221090635e-05, + "loss": 0.0046, + "step": 7660 + }, + { + "epoch": 32.75, + "learning_rate": 9.976258910178635e-05, + "loss": 0.002, + "step": 7664 + }, + { + "epoch": 32.77, + "learning_rate": 9.976145339095115e-05, + "loss": 0.001, + "step": 7668 + }, + { + "epoch": 32.79, + "learning_rate": 9.976031497661964e-05, + "loss": 0.0008, + "step": 7672 + }, + { + "epoch": 32.8, + "learning_rate": 9.975917385885363e-05, + "loss": 0.0016, + "step": 7676 + }, + { + "epoch": 32.82, + "learning_rate": 9.975803003771513e-05, + "loss": 0.0009, + "step": 7680 + }, + { + "epoch": 32.84, + "learning_rate": 9.97568835132663e-05, + "loss": 0.0007, + "step": 7684 + }, + { + "epoch": 32.85, + "learning_rate": 9.975573428556941e-05, + "loss": 0.0008, + "step": 7688 + }, + { + "epoch": 32.87, + "learning_rate": 9.975458235468691e-05, + "loss": 0.0007, + "step": 7692 + }, + { + "epoch": 32.89, + "learning_rate": 9.975342772068137e-05, + "loss": 0.0004, + "step": 7696 + }, + { + "epoch": 32.91, + "learning_rate": 9.975227038361552e-05, + "loss": 0.0009, + "step": 7700 + }, + { + "epoch": 32.92, + "learning_rate": 9.975111034355225e-05, + "loss": 0.0006, + "step": 7704 + }, + { + "epoch": 32.94, + "learning_rate": 9.974994760055457e-05, + "loss": 0.0029, + "step": 7708 + }, + { + "epoch": 32.96, + "learning_rate": 9.974878215468566e-05, + "loss": 0.0026, + "step": 7712 + }, + { + "epoch": 32.97, + "learning_rate": 9.974761400600883e-05, + "loss": 0.0011, + "step": 7716 + }, + { + "epoch": 32.99, + "learning_rate": 9.974644315458756e-05, + "loss": 0.001, + "step": 7720 + }, + { + "epoch": 33.01, + "learning_rate": 9.974526960048545e-05, + "loss": 0.0004, + "step": 7724 + }, + { + "epoch": 33.03, + "learning_rate": 9.974409334376626e-05, + "loss": 0.0034, + "step": 7728 + }, + { + "epoch": 33.04, + "learning_rate": 9.974291438449388e-05, + "loss": 0.0015, + "step": 7732 + }, + { + "epoch": 33.06, + "learning_rate": 9.974173272273239e-05, + "loss": 0.002, + "step": 7736 + }, + { + "epoch": 33.08, + "learning_rate": 9.974054835854598e-05, + "loss": 0.0028, + "step": 7740 + }, + { + "epoch": 33.09, + "learning_rate": 9.973936129199898e-05, + "loss": 0.003, + "step": 7744 + }, + { + "epoch": 33.11, + "learning_rate": 9.97381715231559e-05, + "loss": 0.0017, + "step": 7748 + }, + { + "epoch": 33.13, + "learning_rate": 9.973697905208136e-05, + "loss": 0.0019, + "step": 7752 + }, + { + "epoch": 33.15, + "learning_rate": 9.973578387884017e-05, + "loss": 0.0009, + "step": 7756 + }, + { + "epoch": 33.16, + "learning_rate": 9.973458600349724e-05, + "loss": 0.001, + "step": 7760 + }, + { + "epoch": 33.18, + "learning_rate": 9.973338542611766e-05, + "loss": 0.0003, + "step": 7764 + }, + { + "epoch": 33.2, + "learning_rate": 9.973218214676668e-05, + "loss": 0.0007, + "step": 7768 + }, + { + "epoch": 33.21, + "learning_rate": 9.973097616550961e-05, + "loss": 0.0006, + "step": 7772 + }, + { + "epoch": 33.23, + "learning_rate": 9.972976748241203e-05, + "loss": 0.0048, + "step": 7776 + }, + { + "epoch": 33.25, + "learning_rate": 9.972855609753959e-05, + "loss": 0.0006, + "step": 7780 + }, + { + "epoch": 33.26, + "learning_rate": 9.97273420109581e-05, + "loss": 0.0005, + "step": 7784 + }, + { + "epoch": 33.28, + "learning_rate": 9.972612522273352e-05, + "loss": 0.0021, + "step": 7788 + }, + { + "epoch": 33.3, + "learning_rate": 9.972490573293196e-05, + "loss": 0.0008, + "step": 7792 + }, + { + "epoch": 33.32, + "learning_rate": 9.972368354161965e-05, + "loss": 0.0013, + "step": 7796 + }, + { + "epoch": 33.33, + "learning_rate": 9.972245864886302e-05, + "loss": 0.0017, + "step": 7800 + }, + { + "epoch": 33.35, + "learning_rate": 9.972123105472862e-05, + "loss": 0.0012, + "step": 7804 + }, + { + "epoch": 33.37, + "learning_rate": 9.972000075928311e-05, + "loss": 0.001, + "step": 7808 + }, + { + "epoch": 33.38, + "learning_rate": 9.971876776259339e-05, + "loss": 0.0016, + "step": 7812 + }, + { + "epoch": 33.4, + "learning_rate": 9.971753206472638e-05, + "loss": 0.002, + "step": 7816 + }, + { + "epoch": 33.42, + "learning_rate": 9.971629366574926e-05, + "loss": 0.0023, + "step": 7820 + }, + { + "epoch": 33.44, + "learning_rate": 9.971505256572928e-05, + "loss": 0.0011, + "step": 7824 + }, + { + "epoch": 33.45, + "learning_rate": 9.971380876473391e-05, + "loss": 0.0006, + "step": 7828 + }, + { + "epoch": 33.47, + "learning_rate": 9.971256226283068e-05, + "loss": 0.0011, + "step": 7832 + }, + { + "epoch": 33.49, + "learning_rate": 9.971131306008734e-05, + "loss": 0.0006, + "step": 7836 + }, + { + "epoch": 33.5, + "learning_rate": 9.971006115657174e-05, + "loss": 0.0006, + "step": 7840 + }, + { + "epoch": 33.52, + "learning_rate": 9.970880655235191e-05, + "loss": 0.001, + "step": 7844 + }, + { + "epoch": 33.54, + "learning_rate": 9.970754924749601e-05, + "loss": 0.0006, + "step": 7848 + }, + { + "epoch": 33.56, + "learning_rate": 9.970628924207232e-05, + "loss": 0.0049, + "step": 7852 + }, + { + "epoch": 33.57, + "learning_rate": 9.970502653614934e-05, + "loss": 0.0012, + "step": 7856 + }, + { + "epoch": 33.59, + "learning_rate": 9.970376112979564e-05, + "loss": 0.001, + "step": 7860 + }, + { + "epoch": 33.61, + "learning_rate": 9.970249302307999e-05, + "loss": 0.0016, + "step": 7864 + }, + { + "epoch": 33.62, + "learning_rate": 9.970122221607128e-05, + "loss": 0.0008, + "step": 7868 + }, + { + "epoch": 33.64, + "learning_rate": 9.969994870883852e-05, + "loss": 0.0011, + "step": 7872 + }, + { + "epoch": 33.66, + "learning_rate": 9.969867250145094e-05, + "loss": 0.0013, + "step": 7876 + }, + { + "epoch": 33.68, + "learning_rate": 9.969739359397785e-05, + "loss": 0.002, + "step": 7880 + }, + { + "epoch": 33.69, + "learning_rate": 9.969611198648876e-05, + "loss": 0.002, + "step": 7884 + }, + { + "epoch": 33.71, + "learning_rate": 9.969482767905326e-05, + "loss": 0.0014, + "step": 7888 + }, + { + "epoch": 33.73, + "learning_rate": 9.969354067174115e-05, + "loss": 0.0008, + "step": 7892 + }, + { + "epoch": 33.74, + "learning_rate": 9.969225096462234e-05, + "loss": 0.0013, + "step": 7896 + }, + { + "epoch": 33.76, + "learning_rate": 9.969095855776692e-05, + "loss": 0.0043, + "step": 7900 + }, + { + "epoch": 33.78, + "learning_rate": 9.968966345124509e-05, + "loss": 0.0015, + "step": 7904 + }, + { + "epoch": 33.79, + "learning_rate": 9.96883656451272e-05, + "loss": 0.0008, + "step": 7908 + }, + { + "epoch": 33.81, + "learning_rate": 9.968706513948378e-05, + "loss": 0.0025, + "step": 7912 + }, + { + "epoch": 33.83, + "learning_rate": 9.968576193438548e-05, + "loss": 0.0021, + "step": 7916 + }, + { + "epoch": 33.85, + "learning_rate": 9.968445602990309e-05, + "loss": 0.0016, + "step": 7920 + }, + { + "epoch": 33.86, + "learning_rate": 9.968314742610758e-05, + "loss": 0.0035, + "step": 7924 + }, + { + "epoch": 33.88, + "learning_rate": 9.968183612307003e-05, + "loss": 0.0033, + "step": 7928 + }, + { + "epoch": 33.9, + "learning_rate": 9.968052212086168e-05, + "loss": 0.0039, + "step": 7932 + }, + { + "epoch": 33.91, + "learning_rate": 9.967920541955392e-05, + "loss": 0.0021, + "step": 7936 + }, + { + "epoch": 33.93, + "learning_rate": 9.96778860192183e-05, + "loss": 0.0035, + "step": 7940 + }, + { + "epoch": 33.95, + "learning_rate": 9.967656391992647e-05, + "loss": 0.0005, + "step": 7944 + }, + { + "epoch": 33.97, + "learning_rate": 9.96752391217503e-05, + "loss": 0.0016, + "step": 7948 + }, + { + "epoch": 33.98, + "learning_rate": 9.967391162476174e-05, + "loss": 0.0014, + "step": 7952 + }, + { + "epoch": 34.0, + "learning_rate": 9.967258142903291e-05, + "loss": 0.0031, + "step": 7956 + }, + { + "epoch": 34.02, + "learning_rate": 9.967124853463608e-05, + "loss": 0.0027, + "step": 7960 + }, + { + "epoch": 34.03, + "learning_rate": 9.966991294164368e-05, + "loss": 0.0011, + "step": 7964 + }, + { + "epoch": 34.05, + "learning_rate": 9.966857465012827e-05, + "loss": 0.0019, + "step": 7968 + }, + { + "epoch": 34.07, + "learning_rate": 9.966723366016253e-05, + "loss": 0.003, + "step": 7972 + }, + { + "epoch": 34.09, + "learning_rate": 9.966588997181933e-05, + "loss": 0.0013, + "step": 7976 + }, + { + "epoch": 34.1, + "learning_rate": 9.96645435851717e-05, + "loss": 0.002, + "step": 7980 + }, + { + "epoch": 34.12, + "learning_rate": 9.966319450029274e-05, + "loss": 0.0021, + "step": 7984 + }, + { + "epoch": 34.14, + "learning_rate": 9.966184271725578e-05, + "loss": 0.0033, + "step": 7988 + }, + { + "epoch": 34.15, + "learning_rate": 9.966048823613425e-05, + "loss": 0.001, + "step": 7992 + }, + { + "epoch": 34.17, + "learning_rate": 9.965913105700173e-05, + "loss": 0.0015, + "step": 7996 + }, + { + "epoch": 34.19, + "learning_rate": 9.965777117993196e-05, + "loss": 0.0013, + "step": 8000 + }, + { + "epoch": 34.19, + "eval_exact_match": 0.5103950103950103, + "eval_loss": 0.726246178150177, + "eval_runtime": 130.3318, + "eval_samples_per_second": 7.381, + "step": 8000 + }, + { + "epoch": 34.21, + "learning_rate": 9.965640860499883e-05, + "loss": 0.0044, + "step": 8004 + }, + { + "epoch": 34.22, + "learning_rate": 9.965504333227636e-05, + "loss": 0.0018, + "step": 8008 + }, + { + "epoch": 34.24, + "learning_rate": 9.965367536183872e-05, + "loss": 0.0017, + "step": 8012 + }, + { + "epoch": 34.26, + "learning_rate": 9.965230469376024e-05, + "loss": 0.002, + "step": 8016 + }, + { + "epoch": 34.27, + "learning_rate": 9.965093132811539e-05, + "loss": 0.0006, + "step": 8020 + }, + { + "epoch": 34.29, + "learning_rate": 9.964955526497874e-05, + "loss": 0.0014, + "step": 8024 + }, + { + "epoch": 34.31, + "learning_rate": 9.964817650442512e-05, + "loss": 0.0015, + "step": 8028 + }, + { + "epoch": 34.32, + "learning_rate": 9.964679504652939e-05, + "loss": 0.0038, + "step": 8032 + }, + { + "epoch": 34.34, + "learning_rate": 9.964541089136661e-05, + "loss": 0.0006, + "step": 8036 + }, + { + "epoch": 34.36, + "learning_rate": 9.9644024039012e-05, + "loss": 0.0024, + "step": 8040 + }, + { + "epoch": 34.38, + "learning_rate": 9.964263448954087e-05, + "loss": 0.0022, + "step": 8044 + }, + { + "epoch": 34.39, + "learning_rate": 9.964124224302877e-05, + "loss": 0.0045, + "step": 8048 + }, + { + "epoch": 34.41, + "learning_rate": 9.963984729955128e-05, + "loss": 0.0012, + "step": 8052 + }, + { + "epoch": 34.43, + "learning_rate": 9.96384496591842e-05, + "loss": 0.0018, + "step": 8056 + }, + { + "epoch": 34.44, + "learning_rate": 9.963704932200351e-05, + "loss": 0.0014, + "step": 8060 + }, + { + "epoch": 34.46, + "learning_rate": 9.963564628808523e-05, + "loss": 0.0019, + "step": 8064 + }, + { + "epoch": 34.48, + "learning_rate": 9.963424055750561e-05, + "loss": 0.0006, + "step": 8068 + }, + { + "epoch": 34.5, + "learning_rate": 9.963283213034103e-05, + "loss": 0.0013, + "step": 8072 + }, + { + "epoch": 34.51, + "learning_rate": 9.963142100666799e-05, + "loss": 0.0026, + "step": 8076 + }, + { + "epoch": 34.53, + "learning_rate": 9.963000718656316e-05, + "loss": 0.0021, + "step": 8080 + }, + { + "epoch": 34.55, + "learning_rate": 9.962859067010335e-05, + "loss": 0.0031, + "step": 8084 + }, + { + "epoch": 34.56, + "learning_rate": 9.962717145736554e-05, + "loss": 0.0012, + "step": 8088 + }, + { + "epoch": 34.58, + "learning_rate": 9.962574954842682e-05, + "loss": 0.0004, + "step": 8092 + }, + { + "epoch": 34.6, + "learning_rate": 9.962432494336442e-05, + "loss": 0.0014, + "step": 8096 + }, + { + "epoch": 34.62, + "learning_rate": 9.962289764225579e-05, + "loss": 0.0045, + "step": 8100 + }, + { + "epoch": 34.63, + "learning_rate": 9.962146764517841e-05, + "loss": 0.0006, + "step": 8104 + }, + { + "epoch": 34.65, + "learning_rate": 9.962003495221002e-05, + "loss": 0.0021, + "step": 8108 + }, + { + "epoch": 34.67, + "learning_rate": 9.961859956342843e-05, + "loss": 0.0015, + "step": 8112 + }, + { + "epoch": 34.68, + "learning_rate": 9.961716147891163e-05, + "loss": 0.0008, + "step": 8116 + }, + { + "epoch": 34.7, + "learning_rate": 9.961572069873775e-05, + "loss": 0.0037, + "step": 8120 + }, + { + "epoch": 34.72, + "learning_rate": 9.961427722298507e-05, + "loss": 0.0009, + "step": 8124 + }, + { + "epoch": 34.74, + "learning_rate": 9.961283105173202e-05, + "loss": 0.0023, + "step": 8128 + }, + { + "epoch": 34.75, + "learning_rate": 9.961138218505714e-05, + "loss": 0.0029, + "step": 8132 + }, + { + "epoch": 34.77, + "learning_rate": 9.960993062303919e-05, + "loss": 0.0008, + "step": 8136 + }, + { + "epoch": 34.79, + "learning_rate": 9.960847636575699e-05, + "loss": 0.0018, + "step": 8140 + }, + { + "epoch": 34.8, + "learning_rate": 9.960701941328958e-05, + "loss": 0.0014, + "step": 8144 + }, + { + "epoch": 34.82, + "learning_rate": 9.96055597657161e-05, + "loss": 0.0011, + "step": 8148 + }, + { + "epoch": 34.84, + "learning_rate": 9.960409742311584e-05, + "loss": 0.0009, + "step": 8152 + }, + { + "epoch": 34.85, + "learning_rate": 9.960263238556828e-05, + "loss": 0.0048, + "step": 8156 + }, + { + "epoch": 34.87, + "learning_rate": 9.9601164653153e-05, + "loss": 0.0022, + "step": 8160 + }, + { + "epoch": 34.89, + "learning_rate": 9.95996942259497e-05, + "loss": 0.0086, + "step": 8164 + }, + { + "epoch": 34.91, + "learning_rate": 9.959822110403832e-05, + "loss": 0.0022, + "step": 8168 + }, + { + "epoch": 34.92, + "learning_rate": 9.959674528749888e-05, + "loss": 0.0008, + "step": 8172 + }, + { + "epoch": 34.94, + "learning_rate": 9.959526677641156e-05, + "loss": 0.0015, + "step": 8176 + }, + { + "epoch": 34.96, + "learning_rate": 9.959378557085668e-05, + "loss": 0.0016, + "step": 8180 + }, + { + "epoch": 34.97, + "learning_rate": 9.959230167091471e-05, + "loss": 0.0005, + "step": 8184 + }, + { + "epoch": 34.99, + "learning_rate": 9.959081507666626e-05, + "loss": 0.0009, + "step": 8188 + }, + { + "epoch": 35.01, + "learning_rate": 9.958932578819213e-05, + "loss": 0.0011, + "step": 8192 + }, + { + "epoch": 35.03, + "learning_rate": 9.95878338055732e-05, + "loss": 0.0018, + "step": 8196 + }, + { + "epoch": 35.04, + "learning_rate": 9.958633912889054e-05, + "loss": 0.0006, + "step": 8200 + }, + { + "epoch": 35.06, + "learning_rate": 9.958484175822535e-05, + "loss": 0.0028, + "step": 8204 + }, + { + "epoch": 35.08, + "learning_rate": 9.958334169365899e-05, + "loss": 0.0011, + "step": 8208 + }, + { + "epoch": 35.09, + "learning_rate": 9.958183893527296e-05, + "loss": 0.0039, + "step": 8212 + }, + { + "epoch": 35.11, + "learning_rate": 9.958033348314888e-05, + "loss": 0.0019, + "step": 8216 + }, + { + "epoch": 35.13, + "learning_rate": 9.957882533736856e-05, + "loss": 0.0022, + "step": 8220 + }, + { + "epoch": 35.15, + "learning_rate": 9.957731449801393e-05, + "loss": 0.002, + "step": 8224 + }, + { + "epoch": 35.16, + "learning_rate": 9.957580096516707e-05, + "loss": 0.0025, + "step": 8228 + }, + { + "epoch": 35.18, + "learning_rate": 9.957428473891023e-05, + "loss": 0.0025, + "step": 8232 + }, + { + "epoch": 35.2, + "learning_rate": 9.957276581932575e-05, + "loss": 0.0018, + "step": 8236 + }, + { + "epoch": 35.21, + "learning_rate": 9.957124420649617e-05, + "loss": 0.0011, + "step": 8240 + }, + { + "epoch": 35.23, + "learning_rate": 9.956971990050417e-05, + "loss": 0.0028, + "step": 8244 + }, + { + "epoch": 35.25, + "learning_rate": 9.956819290143255e-05, + "loss": 0.0021, + "step": 8248 + }, + { + "epoch": 35.26, + "learning_rate": 9.956666320936425e-05, + "loss": 0.0012, + "step": 8252 + }, + { + "epoch": 35.28, + "learning_rate": 9.956513082438243e-05, + "loss": 0.0011, + "step": 8256 + }, + { + "epoch": 35.3, + "learning_rate": 9.956359574657028e-05, + "loss": 0.0027, + "step": 8260 + }, + { + "epoch": 35.32, + "learning_rate": 9.956205797601126e-05, + "loss": 0.0009, + "step": 8264 + }, + { + "epoch": 35.33, + "learning_rate": 9.956051751278888e-05, + "loss": 0.0021, + "step": 8268 + }, + { + "epoch": 35.35, + "learning_rate": 9.955897435698683e-05, + "loss": 0.0034, + "step": 8272 + }, + { + "epoch": 35.37, + "learning_rate": 9.955742850868895e-05, + "loss": 0.0011, + "step": 8276 + }, + { + "epoch": 35.38, + "learning_rate": 9.955587996797925e-05, + "loss": 0.0055, + "step": 8280 + }, + { + "epoch": 35.4, + "learning_rate": 9.955432873494183e-05, + "loss": 0.0007, + "step": 8284 + }, + { + "epoch": 35.42, + "learning_rate": 9.955277480966098e-05, + "loss": 0.002, + "step": 8288 + }, + { + "epoch": 35.44, + "learning_rate": 9.955121819222112e-05, + "loss": 0.0027, + "step": 8292 + }, + { + "epoch": 35.45, + "learning_rate": 9.954965888270684e-05, + "loss": 0.0018, + "step": 8296 + }, + { + "epoch": 35.47, + "learning_rate": 9.954809688120284e-05, + "loss": 0.0014, + "step": 8300 + }, + { + "epoch": 35.49, + "learning_rate": 9.954653218779397e-05, + "loss": 0.0024, + "step": 8304 + }, + { + "epoch": 35.5, + "learning_rate": 9.954496480256525e-05, + "loss": 0.0034, + "step": 8308 + }, + { + "epoch": 35.52, + "learning_rate": 9.954339472560183e-05, + "loss": 0.0037, + "step": 8312 + }, + { + "epoch": 35.54, + "learning_rate": 9.954182195698902e-05, + "loss": 0.0012, + "step": 8316 + }, + { + "epoch": 35.56, + "learning_rate": 9.954024649681227e-05, + "loss": 0.0008, + "step": 8320 + }, + { + "epoch": 35.57, + "learning_rate": 9.953866834515717e-05, + "loss": 0.0009, + "step": 8324 + }, + { + "epoch": 35.59, + "learning_rate": 9.953708750210943e-05, + "loss": 0.0021, + "step": 8328 + }, + { + "epoch": 35.61, + "learning_rate": 9.953550396775498e-05, + "loss": 0.0036, + "step": 8332 + }, + { + "epoch": 35.62, + "learning_rate": 9.953391774217984e-05, + "loss": 0.0013, + "step": 8336 + }, + { + "epoch": 35.64, + "learning_rate": 9.953232882547017e-05, + "loss": 0.0008, + "step": 8340 + }, + { + "epoch": 35.66, + "learning_rate": 9.95307372177123e-05, + "loss": 0.0011, + "step": 8344 + }, + { + "epoch": 35.68, + "learning_rate": 9.952914291899271e-05, + "loss": 0.0033, + "step": 8348 + }, + { + "epoch": 35.69, + "learning_rate": 9.952754592939801e-05, + "loss": 0.0012, + "step": 8352 + }, + { + "epoch": 35.71, + "learning_rate": 9.952594624901498e-05, + "loss": 0.0017, + "step": 8356 + }, + { + "epoch": 35.73, + "learning_rate": 9.95243438779305e-05, + "loss": 0.002, + "step": 8360 + }, + { + "epoch": 35.74, + "learning_rate": 9.952273881623166e-05, + "loss": 0.0019, + "step": 8364 + }, + { + "epoch": 35.76, + "learning_rate": 9.952113106400562e-05, + "loss": 0.0042, + "step": 8368 + }, + { + "epoch": 35.78, + "learning_rate": 9.951952062133977e-05, + "loss": 0.0017, + "step": 8372 + }, + { + "epoch": 35.79, + "learning_rate": 9.951790748832158e-05, + "loss": 0.0017, + "step": 8376 + }, + { + "epoch": 35.81, + "learning_rate": 9.951629166503868e-05, + "loss": 0.0014, + "step": 8380 + }, + { + "epoch": 35.83, + "learning_rate": 9.95146731515789e-05, + "loss": 0.0016, + "step": 8384 + }, + { + "epoch": 35.85, + "learning_rate": 9.951305194803013e-05, + "loss": 0.0005, + "step": 8388 + }, + { + "epoch": 35.86, + "learning_rate": 9.951142805448046e-05, + "loss": 0.0008, + "step": 8392 + }, + { + "epoch": 35.88, + "learning_rate": 9.95098014710181e-05, + "loss": 0.0029, + "step": 8396 + }, + { + "epoch": 35.9, + "learning_rate": 9.950817219773145e-05, + "loss": 0.0026, + "step": 8400 + }, + { + "epoch": 35.91, + "learning_rate": 9.950654023470902e-05, + "loss": 0.0044, + "step": 8404 + }, + { + "epoch": 35.93, + "learning_rate": 9.950490558203947e-05, + "loss": 0.0011, + "step": 8408 + }, + { + "epoch": 35.95, + "learning_rate": 9.95032682398116e-05, + "loss": 0.0013, + "step": 8412 + }, + { + "epoch": 35.97, + "learning_rate": 9.950162820811438e-05, + "loss": 0.0014, + "step": 8416 + }, + { + "epoch": 35.98, + "learning_rate": 9.949998548703688e-05, + "loss": 0.002, + "step": 8420 + }, + { + "epoch": 36.0, + "learning_rate": 9.949834007666838e-05, + "loss": 0.0011, + "step": 8424 + }, + { + "epoch": 36.02, + "learning_rate": 9.949669197709827e-05, + "loss": 0.0014, + "step": 8428 + }, + { + "epoch": 36.03, + "learning_rate": 9.94950411884161e-05, + "loss": 0.0015, + "step": 8432 + }, + { + "epoch": 36.05, + "learning_rate": 9.949338771071152e-05, + "loss": 0.0008, + "step": 8436 + }, + { + "epoch": 36.07, + "learning_rate": 9.94917315440744e-05, + "loss": 0.0014, + "step": 8440 + }, + { + "epoch": 36.09, + "learning_rate": 9.94900726885947e-05, + "loss": 0.001, + "step": 8444 + }, + { + "epoch": 36.1, + "learning_rate": 9.948841114436253e-05, + "loss": 0.0024, + "step": 8448 + }, + { + "epoch": 36.12, + "learning_rate": 9.948674691146817e-05, + "loss": 0.0012, + "step": 8452 + }, + { + "epoch": 36.14, + "learning_rate": 9.948507999000205e-05, + "loss": 0.0011, + "step": 8456 + }, + { + "epoch": 36.15, + "learning_rate": 9.948341038005474e-05, + "loss": 0.0014, + "step": 8460 + }, + { + "epoch": 36.17, + "learning_rate": 9.948173808171691e-05, + "loss": 0.0017, + "step": 8464 + }, + { + "epoch": 36.19, + "learning_rate": 9.948006309507944e-05, + "loss": 0.0021, + "step": 8468 + }, + { + "epoch": 36.21, + "learning_rate": 9.947838542023334e-05, + "loss": 0.0021, + "step": 8472 + }, + { + "epoch": 36.22, + "learning_rate": 9.947670505726974e-05, + "loss": 0.0008, + "step": 8476 + }, + { + "epoch": 36.24, + "learning_rate": 9.947502200627994e-05, + "loss": 0.0036, + "step": 8480 + }, + { + "epoch": 36.26, + "learning_rate": 9.947333626735535e-05, + "loss": 0.0006, + "step": 8484 + }, + { + "epoch": 36.27, + "learning_rate": 9.947164784058761e-05, + "loss": 0.0019, + "step": 8488 + }, + { + "epoch": 36.29, + "learning_rate": 9.94699567260684e-05, + "loss": 0.0004, + "step": 8492 + }, + { + "epoch": 36.31, + "learning_rate": 9.946826292388964e-05, + "loss": 0.0028, + "step": 8496 + }, + { + "epoch": 36.32, + "learning_rate": 9.946656643414331e-05, + "loss": 0.0039, + "step": 8500 + }, + { + "epoch": 36.34, + "learning_rate": 9.946486725692161e-05, + "loss": 0.0011, + "step": 8504 + }, + { + "epoch": 36.36, + "learning_rate": 9.946316539231683e-05, + "loss": 0.0007, + "step": 8508 + }, + { + "epoch": 36.38, + "learning_rate": 9.946146084042148e-05, + "loss": 0.0022, + "step": 8512 + }, + { + "epoch": 36.39, + "learning_rate": 9.94597536013281e-05, + "loss": 0.0009, + "step": 8516 + }, + { + "epoch": 36.41, + "learning_rate": 9.945804367512948e-05, + "loss": 0.0015, + "step": 8520 + }, + { + "epoch": 36.43, + "learning_rate": 9.945633106191851e-05, + "loss": 0.001, + "step": 8524 + }, + { + "epoch": 36.44, + "learning_rate": 9.945461576178825e-05, + "loss": 0.0004, + "step": 8528 + }, + { + "epoch": 36.46, + "learning_rate": 9.945289777483186e-05, + "loss": 0.0011, + "step": 8532 + }, + { + "epoch": 36.48, + "learning_rate": 9.945117710114272e-05, + "loss": 0.002, + "step": 8536 + }, + { + "epoch": 36.5, + "learning_rate": 9.944945374081427e-05, + "loss": 0.0013, + "step": 8540 + }, + { + "epoch": 36.51, + "learning_rate": 9.944772769394018e-05, + "loss": 0.0008, + "step": 8544 + }, + { + "epoch": 36.53, + "learning_rate": 9.944599896061417e-05, + "loss": 0.0004, + "step": 8548 + }, + { + "epoch": 36.55, + "learning_rate": 9.944426754093021e-05, + "loss": 0.0007, + "step": 8552 + }, + { + "epoch": 36.56, + "learning_rate": 9.944253343498234e-05, + "loss": 0.0017, + "step": 8556 + }, + { + "epoch": 36.58, + "learning_rate": 9.944079664286479e-05, + "loss": 0.0013, + "step": 8560 + }, + { + "epoch": 36.6, + "learning_rate": 9.94390571646719e-05, + "loss": 0.0013, + "step": 8564 + }, + { + "epoch": 36.62, + "learning_rate": 9.94373150004982e-05, + "loss": 0.0007, + "step": 8568 + }, + { + "epoch": 36.63, + "learning_rate": 9.943557015043832e-05, + "loss": 0.0038, + "step": 8572 + }, + { + "epoch": 36.65, + "learning_rate": 9.943382261458705e-05, + "loss": 0.0022, + "step": 8576 + }, + { + "epoch": 36.67, + "learning_rate": 9.943207239303935e-05, + "loss": 0.0011, + "step": 8580 + }, + { + "epoch": 36.68, + "learning_rate": 9.943031948589029e-05, + "loss": 0.0009, + "step": 8584 + }, + { + "epoch": 36.7, + "learning_rate": 9.942856389323512e-05, + "loss": 0.0012, + "step": 8588 + }, + { + "epoch": 36.72, + "learning_rate": 9.942680561516921e-05, + "loss": 0.0004, + "step": 8592 + }, + { + "epoch": 36.74, + "learning_rate": 9.94250446517881e-05, + "loss": 0.001, + "step": 8596 + }, + { + "epoch": 36.75, + "learning_rate": 9.942328100318745e-05, + "loss": 0.0008, + "step": 8600 + }, + { + "epoch": 36.77, + "learning_rate": 9.942151466946308e-05, + "loss": 0.0009, + "step": 8604 + }, + { + "epoch": 36.79, + "learning_rate": 9.941974565071094e-05, + "loss": 0.0004, + "step": 8608 + }, + { + "epoch": 36.8, + "learning_rate": 9.941797394702717e-05, + "loss": 0.0011, + "step": 8612 + }, + { + "epoch": 36.82, + "learning_rate": 9.941619955850798e-05, + "loss": 0.0014, + "step": 8616 + }, + { + "epoch": 36.84, + "learning_rate": 9.941442248524981e-05, + "loss": 0.0029, + "step": 8620 + }, + { + "epoch": 36.85, + "learning_rate": 9.94126427273492e-05, + "loss": 0.0015, + "step": 8624 + }, + { + "epoch": 36.87, + "learning_rate": 9.941086028490284e-05, + "loss": 0.0011, + "step": 8628 + }, + { + "epoch": 36.89, + "learning_rate": 9.940907515800755e-05, + "loss": 0.0017, + "step": 8632 + }, + { + "epoch": 36.91, + "learning_rate": 9.940728734676035e-05, + "loss": 0.0012, + "step": 8636 + }, + { + "epoch": 36.92, + "learning_rate": 9.940549685125834e-05, + "loss": 0.0006, + "step": 8640 + }, + { + "epoch": 36.94, + "learning_rate": 9.940370367159881e-05, + "loss": 0.0022, + "step": 8644 + }, + { + "epoch": 36.96, + "learning_rate": 9.940190780787917e-05, + "loss": 0.0028, + "step": 8648 + }, + { + "epoch": 36.97, + "learning_rate": 9.9400109260197e-05, + "loss": 0.0019, + "step": 8652 + }, + { + "epoch": 36.99, + "learning_rate": 9.939830802865002e-05, + "loss": 0.002, + "step": 8656 + }, + { + "epoch": 37.01, + "learning_rate": 9.939650411333606e-05, + "loss": 0.0009, + "step": 8660 + }, + { + "epoch": 37.03, + "learning_rate": 9.939469751435315e-05, + "loss": 0.0087, + "step": 8664 + }, + { + "epoch": 37.04, + "learning_rate": 9.939288823179945e-05, + "loss": 0.0025, + "step": 8668 + }, + { + "epoch": 37.06, + "learning_rate": 9.939107626577324e-05, + "loss": 0.001, + "step": 8672 + }, + { + "epoch": 37.08, + "learning_rate": 9.938926161637294e-05, + "loss": 0.0009, + "step": 8676 + }, + { + "epoch": 37.09, + "learning_rate": 9.938744428369717e-05, + "loss": 0.0068, + "step": 8680 + }, + { + "epoch": 37.11, + "learning_rate": 9.938562426784468e-05, + "loss": 0.0018, + "step": 8684 + }, + { + "epoch": 37.13, + "learning_rate": 9.93838015689143e-05, + "loss": 0.0011, + "step": 8688 + }, + { + "epoch": 37.15, + "learning_rate": 9.93819761870051e-05, + "loss": 0.0017, + "step": 8692 + }, + { + "epoch": 37.16, + "learning_rate": 9.938014812221622e-05, + "loss": 0.0017, + "step": 8696 + }, + { + "epoch": 37.18, + "learning_rate": 9.9378317374647e-05, + "loss": 0.0018, + "step": 8700 + }, + { + "epoch": 37.2, + "learning_rate": 9.937648394439689e-05, + "loss": 0.0014, + "step": 8704 + }, + { + "epoch": 37.21, + "learning_rate": 9.937464783156549e-05, + "loss": 0.0024, + "step": 8708 + }, + { + "epoch": 37.23, + "learning_rate": 9.937280903625259e-05, + "loss": 0.0018, + "step": 8712 + }, + { + "epoch": 37.25, + "learning_rate": 9.937096755855806e-05, + "loss": 0.0004, + "step": 8716 + }, + { + "epoch": 37.26, + "learning_rate": 9.936912339858194e-05, + "loss": 0.0026, + "step": 8720 + }, + { + "epoch": 37.28, + "learning_rate": 9.936727655642443e-05, + "loss": 0.0006, + "step": 8724 + }, + { + "epoch": 37.3, + "learning_rate": 9.936542703218589e-05, + "loss": 0.0003, + "step": 8728 + }, + { + "epoch": 37.32, + "learning_rate": 9.936357482596679e-05, + "loss": 0.0018, + "step": 8732 + }, + { + "epoch": 37.33, + "learning_rate": 9.936171993786773e-05, + "loss": 0.001, + "step": 8736 + }, + { + "epoch": 37.35, + "learning_rate": 9.935986236798953e-05, + "loss": 0.0009, + "step": 8740 + }, + { + "epoch": 37.37, + "learning_rate": 9.935800211643307e-05, + "loss": 0.0017, + "step": 8744 + }, + { + "epoch": 37.38, + "learning_rate": 9.935613918329945e-05, + "loss": 0.0028, + "step": 8748 + }, + { + "epoch": 37.4, + "learning_rate": 9.935427356868985e-05, + "loss": 0.0015, + "step": 8752 + }, + { + "epoch": 37.42, + "learning_rate": 9.935240527270566e-05, + "loss": 0.0004, + "step": 8756 + }, + { + "epoch": 37.44, + "learning_rate": 9.935053429544835e-05, + "loss": 0.0015, + "step": 8760 + }, + { + "epoch": 37.45, + "learning_rate": 9.93486606370196e-05, + "loss": 0.001, + "step": 8764 + }, + { + "epoch": 37.47, + "learning_rate": 9.934678429752117e-05, + "loss": 0.0014, + "step": 8768 + }, + { + "epoch": 37.49, + "learning_rate": 9.934490527705502e-05, + "loss": 0.0007, + "step": 8772 + }, + { + "epoch": 37.5, + "learning_rate": 9.934302357572324e-05, + "loss": 0.0042, + "step": 8776 + }, + { + "epoch": 37.52, + "learning_rate": 9.934113919362807e-05, + "loss": 0.0016, + "step": 8780 + }, + { + "epoch": 37.54, + "learning_rate": 9.933925213087184e-05, + "loss": 0.002, + "step": 8784 + }, + { + "epoch": 37.56, + "learning_rate": 9.933736238755714e-05, + "loss": 0.0013, + "step": 8788 + }, + { + "epoch": 37.57, + "learning_rate": 9.933546996378657e-05, + "loss": 0.0036, + "step": 8792 + }, + { + "epoch": 37.59, + "learning_rate": 9.9333574859663e-05, + "loss": 0.002, + "step": 8796 + }, + { + "epoch": 37.61, + "learning_rate": 9.933167707528934e-05, + "loss": 0.0006, + "step": 8800 + }, + { + "epoch": 37.62, + "learning_rate": 9.932977661076873e-05, + "loss": 0.0012, + "step": 8804 + }, + { + "epoch": 37.64, + "learning_rate": 9.932787346620443e-05, + "loss": 0.0007, + "step": 8808 + }, + { + "epoch": 37.66, + "learning_rate": 9.932596764169979e-05, + "loss": 0.001, + "step": 8812 + }, + { + "epoch": 37.68, + "learning_rate": 9.93240591373584e-05, + "loss": 0.0026, + "step": 8816 + }, + { + "epoch": 37.69, + "learning_rate": 9.932214795328391e-05, + "loss": 0.0009, + "step": 8820 + }, + { + "epoch": 37.71, + "learning_rate": 9.932023408958018e-05, + "loss": 0.0014, + "step": 8824 + }, + { + "epoch": 37.73, + "learning_rate": 9.931831754635117e-05, + "loss": 0.0003, + "step": 8828 + }, + { + "epoch": 37.74, + "learning_rate": 9.9316398323701e-05, + "loss": 0.0014, + "step": 8832 + }, + { + "epoch": 37.76, + "learning_rate": 9.931447642173398e-05, + "loss": 0.0021, + "step": 8836 + }, + { + "epoch": 37.78, + "learning_rate": 9.93125518405545e-05, + "loss": 0.0007, + "step": 8840 + }, + { + "epoch": 37.79, + "learning_rate": 9.93106245802671e-05, + "loss": 0.001, + "step": 8844 + }, + { + "epoch": 37.81, + "learning_rate": 9.93086946409765e-05, + "loss": 0.0007, + "step": 8848 + }, + { + "epoch": 37.83, + "learning_rate": 9.930676202278756e-05, + "loss": 0.0011, + "step": 8852 + }, + { + "epoch": 37.85, + "learning_rate": 9.930482672580528e-05, + "loss": 0.0006, + "step": 8856 + }, + { + "epoch": 37.86, + "learning_rate": 9.930288875013479e-05, + "loss": 0.0017, + "step": 8860 + }, + { + "epoch": 37.88, + "learning_rate": 9.930094809588139e-05, + "loss": 0.0027, + "step": 8864 + }, + { + "epoch": 37.9, + "learning_rate": 9.92990047631505e-05, + "loss": 0.0022, + "step": 8868 + }, + { + "epoch": 37.91, + "learning_rate": 9.929705875204771e-05, + "loss": 0.0028, + "step": 8872 + }, + { + "epoch": 37.93, + "learning_rate": 9.929511006267876e-05, + "loss": 0.0015, + "step": 8876 + }, + { + "epoch": 37.95, + "learning_rate": 9.92931586951495e-05, + "loss": 0.0009, + "step": 8880 + }, + { + "epoch": 37.97, + "learning_rate": 9.929120464956594e-05, + "loss": 0.0012, + "step": 8884 + }, + { + "epoch": 37.98, + "learning_rate": 9.928924792603424e-05, + "loss": 0.0008, + "step": 8888 + }, + { + "epoch": 38.0, + "learning_rate": 9.928728852466073e-05, + "loss": 0.0062, + "step": 8892 + }, + { + "epoch": 38.02, + "learning_rate": 9.928532644555186e-05, + "loss": 0.0005, + "step": 8896 + }, + { + "epoch": 38.03, + "learning_rate": 9.928336168881423e-05, + "loss": 0.0003, + "step": 8900 + }, + { + "epoch": 38.05, + "learning_rate": 9.928139425455455e-05, + "loss": 0.0015, + "step": 8904 + }, + { + "epoch": 38.07, + "learning_rate": 9.927942414287974e-05, + "loss": 0.0005, + "step": 8908 + }, + { + "epoch": 38.09, + "learning_rate": 9.927745135389684e-05, + "loss": 0.0007, + "step": 8912 + }, + { + "epoch": 38.1, + "learning_rate": 9.927547588771299e-05, + "loss": 0.0008, + "step": 8916 + }, + { + "epoch": 38.12, + "learning_rate": 9.927349774443555e-05, + "loss": 0.0008, + "step": 8920 + }, + { + "epoch": 38.14, + "learning_rate": 9.9271516924172e-05, + "loss": 0.0013, + "step": 8924 + }, + { + "epoch": 38.15, + "learning_rate": 9.926953342702991e-05, + "loss": 0.0017, + "step": 8928 + }, + { + "epoch": 38.17, + "learning_rate": 9.926754725311709e-05, + "loss": 0.0021, + "step": 8932 + }, + { + "epoch": 38.19, + "learning_rate": 9.926555840254144e-05, + "loss": 0.0008, + "step": 8936 + }, + { + "epoch": 38.21, + "learning_rate": 9.926356687541098e-05, + "loss": 0.0002, + "step": 8940 + }, + { + "epoch": 38.22, + "learning_rate": 9.926157267183394e-05, + "loss": 0.0037, + "step": 8944 + }, + { + "epoch": 38.24, + "learning_rate": 9.925957579191863e-05, + "loss": 0.0024, + "step": 8948 + }, + { + "epoch": 38.26, + "learning_rate": 9.925757623577358e-05, + "loss": 0.001, + "step": 8952 + }, + { + "epoch": 38.27, + "learning_rate": 9.92555740035074e-05, + "loss": 0.0007, + "step": 8956 + }, + { + "epoch": 38.29, + "learning_rate": 9.925356909522887e-05, + "loss": 0.0007, + "step": 8960 + }, + { + "epoch": 38.31, + "learning_rate": 9.925156151104692e-05, + "loss": 0.0011, + "step": 8964 + }, + { + "epoch": 38.32, + "learning_rate": 9.924955125107063e-05, + "loss": 0.0011, + "step": 8968 + }, + { + "epoch": 38.34, + "learning_rate": 9.924753831540918e-05, + "loss": 0.0019, + "step": 8972 + }, + { + "epoch": 38.36, + "learning_rate": 9.924552270417198e-05, + "loss": 0.0008, + "step": 8976 + }, + { + "epoch": 38.38, + "learning_rate": 9.92435044174685e-05, + "loss": 0.0014, + "step": 8980 + }, + { + "epoch": 38.39, + "learning_rate": 9.92414834554084e-05, + "loss": 0.0024, + "step": 8984 + }, + { + "epoch": 38.41, + "learning_rate": 9.92394598181015e-05, + "loss": 0.0007, + "step": 8988 + }, + { + "epoch": 38.43, + "learning_rate": 9.923743350565771e-05, + "loss": 0.0006, + "step": 8992 + }, + { + "epoch": 38.44, + "learning_rate": 9.923540451818713e-05, + "loss": 0.0015, + "step": 8996 + }, + { + "epoch": 38.46, + "learning_rate": 9.923337285579999e-05, + "loss": 0.0006, + "step": 9000 + }, + { + "epoch": 38.46, + "eval_exact_match": 0.501039501039501, + "eval_loss": 0.7777053117752075, + "eval_runtime": 134.1347, + "eval_samples_per_second": 7.172, + "step": 9000 + }, + { + "epoch": 38.48, + "learning_rate": 9.923133851860669e-05, + "loss": 0.0006, + "step": 9004 + }, + { + "epoch": 38.5, + "learning_rate": 9.922930150671773e-05, + "loss": 0.0008, + "step": 9008 + }, + { + "epoch": 38.51, + "learning_rate": 9.922726182024378e-05, + "loss": 0.0009, + "step": 9012 + }, + { + "epoch": 38.53, + "learning_rate": 9.922521945929567e-05, + "loss": 0.0014, + "step": 9016 + }, + { + "epoch": 38.55, + "learning_rate": 9.922317442398433e-05, + "loss": 0.0031, + "step": 9020 + }, + { + "epoch": 38.56, + "learning_rate": 9.92211267144209e-05, + "loss": 0.0017, + "step": 9024 + }, + { + "epoch": 38.58, + "learning_rate": 9.921907633071661e-05, + "loss": 0.0018, + "step": 9028 + }, + { + "epoch": 38.6, + "learning_rate": 9.921702327298286e-05, + "loss": 0.001, + "step": 9032 + }, + { + "epoch": 38.62, + "learning_rate": 9.92149675413312e-05, + "loss": 0.0011, + "step": 9036 + }, + { + "epoch": 38.63, + "learning_rate": 9.921290913587328e-05, + "loss": 0.0012, + "step": 9040 + }, + { + "epoch": 38.65, + "learning_rate": 9.921084805672098e-05, + "loss": 0.0023, + "step": 9044 + }, + { + "epoch": 38.67, + "learning_rate": 9.920878430398626e-05, + "loss": 0.0031, + "step": 9048 + }, + { + "epoch": 38.68, + "learning_rate": 9.920671787778124e-05, + "loss": 0.0007, + "step": 9052 + }, + { + "epoch": 38.7, + "learning_rate": 9.920464877821818e-05, + "loss": 0.0012, + "step": 9056 + }, + { + "epoch": 38.72, + "learning_rate": 9.920257700540949e-05, + "loss": 0.0025, + "step": 9060 + }, + { + "epoch": 38.74, + "learning_rate": 9.920050255946774e-05, + "loss": 0.0034, + "step": 9064 + }, + { + "epoch": 38.75, + "learning_rate": 9.919842544050561e-05, + "loss": 0.0022, + "step": 9068 + }, + { + "epoch": 38.77, + "learning_rate": 9.919634564863599e-05, + "loss": 0.0008, + "step": 9072 + }, + { + "epoch": 38.79, + "learning_rate": 9.919426318397185e-05, + "loss": 0.0012, + "step": 9076 + }, + { + "epoch": 38.8, + "learning_rate": 9.91921780466263e-05, + "loss": 0.0018, + "step": 9080 + }, + { + "epoch": 38.82, + "learning_rate": 9.919009023671268e-05, + "loss": 0.0012, + "step": 9084 + }, + { + "epoch": 38.84, + "learning_rate": 9.918799975434436e-05, + "loss": 0.0006, + "step": 9088 + }, + { + "epoch": 38.85, + "learning_rate": 9.918590659963497e-05, + "loss": 0.0011, + "step": 9092 + }, + { + "epoch": 38.87, + "learning_rate": 9.91838107726982e-05, + "loss": 0.0006, + "step": 9096 + }, + { + "epoch": 38.89, + "learning_rate": 9.918171227364791e-05, + "loss": 0.0004, + "step": 9100 + }, + { + "epoch": 38.91, + "learning_rate": 9.917961110259811e-05, + "loss": 0.0009, + "step": 9104 + }, + { + "epoch": 38.92, + "learning_rate": 9.917750725966297e-05, + "loss": 0.0012, + "step": 9108 + }, + { + "epoch": 38.94, + "learning_rate": 9.917540074495679e-05, + "loss": 0.0006, + "step": 9112 + }, + { + "epoch": 38.96, + "learning_rate": 9.917329155859401e-05, + "loss": 0.0015, + "step": 9116 + }, + { + "epoch": 38.97, + "learning_rate": 9.917117970068919e-05, + "loss": 0.0003, + "step": 9120 + }, + { + "epoch": 38.99, + "learning_rate": 9.91690651713571e-05, + "loss": 0.002, + "step": 9124 + }, + { + "epoch": 39.01, + "learning_rate": 9.916694797071265e-05, + "loss": 0.0006, + "step": 9128 + }, + { + "epoch": 39.03, + "learning_rate": 9.91648280988708e-05, + "loss": 0.0005, + "step": 9132 + }, + { + "epoch": 39.04, + "learning_rate": 9.916270555594674e-05, + "loss": 0.0003, + "step": 9136 + }, + { + "epoch": 39.06, + "learning_rate": 9.91605803420558e-05, + "loss": 0.0011, + "step": 9140 + }, + { + "epoch": 39.08, + "learning_rate": 9.915845245731345e-05, + "loss": 0.0024, + "step": 9144 + }, + { + "epoch": 39.09, + "learning_rate": 9.915632190183528e-05, + "loss": 0.0017, + "step": 9148 + }, + { + "epoch": 39.11, + "learning_rate": 9.915418867573703e-05, + "loss": 0.001, + "step": 9152 + }, + { + "epoch": 39.13, + "learning_rate": 9.915205277913462e-05, + "loss": 0.0003, + "step": 9156 + }, + { + "epoch": 39.15, + "learning_rate": 9.914991421214408e-05, + "loss": 0.0006, + "step": 9160 + }, + { + "epoch": 39.16, + "learning_rate": 9.91477729748816e-05, + "loss": 0.0002, + "step": 9164 + }, + { + "epoch": 39.18, + "learning_rate": 9.914562906746353e-05, + "loss": 0.0021, + "step": 9168 + }, + { + "epoch": 39.2, + "learning_rate": 9.914348249000629e-05, + "loss": 0.0006, + "step": 9172 + }, + { + "epoch": 39.21, + "learning_rate": 9.914133324262656e-05, + "loss": 0.001, + "step": 9176 + }, + { + "epoch": 39.23, + "learning_rate": 9.913918132544107e-05, + "loss": 0.002, + "step": 9180 + }, + { + "epoch": 39.25, + "learning_rate": 9.913702673856675e-05, + "loss": 0.0009, + "step": 9184 + }, + { + "epoch": 39.26, + "learning_rate": 9.913486948212066e-05, + "loss": 0.0021, + "step": 9188 + }, + { + "epoch": 39.28, + "learning_rate": 9.913270955622e-05, + "loss": 0.0021, + "step": 9192 + }, + { + "epoch": 39.3, + "learning_rate": 9.91305469609821e-05, + "loss": 0.0012, + "step": 9196 + }, + { + "epoch": 39.32, + "learning_rate": 9.912838169652446e-05, + "loss": 0.0025, + "step": 9200 + }, + { + "epoch": 39.33, + "learning_rate": 9.912621376296474e-05, + "loss": 0.001, + "step": 9204 + }, + { + "epoch": 39.35, + "learning_rate": 9.91240431604207e-05, + "loss": 0.0045, + "step": 9208 + }, + { + "epoch": 39.37, + "learning_rate": 9.912186988901025e-05, + "loss": 0.0017, + "step": 9212 + }, + { + "epoch": 39.38, + "learning_rate": 9.91196939488515e-05, + "loss": 0.0008, + "step": 9216 + }, + { + "epoch": 39.4, + "learning_rate": 9.911751534006264e-05, + "loss": 0.0014, + "step": 9220 + }, + { + "epoch": 39.42, + "learning_rate": 9.911533406276203e-05, + "loss": 0.001, + "step": 9224 + }, + { + "epoch": 39.44, + "learning_rate": 9.911315011706822e-05, + "loss": 0.0025, + "step": 9228 + }, + { + "epoch": 39.45, + "learning_rate": 9.911096350309979e-05, + "loss": 0.003, + "step": 9232 + }, + { + "epoch": 39.47, + "learning_rate": 9.910877422097559e-05, + "loss": 0.0009, + "step": 9236 + }, + { + "epoch": 39.49, + "learning_rate": 9.910658227081457e-05, + "loss": 0.0009, + "step": 9240 + }, + { + "epoch": 39.5, + "learning_rate": 9.910438765273578e-05, + "loss": 0.0023, + "step": 9244 + }, + { + "epoch": 39.52, + "learning_rate": 9.910219036685845e-05, + "loss": 0.0013, + "step": 9248 + }, + { + "epoch": 39.54, + "learning_rate": 9.9099990413302e-05, + "loss": 0.002, + "step": 9252 + }, + { + "epoch": 39.56, + "learning_rate": 9.90977877921859e-05, + "loss": 0.0004, + "step": 9256 + }, + { + "epoch": 39.57, + "learning_rate": 9.909558250362986e-05, + "loss": 0.0003, + "step": 9260 + }, + { + "epoch": 39.59, + "learning_rate": 9.909337454775367e-05, + "loss": 0.0011, + "step": 9264 + }, + { + "epoch": 39.61, + "learning_rate": 9.909116392467728e-05, + "loss": 0.0019, + "step": 9268 + }, + { + "epoch": 39.62, + "learning_rate": 9.908895063452079e-05, + "loss": 0.0007, + "step": 9272 + }, + { + "epoch": 39.64, + "learning_rate": 9.908673467740448e-05, + "loss": 0.001, + "step": 9276 + }, + { + "epoch": 39.66, + "learning_rate": 9.90845160534487e-05, + "loss": 0.0012, + "step": 9280 + }, + { + "epoch": 39.68, + "learning_rate": 9.908229476277401e-05, + "loss": 0.0009, + "step": 9284 + }, + { + "epoch": 39.69, + "learning_rate": 9.908007080550109e-05, + "loss": 0.0016, + "step": 9288 + }, + { + "epoch": 39.71, + "learning_rate": 9.907784418175075e-05, + "loss": 0.0013, + "step": 9292 + }, + { + "epoch": 39.73, + "learning_rate": 9.907561489164397e-05, + "loss": 0.002, + "step": 9296 + }, + { + "epoch": 39.74, + "learning_rate": 9.907338293530187e-05, + "loss": 0.0015, + "step": 9300 + }, + { + "epoch": 39.76, + "learning_rate": 9.90711483128457e-05, + "loss": 0.0024, + "step": 9304 + }, + { + "epoch": 39.78, + "learning_rate": 9.906891102439688e-05, + "loss": 0.0007, + "step": 9308 + }, + { + "epoch": 39.79, + "learning_rate": 9.906667107007694e-05, + "loss": 0.0021, + "step": 9312 + }, + { + "epoch": 39.81, + "learning_rate": 9.90644284500076e-05, + "loss": 0.0008, + "step": 9316 + }, + { + "epoch": 39.83, + "learning_rate": 9.906218316431066e-05, + "loss": 0.0034, + "step": 9320 + }, + { + "epoch": 39.85, + "learning_rate": 9.905993521310816e-05, + "loss": 0.0005, + "step": 9324 + }, + { + "epoch": 39.86, + "learning_rate": 9.905768459652218e-05, + "loss": 0.0036, + "step": 9328 + }, + { + "epoch": 39.88, + "learning_rate": 9.905543131467501e-05, + "loss": 0.0008, + "step": 9332 + }, + { + "epoch": 39.9, + "learning_rate": 9.905317536768908e-05, + "loss": 0.0011, + "step": 9336 + }, + { + "epoch": 39.91, + "learning_rate": 9.905091675568694e-05, + "loss": 0.0012, + "step": 9340 + }, + { + "epoch": 39.93, + "learning_rate": 9.904865547879131e-05, + "loss": 0.0009, + "step": 9344 + }, + { + "epoch": 39.95, + "learning_rate": 9.904639153712503e-05, + "loss": 0.0006, + "step": 9348 + }, + { + "epoch": 39.97, + "learning_rate": 9.90441249308111e-05, + "loss": 0.002, + "step": 9352 + }, + { + "epoch": 39.98, + "learning_rate": 9.904185565997269e-05, + "loss": 0.0036, + "step": 9356 + }, + { + "epoch": 40.0, + "learning_rate": 9.903958372473303e-05, + "loss": 0.0006, + "step": 9360 + }, + { + "epoch": 40.02, + "learning_rate": 9.903730912521561e-05, + "loss": 0.001, + "step": 9364 + }, + { + "epoch": 40.03, + "learning_rate": 9.903503186154399e-05, + "loss": 0.0019, + "step": 9368 + }, + { + "epoch": 40.05, + "learning_rate": 9.903275193384188e-05, + "loss": 0.0008, + "step": 9372 + }, + { + "epoch": 40.07, + "learning_rate": 9.903046934223315e-05, + "loss": 0.002, + "step": 9376 + }, + { + "epoch": 40.09, + "learning_rate": 9.90281840868418e-05, + "loss": 0.0023, + "step": 9380 + }, + { + "epoch": 40.1, + "learning_rate": 9.902589616779201e-05, + "loss": 0.0011, + "step": 9384 + }, + { + "epoch": 40.12, + "learning_rate": 9.902360558520807e-05, + "loss": 0.0016, + "step": 9388 + }, + { + "epoch": 40.14, + "learning_rate": 9.902131233921443e-05, + "loss": 0.0041, + "step": 9392 + }, + { + "epoch": 40.15, + "learning_rate": 9.901901642993567e-05, + "loss": 0.0029, + "step": 9396 + }, + { + "epoch": 40.17, + "learning_rate": 9.901671785749654e-05, + "loss": 0.0011, + "step": 9400 + }, + { + "epoch": 40.19, + "learning_rate": 9.90144166220219e-05, + "loss": 0.001, + "step": 9404 + }, + { + "epoch": 40.21, + "learning_rate": 9.901211272363679e-05, + "loss": 0.0007, + "step": 9408 + }, + { + "epoch": 40.22, + "learning_rate": 9.900980616246637e-05, + "loss": 0.0021, + "step": 9412 + }, + { + "epoch": 40.24, + "learning_rate": 9.900749693863596e-05, + "loss": 0.0009, + "step": 9416 + }, + { + "epoch": 40.26, + "learning_rate": 9.900518505227101e-05, + "loss": 0.0004, + "step": 9420 + }, + { + "epoch": 40.27, + "learning_rate": 9.900287050349712e-05, + "loss": 0.001, + "step": 9424 + }, + { + "epoch": 40.29, + "learning_rate": 9.900055329244004e-05, + "loss": 0.0022, + "step": 9428 + }, + { + "epoch": 40.31, + "learning_rate": 9.89982334192257e-05, + "loss": 0.0014, + "step": 9432 + }, + { + "epoch": 40.32, + "learning_rate": 9.899591088398007e-05, + "loss": 0.0029, + "step": 9436 + }, + { + "epoch": 40.34, + "learning_rate": 9.899358568682938e-05, + "loss": 0.0004, + "step": 9440 + }, + { + "epoch": 40.36, + "learning_rate": 9.899125782789993e-05, + "loss": 0.0006, + "step": 9444 + }, + { + "epoch": 40.38, + "learning_rate": 9.898892730731821e-05, + "loss": 0.0019, + "step": 9448 + }, + { + "epoch": 40.39, + "learning_rate": 9.898659412521082e-05, + "loss": 0.0016, + "step": 9452 + }, + { + "epoch": 40.41, + "learning_rate": 9.898425828170452e-05, + "loss": 0.0029, + "step": 9456 + }, + { + "epoch": 40.43, + "learning_rate": 9.898191977692623e-05, + "loss": 0.0018, + "step": 9460 + }, + { + "epoch": 40.44, + "learning_rate": 9.8979578611003e-05, + "loss": 0.0005, + "step": 9464 + }, + { + "epoch": 40.46, + "learning_rate": 9.8977234784062e-05, + "loss": 0.0007, + "step": 9468 + }, + { + "epoch": 40.48, + "learning_rate": 9.897488829623059e-05, + "loss": 0.0022, + "step": 9472 + }, + { + "epoch": 40.5, + "learning_rate": 9.897253914763622e-05, + "loss": 0.0034, + "step": 9476 + }, + { + "epoch": 40.51, + "learning_rate": 9.897018733840656e-05, + "loss": 0.0019, + "step": 9480 + }, + { + "epoch": 40.53, + "learning_rate": 9.896783286866935e-05, + "loss": 0.0009, + "step": 9484 + }, + { + "epoch": 40.55, + "learning_rate": 9.896547573855253e-05, + "loss": 0.0006, + "step": 9488 + }, + { + "epoch": 40.56, + "learning_rate": 9.896311594818415e-05, + "loss": 0.0004, + "step": 9492 + }, + { + "epoch": 40.58, + "learning_rate": 9.896075349769241e-05, + "loss": 0.0038, + "step": 9496 + }, + { + "epoch": 40.6, + "learning_rate": 9.895838838720565e-05, + "loss": 0.0023, + "step": 9500 + }, + { + "epoch": 40.62, + "learning_rate": 9.89560206168524e-05, + "loss": 0.0005, + "step": 9504 + }, + { + "epoch": 40.63, + "learning_rate": 9.895365018676128e-05, + "loss": 0.0008, + "step": 9508 + }, + { + "epoch": 40.65, + "learning_rate": 9.895127709706106e-05, + "loss": 0.0016, + "step": 9512 + }, + { + "epoch": 40.67, + "learning_rate": 9.894890134788068e-05, + "loss": 0.0013, + "step": 9516 + }, + { + "epoch": 40.68, + "learning_rate": 9.894652293934921e-05, + "loss": 0.0011, + "step": 9520 + }, + { + "epoch": 40.7, + "learning_rate": 9.894414187159587e-05, + "loss": 0.0006, + "step": 9524 + }, + { + "epoch": 40.72, + "learning_rate": 9.894175814475002e-05, + "loss": 0.0008, + "step": 9528 + }, + { + "epoch": 40.74, + "learning_rate": 9.893937175894117e-05, + "loss": 0.0008, + "step": 9532 + }, + { + "epoch": 40.75, + "learning_rate": 9.893698271429896e-05, + "loss": 0.001, + "step": 9536 + }, + { + "epoch": 40.77, + "learning_rate": 9.893459101095319e-05, + "loss": 0.0006, + "step": 9540 + }, + { + "epoch": 40.79, + "learning_rate": 9.893219664903382e-05, + "loss": 0.0009, + "step": 9544 + }, + { + "epoch": 40.8, + "learning_rate": 9.892979962867088e-05, + "loss": 0.0005, + "step": 9548 + }, + { + "epoch": 40.82, + "learning_rate": 9.892739994999466e-05, + "loss": 0.0013, + "step": 9552 + }, + { + "epoch": 40.84, + "learning_rate": 9.89249976131355e-05, + "loss": 0.0012, + "step": 9556 + }, + { + "epoch": 40.85, + "learning_rate": 9.89225926182239e-05, + "loss": 0.0007, + "step": 9560 + }, + { + "epoch": 40.87, + "learning_rate": 9.892018496539055e-05, + "loss": 0.0019, + "step": 9564 + }, + { + "epoch": 40.89, + "learning_rate": 9.891777465476627e-05, + "loss": 0.0026, + "step": 9568 + }, + { + "epoch": 40.91, + "learning_rate": 9.891536168648195e-05, + "loss": 0.0009, + "step": 9572 + }, + { + "epoch": 40.92, + "learning_rate": 9.891294606066875e-05, + "loss": 0.0008, + "step": 9576 + }, + { + "epoch": 40.94, + "learning_rate": 9.891052777745786e-05, + "loss": 0.0007, + "step": 9580 + }, + { + "epoch": 40.96, + "learning_rate": 9.89081068369807e-05, + "loss": 0.0015, + "step": 9584 + }, + { + "epoch": 40.97, + "learning_rate": 9.890568323936877e-05, + "loss": 0.0007, + "step": 9588 + }, + { + "epoch": 40.99, + "learning_rate": 9.890325698475377e-05, + "loss": 0.0007, + "step": 9592 + }, + { + "epoch": 41.01, + "learning_rate": 9.89008280732675e-05, + "loss": 0.0029, + "step": 9596 + }, + { + "epoch": 41.03, + "learning_rate": 9.889839650504191e-05, + "loss": 0.0007, + "step": 9600 + }, + { + "epoch": 41.04, + "learning_rate": 9.889596228020912e-05, + "loss": 0.0013, + "step": 9604 + }, + { + "epoch": 41.06, + "learning_rate": 9.889352539890136e-05, + "loss": 0.0062, + "step": 9608 + }, + { + "epoch": 41.08, + "learning_rate": 9.889108586125105e-05, + "loss": 0.0003, + "step": 9612 + }, + { + "epoch": 41.09, + "learning_rate": 9.888864366739073e-05, + "loss": 0.001, + "step": 9616 + }, + { + "epoch": 41.11, + "learning_rate": 9.888619881745305e-05, + "loss": 0.0005, + "step": 9620 + }, + { + "epoch": 41.13, + "learning_rate": 9.888375131157085e-05, + "loss": 0.0016, + "step": 9624 + }, + { + "epoch": 41.15, + "learning_rate": 9.888130114987712e-05, + "loss": 0.0013, + "step": 9628 + }, + { + "epoch": 41.16, + "learning_rate": 9.887884833250496e-05, + "loss": 0.0008, + "step": 9632 + }, + { + "epoch": 41.18, + "learning_rate": 9.887639285958763e-05, + "loss": 0.0025, + "step": 9636 + }, + { + "epoch": 41.2, + "learning_rate": 9.887393473125854e-05, + "loss": 0.001, + "step": 9640 + }, + { + "epoch": 41.21, + "learning_rate": 9.887147394765122e-05, + "loss": 0.0007, + "step": 9644 + }, + { + "epoch": 41.23, + "learning_rate": 9.886901050889938e-05, + "loss": 0.0018, + "step": 9648 + }, + { + "epoch": 41.25, + "learning_rate": 9.886654441513686e-05, + "loss": 0.0007, + "step": 9652 + }, + { + "epoch": 41.26, + "learning_rate": 9.886407566649762e-05, + "loss": 0.0013, + "step": 9656 + }, + { + "epoch": 41.28, + "learning_rate": 9.886160426311581e-05, + "loss": 0.0006, + "step": 9660 + }, + { + "epoch": 41.3, + "learning_rate": 9.885913020512568e-05, + "loss": 0.0021, + "step": 9664 + }, + { + "epoch": 41.32, + "learning_rate": 9.885665349266163e-05, + "loss": 0.0017, + "step": 9668 + }, + { + "epoch": 41.33, + "learning_rate": 9.885417412585826e-05, + "loss": 0.0004, + "step": 9672 + }, + { + "epoch": 41.35, + "learning_rate": 9.885169210485023e-05, + "loss": 0.0027, + "step": 9676 + }, + { + "epoch": 41.37, + "learning_rate": 9.884920742977243e-05, + "loss": 0.0028, + "step": 9680 + }, + { + "epoch": 41.38, + "learning_rate": 9.884672010075982e-05, + "loss": 0.0042, + "step": 9684 + }, + { + "epoch": 41.4, + "learning_rate": 9.884423011794753e-05, + "loss": 0.0006, + "step": 9688 + }, + { + "epoch": 41.42, + "learning_rate": 9.884173748147084e-05, + "loss": 0.0021, + "step": 9692 + }, + { + "epoch": 41.44, + "learning_rate": 9.883924219146519e-05, + "loss": 0.0016, + "step": 9696 + }, + { + "epoch": 41.45, + "learning_rate": 9.883674424806614e-05, + "loss": 0.0003, + "step": 9700 + }, + { + "epoch": 41.47, + "learning_rate": 9.88342436514094e-05, + "loss": 0.0007, + "step": 9704 + }, + { + "epoch": 41.49, + "learning_rate": 9.883174040163081e-05, + "loss": 0.0015, + "step": 9708 + }, + { + "epoch": 41.5, + "learning_rate": 9.882923449886641e-05, + "loss": 0.0033, + "step": 9712 + }, + { + "epoch": 41.52, + "learning_rate": 9.882672594325228e-05, + "loss": 0.0012, + "step": 9716 + }, + { + "epoch": 41.54, + "learning_rate": 9.882421473492478e-05, + "loss": 0.0012, + "step": 9720 + }, + { + "epoch": 41.56, + "learning_rate": 9.88217008740203e-05, + "loss": 0.0007, + "step": 9724 + }, + { + "epoch": 41.57, + "learning_rate": 9.881918436067541e-05, + "loss": 0.0033, + "step": 9728 + }, + { + "epoch": 41.59, + "learning_rate": 9.881666519502684e-05, + "loss": 0.001, + "step": 9732 + }, + { + "epoch": 41.61, + "learning_rate": 9.881414337721146e-05, + "loss": 0.0011, + "step": 9736 + }, + { + "epoch": 41.62, + "learning_rate": 9.881161890736627e-05, + "loss": 0.0009, + "step": 9740 + }, + { + "epoch": 41.64, + "learning_rate": 9.880909178562845e-05, + "loss": 0.0018, + "step": 9744 + }, + { + "epoch": 41.66, + "learning_rate": 9.880656201213525e-05, + "loss": 0.0008, + "step": 9748 + }, + { + "epoch": 41.68, + "learning_rate": 9.880402958702415e-05, + "loss": 0.0016, + "step": 9752 + }, + { + "epoch": 41.69, + "learning_rate": 9.88014945104327e-05, + "loss": 0.0017, + "step": 9756 + }, + { + "epoch": 41.71, + "learning_rate": 9.879895678249866e-05, + "loss": 0.0011, + "step": 9760 + }, + { + "epoch": 41.73, + "learning_rate": 9.879641640335988e-05, + "loss": 0.0003, + "step": 9764 + }, + { + "epoch": 41.74, + "learning_rate": 9.87938733731544e-05, + "loss": 0.0009, + "step": 9768 + }, + { + "epoch": 41.76, + "learning_rate": 9.879132769202035e-05, + "loss": 0.0007, + "step": 9772 + }, + { + "epoch": 41.78, + "learning_rate": 9.878877936009605e-05, + "loss": 0.0007, + "step": 9776 + }, + { + "epoch": 41.79, + "learning_rate": 9.878622837751997e-05, + "loss": 0.0005, + "step": 9780 + }, + { + "epoch": 41.81, + "learning_rate": 9.878367474443066e-05, + "loss": 0.0005, + "step": 9784 + }, + { + "epoch": 41.83, + "learning_rate": 9.87811184609669e-05, + "loss": 0.0012, + "step": 9788 + }, + { + "epoch": 41.85, + "learning_rate": 9.877855952726752e-05, + "loss": 0.0019, + "step": 9792 + }, + { + "epoch": 41.86, + "learning_rate": 9.877599794347158e-05, + "loss": 0.0015, + "step": 9796 + }, + { + "epoch": 41.88, + "learning_rate": 9.877343370971827e-05, + "loss": 0.0008, + "step": 9800 + }, + { + "epoch": 41.9, + "learning_rate": 9.877086682614686e-05, + "loss": 0.0003, + "step": 9804 + }, + { + "epoch": 41.91, + "learning_rate": 9.876829729289682e-05, + "loss": 0.0005, + "step": 9808 + }, + { + "epoch": 41.93, + "learning_rate": 9.876572511010775e-05, + "loss": 0.0009, + "step": 9812 + }, + { + "epoch": 41.95, + "learning_rate": 9.87631502779194e-05, + "loss": 0.002, + "step": 9816 + }, + { + "epoch": 41.97, + "learning_rate": 9.876057279647165e-05, + "loss": 0.001, + "step": 9820 + }, + { + "epoch": 41.98, + "learning_rate": 9.875799266590453e-05, + "loss": 0.0019, + "step": 9824 + }, + { + "epoch": 42.0, + "learning_rate": 9.875540988635824e-05, + "loss": 0.0003, + "step": 9828 + }, + { + "epoch": 42.02, + "learning_rate": 9.875282445797308e-05, + "loss": 0.0003, + "step": 9832 + }, + { + "epoch": 42.03, + "learning_rate": 9.875023638088951e-05, + "loss": 0.0013, + "step": 9836 + }, + { + "epoch": 42.05, + "learning_rate": 9.874764565524816e-05, + "loss": 0.0004, + "step": 9840 + }, + { + "epoch": 42.07, + "learning_rate": 9.874505228118976e-05, + "loss": 0.0006, + "step": 9844 + }, + { + "epoch": 42.09, + "learning_rate": 9.874245625885521e-05, + "loss": 0.0004, + "step": 9848 + }, + { + "epoch": 42.1, + "learning_rate": 9.873985758838555e-05, + "loss": 0.0003, + "step": 9852 + }, + { + "epoch": 42.12, + "learning_rate": 9.873725626992199e-05, + "loss": 0.0023, + "step": 9856 + }, + { + "epoch": 42.14, + "learning_rate": 9.873465230360582e-05, + "loss": 0.0017, + "step": 9860 + }, + { + "epoch": 42.15, + "learning_rate": 9.87320456895785e-05, + "loss": 0.0009, + "step": 9864 + }, + { + "epoch": 42.17, + "learning_rate": 9.87294364279817e-05, + "loss": 0.001, + "step": 9868 + }, + { + "epoch": 42.19, + "learning_rate": 9.872682451895714e-05, + "loss": 0.0005, + "step": 9872 + }, + { + "epoch": 42.21, + "learning_rate": 9.872420996264673e-05, + "loss": 0.0007, + "step": 9876 + }, + { + "epoch": 42.22, + "learning_rate": 9.872159275919253e-05, + "loss": 0.001, + "step": 9880 + }, + { + "epoch": 42.24, + "learning_rate": 9.87189729087367e-05, + "loss": 0.0009, + "step": 9884 + }, + { + "epoch": 42.26, + "learning_rate": 9.87163504114216e-05, + "loss": 0.0012, + "step": 9888 + }, + { + "epoch": 42.27, + "learning_rate": 9.871372526738971e-05, + "loss": 0.0007, + "step": 9892 + }, + { + "epoch": 42.29, + "learning_rate": 9.871109747678364e-05, + "loss": 0.0005, + "step": 9896 + }, + { + "epoch": 42.31, + "learning_rate": 9.870846703974614e-05, + "loss": 0.0009, + "step": 9900 + }, + { + "epoch": 42.32, + "learning_rate": 9.870583395642017e-05, + "loss": 0.0005, + "step": 9904 + }, + { + "epoch": 42.34, + "learning_rate": 9.870319822694874e-05, + "loss": 0.0007, + "step": 9908 + }, + { + "epoch": 42.36, + "learning_rate": 9.870055985147503e-05, + "loss": 0.0015, + "step": 9912 + }, + { + "epoch": 42.38, + "learning_rate": 9.869791883014243e-05, + "loss": 0.0015, + "step": 9916 + }, + { + "epoch": 42.39, + "learning_rate": 9.869527516309441e-05, + "loss": 0.0014, + "step": 9920 + }, + { + "epoch": 42.41, + "learning_rate": 9.869262885047459e-05, + "loss": 0.0015, + "step": 9924 + }, + { + "epoch": 42.43, + "learning_rate": 9.868997989242672e-05, + "loss": 0.0018, + "step": 9928 + }, + { + "epoch": 42.44, + "learning_rate": 9.868732828909476e-05, + "loss": 0.0018, + "step": 9932 + }, + { + "epoch": 42.46, + "learning_rate": 9.868467404062273e-05, + "loss": 0.0009, + "step": 9936 + }, + { + "epoch": 42.48, + "learning_rate": 9.868201714715489e-05, + "loss": 0.001, + "step": 9940 + }, + { + "epoch": 42.5, + "learning_rate": 9.867935760883551e-05, + "loss": 0.0009, + "step": 9944 + }, + { + "epoch": 42.51, + "learning_rate": 9.867669542580912e-05, + "loss": 0.0009, + "step": 9948 + }, + { + "epoch": 42.53, + "learning_rate": 9.867403059822035e-05, + "loss": 0.0008, + "step": 9952 + }, + { + "epoch": 42.55, + "learning_rate": 9.867136312621398e-05, + "loss": 0.0011, + "step": 9956 + }, + { + "epoch": 42.56, + "learning_rate": 9.866869300993496e-05, + "loss": 0.002, + "step": 9960 + }, + { + "epoch": 42.58, + "learning_rate": 9.866602024952828e-05, + "loss": 0.001, + "step": 9964 + }, + { + "epoch": 42.6, + "learning_rate": 9.866334484513923e-05, + "loss": 0.0008, + "step": 9968 + }, + { + "epoch": 42.62, + "learning_rate": 9.866066679691313e-05, + "loss": 0.0006, + "step": 9972 + }, + { + "epoch": 42.63, + "learning_rate": 9.865798610499546e-05, + "loss": 0.0012, + "step": 9976 + }, + { + "epoch": 42.65, + "learning_rate": 9.865530276953187e-05, + "loss": 0.0004, + "step": 9980 + }, + { + "epoch": 42.67, + "learning_rate": 9.865261679066814e-05, + "loss": 0.0033, + "step": 9984 + }, + { + "epoch": 42.68, + "learning_rate": 9.864992816855021e-05, + "loss": 0.0021, + "step": 9988 + }, + { + "epoch": 42.7, + "learning_rate": 9.864723690332415e-05, + "loss": 0.0006, + "step": 9992 + }, + { + "epoch": 42.72, + "learning_rate": 9.864454299513616e-05, + "loss": 0.002, + "step": 9996 + }, + { + "epoch": 42.74, + "learning_rate": 9.864184644413262e-05, + "loss": 0.0016, + "step": 10000 + }, + { + "epoch": 42.74, + "eval_exact_match": 0.5031185031185031, + "eval_loss": 0.7922365665435791, + "eval_runtime": 139.7876, + "eval_samples_per_second": 6.882, + "step": 10000 + }, + { + "epoch": 42.75, + "learning_rate": 9.863914725046e-05, + "loss": 0.0025, + "step": 10004 + }, + { + "epoch": 42.77, + "learning_rate": 9.863644541426496e-05, + "loss": 0.0007, + "step": 10008 + }, + { + "epoch": 42.79, + "learning_rate": 9.863374093569428e-05, + "loss": 0.0017, + "step": 10012 + }, + { + "epoch": 42.8, + "learning_rate": 9.863103381489493e-05, + "loss": 0.0003, + "step": 10016 + }, + { + "epoch": 42.82, + "learning_rate": 9.862832405201395e-05, + "loss": 0.0007, + "step": 10020 + }, + { + "epoch": 42.84, + "learning_rate": 9.862561164719855e-05, + "loss": 0.0023, + "step": 10024 + }, + { + "epoch": 42.85, + "learning_rate": 9.862289660059611e-05, + "loss": 0.0014, + "step": 10028 + }, + { + "epoch": 42.87, + "learning_rate": 9.862017891235416e-05, + "loss": 0.0014, + "step": 10032 + }, + { + "epoch": 42.89, + "learning_rate": 9.86174585826203e-05, + "loss": 0.0044, + "step": 10036 + }, + { + "epoch": 42.91, + "learning_rate": 9.861473561154236e-05, + "loss": 0.0004, + "step": 10040 + }, + { + "epoch": 42.92, + "learning_rate": 9.861200999926826e-05, + "loss": 0.0011, + "step": 10044 + }, + { + "epoch": 42.94, + "learning_rate": 9.860928174594608e-05, + "loss": 0.0016, + "step": 10048 + }, + { + "epoch": 42.96, + "learning_rate": 9.860655085172405e-05, + "loss": 0.0008, + "step": 10052 + }, + { + "epoch": 42.97, + "learning_rate": 9.860381731675055e-05, + "loss": 0.0011, + "step": 10056 + }, + { + "epoch": 42.99, + "learning_rate": 9.860108114117406e-05, + "loss": 0.0011, + "step": 10060 + }, + { + "epoch": 43.01, + "learning_rate": 9.859834232514324e-05, + "loss": 0.0004, + "step": 10064 + }, + { + "epoch": 43.03, + "learning_rate": 9.859560086880693e-05, + "loss": 0.0004, + "step": 10068 + }, + { + "epoch": 43.04, + "learning_rate": 9.859285677231402e-05, + "loss": 0.0013, + "step": 10072 + }, + { + "epoch": 43.06, + "learning_rate": 9.85901100358136e-05, + "loss": 0.0013, + "step": 10076 + }, + { + "epoch": 43.08, + "learning_rate": 9.858736065945493e-05, + "loss": 0.0012, + "step": 10080 + }, + { + "epoch": 43.09, + "learning_rate": 9.858460864338735e-05, + "loss": 0.0007, + "step": 10084 + }, + { + "epoch": 43.11, + "learning_rate": 9.858185398776039e-05, + "loss": 0.0031, + "step": 10088 + }, + { + "epoch": 43.13, + "learning_rate": 9.85790966927237e-05, + "loss": 0.0007, + "step": 10092 + }, + { + "epoch": 43.15, + "learning_rate": 9.85763367584271e-05, + "loss": 0.0012, + "step": 10096 + }, + { + "epoch": 43.16, + "learning_rate": 9.857357418502051e-05, + "loss": 0.0033, + "step": 10100 + }, + { + "epoch": 43.18, + "learning_rate": 9.857080897265403e-05, + "loss": 0.0031, + "step": 10104 + }, + { + "epoch": 43.2, + "learning_rate": 9.856804112147788e-05, + "loss": 0.001, + "step": 10108 + }, + { + "epoch": 43.21, + "learning_rate": 9.856527063164246e-05, + "loss": 0.0013, + "step": 10112 + }, + { + "epoch": 43.23, + "learning_rate": 9.856249750329827e-05, + "loss": 0.0018, + "step": 10116 + }, + { + "epoch": 43.25, + "learning_rate": 9.855972173659597e-05, + "loss": 0.0009, + "step": 10120 + }, + { + "epoch": 43.26, + "learning_rate": 9.855694333168638e-05, + "loss": 0.0004, + "step": 10124 + }, + { + "epoch": 43.28, + "learning_rate": 9.855416228872043e-05, + "loss": 0.0004, + "step": 10128 + }, + { + "epoch": 43.3, + "learning_rate": 9.855137860784923e-05, + "loss": 0.0002, + "step": 10132 + }, + { + "epoch": 43.32, + "learning_rate": 9.854859228922398e-05, + "loss": 0.0004, + "step": 10136 + }, + { + "epoch": 43.33, + "learning_rate": 9.85458033329961e-05, + "loss": 0.0013, + "step": 10140 + }, + { + "epoch": 43.35, + "learning_rate": 9.85430117393171e-05, + "loss": 0.0029, + "step": 10144 + }, + { + "epoch": 43.37, + "learning_rate": 9.854021750833864e-05, + "loss": 0.0011, + "step": 10148 + }, + { + "epoch": 43.38, + "learning_rate": 9.853742064021252e-05, + "loss": 0.0003, + "step": 10152 + }, + { + "epoch": 43.4, + "learning_rate": 9.85346211350907e-05, + "loss": 0.0012, + "step": 10156 + }, + { + "epoch": 43.42, + "learning_rate": 9.853181899312527e-05, + "loss": 0.0012, + "step": 10160 + }, + { + "epoch": 43.44, + "learning_rate": 9.852901421446849e-05, + "loss": 0.0008, + "step": 10164 + }, + { + "epoch": 43.45, + "learning_rate": 9.85262067992727e-05, + "loss": 0.0026, + "step": 10168 + }, + { + "epoch": 43.47, + "learning_rate": 9.852339674769047e-05, + "loss": 0.0006, + "step": 10172 + }, + { + "epoch": 43.49, + "learning_rate": 9.852058405987444e-05, + "loss": 0.0016, + "step": 10176 + }, + { + "epoch": 43.5, + "learning_rate": 9.85177687359774e-05, + "loss": 0.0003, + "step": 10180 + }, + { + "epoch": 43.52, + "learning_rate": 9.851495077615236e-05, + "loss": 0.0004, + "step": 10184 + }, + { + "epoch": 43.54, + "learning_rate": 9.851213018055237e-05, + "loss": 0.0014, + "step": 10188 + }, + { + "epoch": 43.56, + "learning_rate": 9.850930694933069e-05, + "loss": 0.0008, + "step": 10192 + }, + { + "epoch": 43.57, + "learning_rate": 9.85064810826407e-05, + "loss": 0.0003, + "step": 10196 + }, + { + "epoch": 43.59, + "learning_rate": 9.850365258063595e-05, + "loss": 0.0015, + "step": 10200 + }, + { + "epoch": 43.61, + "learning_rate": 9.850082144347007e-05, + "loss": 0.0016, + "step": 10204 + }, + { + "epoch": 43.62, + "learning_rate": 9.84979876712969e-05, + "loss": 0.0032, + "step": 10208 + }, + { + "epoch": 43.64, + "learning_rate": 9.849515126427038e-05, + "loss": 0.0004, + "step": 10212 + }, + { + "epoch": 43.66, + "learning_rate": 9.849231222254463e-05, + "loss": 0.0004, + "step": 10216 + }, + { + "epoch": 43.68, + "learning_rate": 9.848947054627386e-05, + "loss": 0.0007, + "step": 10220 + }, + { + "epoch": 43.69, + "learning_rate": 9.848662623561248e-05, + "loss": 0.0019, + "step": 10224 + }, + { + "epoch": 43.71, + "learning_rate": 9.848377929071503e-05, + "loss": 0.0004, + "step": 10228 + }, + { + "epoch": 43.73, + "learning_rate": 9.848092971173615e-05, + "loss": 0.0017, + "step": 10232 + }, + { + "epoch": 43.74, + "learning_rate": 9.847807749883069e-05, + "loss": 0.0007, + "step": 10236 + }, + { + "epoch": 43.76, + "learning_rate": 9.847522265215357e-05, + "loss": 0.0009, + "step": 10240 + }, + { + "epoch": 43.78, + "learning_rate": 9.847236517185994e-05, + "loss": 0.002, + "step": 10244 + }, + { + "epoch": 43.79, + "learning_rate": 9.846950505810499e-05, + "loss": 0.0019, + "step": 10248 + }, + { + "epoch": 43.81, + "learning_rate": 9.846664231104414e-05, + "loss": 0.0013, + "step": 10252 + }, + { + "epoch": 43.83, + "learning_rate": 9.846377693083292e-05, + "loss": 0.0004, + "step": 10256 + }, + { + "epoch": 43.85, + "learning_rate": 9.8460908917627e-05, + "loss": 0.0003, + "step": 10260 + }, + { + "epoch": 43.86, + "learning_rate": 9.845803827158218e-05, + "loss": 0.0014, + "step": 10264 + }, + { + "epoch": 43.88, + "learning_rate": 9.845516499285444e-05, + "loss": 0.0009, + "step": 10268 + }, + { + "epoch": 43.9, + "learning_rate": 9.845228908159988e-05, + "loss": 0.0004, + "step": 10272 + }, + { + "epoch": 43.91, + "learning_rate": 9.844941053797474e-05, + "loss": 0.0007, + "step": 10276 + }, + { + "epoch": 43.93, + "learning_rate": 9.844652936213541e-05, + "loss": 0.0017, + "step": 10280 + }, + { + "epoch": 43.95, + "learning_rate": 9.844364555423844e-05, + "loss": 0.0019, + "step": 10284 + }, + { + "epoch": 43.97, + "learning_rate": 9.844075911444046e-05, + "loss": 0.0004, + "step": 10288 + }, + { + "epoch": 43.98, + "learning_rate": 9.843787004289833e-05, + "loss": 0.0006, + "step": 10292 + }, + { + "epoch": 44.0, + "learning_rate": 9.8434978339769e-05, + "loss": 0.0007, + "step": 10296 + }, + { + "epoch": 44.02, + "learning_rate": 9.843208400520956e-05, + "loss": 0.0002, + "step": 10300 + }, + { + "epoch": 44.03, + "learning_rate": 9.842918703937727e-05, + "loss": 0.0005, + "step": 10304 + }, + { + "epoch": 44.05, + "learning_rate": 9.842628744242953e-05, + "loss": 0.0021, + "step": 10308 + }, + { + "epoch": 44.07, + "learning_rate": 9.842338521452384e-05, + "loss": 0.0002, + "step": 10312 + }, + { + "epoch": 44.09, + "learning_rate": 9.84204803558179e-05, + "loss": 0.0006, + "step": 10316 + }, + { + "epoch": 44.1, + "learning_rate": 9.841757286646953e-05, + "loss": 0.0013, + "step": 10320 + }, + { + "epoch": 44.12, + "learning_rate": 9.841466274663669e-05, + "loss": 0.0006, + "step": 10324 + }, + { + "epoch": 44.14, + "learning_rate": 9.841174999647747e-05, + "loss": 0.0006, + "step": 10328 + }, + { + "epoch": 44.15, + "learning_rate": 9.840883461615011e-05, + "loss": 0.0013, + "step": 10332 + }, + { + "epoch": 44.17, + "learning_rate": 9.840591660581304e-05, + "loss": 0.0004, + "step": 10336 + }, + { + "epoch": 44.19, + "learning_rate": 9.840299596562475e-05, + "loss": 0.0012, + "step": 10340 + }, + { + "epoch": 44.21, + "learning_rate": 9.840007269574394e-05, + "loss": 0.0007, + "step": 10344 + }, + { + "epoch": 44.22, + "learning_rate": 9.839714679632943e-05, + "loss": 0.0012, + "step": 10348 + }, + { + "epoch": 44.24, + "learning_rate": 9.839421826754016e-05, + "loss": 0.0021, + "step": 10352 + }, + { + "epoch": 44.26, + "learning_rate": 9.839128710953527e-05, + "loss": 0.0015, + "step": 10356 + }, + { + "epoch": 44.27, + "learning_rate": 9.838835332247397e-05, + "loss": 0.0038, + "step": 10360 + }, + { + "epoch": 44.29, + "learning_rate": 9.838541690651569e-05, + "loss": 0.0006, + "step": 10364 + }, + { + "epoch": 44.31, + "learning_rate": 9.83824778618199e-05, + "loss": 0.0007, + "step": 10368 + }, + { + "epoch": 44.32, + "learning_rate": 9.837953618854634e-05, + "loss": 0.0015, + "step": 10372 + }, + { + "epoch": 44.34, + "learning_rate": 9.83765918868548e-05, + "loss": 0.0002, + "step": 10376 + }, + { + "epoch": 44.36, + "learning_rate": 9.837364495690525e-05, + "loss": 0.0015, + "step": 10380 + }, + { + "epoch": 44.38, + "learning_rate": 9.83706953988578e-05, + "loss": 0.0015, + "step": 10384 + }, + { + "epoch": 44.39, + "learning_rate": 9.836774321287265e-05, + "loss": 0.0022, + "step": 10388 + }, + { + "epoch": 44.41, + "learning_rate": 9.836478839911025e-05, + "loss": 0.0007, + "step": 10392 + }, + { + "epoch": 44.43, + "learning_rate": 9.83618309577311e-05, + "loss": 0.0006, + "step": 10396 + }, + { + "epoch": 44.44, + "learning_rate": 9.83588708888959e-05, + "loss": 0.0009, + "step": 10400 + }, + { + "epoch": 44.46, + "learning_rate": 9.835590819276543e-05, + "loss": 0.0023, + "step": 10404 + }, + { + "epoch": 44.48, + "learning_rate": 9.835294286950069e-05, + "loss": 0.0028, + "step": 10408 + }, + { + "epoch": 44.5, + "learning_rate": 9.834997491926275e-05, + "loss": 0.0035, + "step": 10412 + }, + { + "epoch": 44.51, + "learning_rate": 9.83470043422129e-05, + "loss": 0.0011, + "step": 10416 + }, + { + "epoch": 44.53, + "learning_rate": 9.834403113851247e-05, + "loss": 0.0004, + "step": 10420 + }, + { + "epoch": 44.55, + "learning_rate": 9.834105530832302e-05, + "loss": 0.0017, + "step": 10424 + }, + { + "epoch": 44.56, + "learning_rate": 9.833807685180624e-05, + "loss": 0.0011, + "step": 10428 + }, + { + "epoch": 44.58, + "learning_rate": 9.833509576912394e-05, + "loss": 0.0005, + "step": 10432 + }, + { + "epoch": 44.6, + "learning_rate": 9.833211206043807e-05, + "loss": 0.0012, + "step": 10436 + }, + { + "epoch": 44.62, + "learning_rate": 9.832912572591072e-05, + "loss": 0.003, + "step": 10440 + }, + { + "epoch": 44.63, + "learning_rate": 9.832613676570417e-05, + "loss": 0.001, + "step": 10444 + }, + { + "epoch": 44.65, + "learning_rate": 9.832314517998076e-05, + "loss": 0.0005, + "step": 10448 + }, + { + "epoch": 44.67, + "learning_rate": 9.832015096890308e-05, + "loss": 0.0006, + "step": 10452 + }, + { + "epoch": 44.68, + "learning_rate": 9.831715413263375e-05, + "loss": 0.0025, + "step": 10456 + }, + { + "epoch": 44.7, + "learning_rate": 9.831415467133562e-05, + "loss": 0.0005, + "step": 10460 + }, + { + "epoch": 44.72, + "learning_rate": 9.831115258517163e-05, + "loss": 0.0008, + "step": 10464 + }, + { + "epoch": 44.74, + "learning_rate": 9.830814787430486e-05, + "loss": 0.0003, + "step": 10468 + }, + { + "epoch": 44.75, + "learning_rate": 9.830514053889861e-05, + "loss": 0.0008, + "step": 10472 + }, + { + "epoch": 44.77, + "learning_rate": 9.830213057911624e-05, + "loss": 0.0035, + "step": 10476 + }, + { + "epoch": 44.79, + "learning_rate": 9.829911799512124e-05, + "loss": 0.0015, + "step": 10480 + }, + { + "epoch": 44.8, + "learning_rate": 9.829610278707735e-05, + "loss": 0.0002, + "step": 10484 + }, + { + "epoch": 44.82, + "learning_rate": 9.829308495514832e-05, + "loss": 0.0005, + "step": 10488 + }, + { + "epoch": 44.84, + "learning_rate": 9.829006449949813e-05, + "loss": 0.0015, + "step": 10492 + }, + { + "epoch": 44.85, + "learning_rate": 9.82870414202909e-05, + "loss": 0.0007, + "step": 10496 + }, + { + "epoch": 44.87, + "learning_rate": 9.828401571769086e-05, + "loss": 0.0004, + "step": 10500 + }, + { + "epoch": 44.89, + "learning_rate": 9.828098739186238e-05, + "loss": 0.0006, + "step": 10504 + }, + { + "epoch": 44.91, + "learning_rate": 9.827795644296998e-05, + "loss": 0.001, + "step": 10508 + }, + { + "epoch": 44.92, + "learning_rate": 9.827492287117836e-05, + "loss": 0.0011, + "step": 10512 + }, + { + "epoch": 44.94, + "learning_rate": 9.82718866766523e-05, + "loss": 0.0008, + "step": 10516 + }, + { + "epoch": 44.96, + "learning_rate": 9.826884785955679e-05, + "loss": 0.003, + "step": 10520 + }, + { + "epoch": 44.97, + "learning_rate": 9.826580642005688e-05, + "loss": 0.0006, + "step": 10524 + }, + { + "epoch": 44.99, + "learning_rate": 9.826276235831785e-05, + "loss": 0.0011, + "step": 10528 + }, + { + "epoch": 45.01, + "learning_rate": 9.825971567450505e-05, + "loss": 0.0002, + "step": 10532 + }, + { + "epoch": 45.03, + "learning_rate": 9.825666636878403e-05, + "loss": 0.0008, + "step": 10536 + }, + { + "epoch": 45.04, + "learning_rate": 9.825361444132044e-05, + "loss": 0.0004, + "step": 10540 + }, + { + "epoch": 45.06, + "learning_rate": 9.82505598922801e-05, + "loss": 0.0006, + "step": 10544 + }, + { + "epoch": 45.08, + "learning_rate": 9.824750272182894e-05, + "loss": 0.0005, + "step": 10548 + }, + { + "epoch": 45.09, + "learning_rate": 9.824444293013306e-05, + "loss": 0.0002, + "step": 10552 + }, + { + "epoch": 45.11, + "learning_rate": 9.824138051735872e-05, + "loss": 0.0014, + "step": 10556 + }, + { + "epoch": 45.13, + "learning_rate": 9.823831548367226e-05, + "loss": 0.0008, + "step": 10560 + }, + { + "epoch": 45.15, + "learning_rate": 9.823524782924024e-05, + "loss": 0.0008, + "step": 10564 + }, + { + "epoch": 45.16, + "learning_rate": 9.82321775542293e-05, + "loss": 0.0003, + "step": 10568 + }, + { + "epoch": 45.18, + "learning_rate": 9.822910465880625e-05, + "loss": 0.0006, + "step": 10572 + }, + { + "epoch": 45.2, + "learning_rate": 9.822602914313803e-05, + "loss": 0.0015, + "step": 10576 + }, + { + "epoch": 45.21, + "learning_rate": 9.822295100739176e-05, + "loss": 0.0048, + "step": 10580 + }, + { + "epoch": 45.23, + "learning_rate": 9.821987025173462e-05, + "loss": 0.0041, + "step": 10584 + }, + { + "epoch": 45.25, + "learning_rate": 9.821678687633404e-05, + "loss": 0.002, + "step": 10588 + }, + { + "epoch": 45.26, + "learning_rate": 9.821370088135748e-05, + "loss": 0.0005, + "step": 10592 + }, + { + "epoch": 45.28, + "learning_rate": 9.821061226697266e-05, + "loss": 0.0007, + "step": 10596 + }, + { + "epoch": 45.3, + "learning_rate": 9.820752103334734e-05, + "loss": 0.0008, + "step": 10600 + }, + { + "epoch": 45.32, + "learning_rate": 9.820442718064948e-05, + "loss": 0.0018, + "step": 10604 + }, + { + "epoch": 45.33, + "learning_rate": 9.820133070904717e-05, + "loss": 0.001, + "step": 10608 + }, + { + "epoch": 45.35, + "learning_rate": 9.819823161870863e-05, + "loss": 0.0002, + "step": 10612 + }, + { + "epoch": 45.37, + "learning_rate": 9.819512990980223e-05, + "loss": 0.0004, + "step": 10616 + }, + { + "epoch": 45.38, + "learning_rate": 9.819202558249649e-05, + "loss": 0.0015, + "step": 10620 + }, + { + "epoch": 45.4, + "learning_rate": 9.818891863696004e-05, + "loss": 0.0005, + "step": 10624 + }, + { + "epoch": 45.42, + "learning_rate": 9.818580907336173e-05, + "loss": 0.0009, + "step": 10628 + }, + { + "epoch": 45.44, + "learning_rate": 9.818269689187045e-05, + "loss": 0.0006, + "step": 10632 + }, + { + "epoch": 45.45, + "learning_rate": 9.817958209265531e-05, + "loss": 0.0014, + "step": 10636 + }, + { + "epoch": 45.47, + "learning_rate": 9.817646467588551e-05, + "loss": 0.0006, + "step": 10640 + }, + { + "epoch": 45.49, + "learning_rate": 9.817334464173045e-05, + "loss": 0.0011, + "step": 10644 + }, + { + "epoch": 45.5, + "learning_rate": 9.817022199035961e-05, + "loss": 0.0003, + "step": 10648 + }, + { + "epoch": 45.52, + "learning_rate": 9.816709672194265e-05, + "loss": 0.0007, + "step": 10652 + }, + { + "epoch": 45.54, + "learning_rate": 9.816396883664938e-05, + "loss": 0.0002, + "step": 10656 + }, + { + "epoch": 45.56, + "learning_rate": 9.81608383346497e-05, + "loss": 0.0004, + "step": 10660 + }, + { + "epoch": 45.57, + "learning_rate": 9.815770521611371e-05, + "loss": 0.0011, + "step": 10664 + }, + { + "epoch": 45.59, + "learning_rate": 9.815456948121164e-05, + "loss": 0.0026, + "step": 10668 + }, + { + "epoch": 45.61, + "learning_rate": 9.815143113011383e-05, + "loss": 0.0032, + "step": 10672 + }, + { + "epoch": 45.62, + "learning_rate": 9.81482901629908e-05, + "loss": 0.0008, + "step": 10676 + }, + { + "epoch": 45.64, + "learning_rate": 9.814514658001317e-05, + "loss": 0.0014, + "step": 10680 + }, + { + "epoch": 45.66, + "learning_rate": 9.814200038135177e-05, + "loss": 0.0004, + "step": 10684 + }, + { + "epoch": 45.68, + "learning_rate": 9.813885156717749e-05, + "loss": 0.0008, + "step": 10688 + }, + { + "epoch": 45.69, + "learning_rate": 9.813570013766142e-05, + "loss": 0.0032, + "step": 10692 + }, + { + "epoch": 45.71, + "learning_rate": 9.813254609297477e-05, + "loss": 0.0004, + "step": 10696 + }, + { + "epoch": 45.73, + "learning_rate": 9.812938943328892e-05, + "loss": 0.0017, + "step": 10700 + }, + { + "epoch": 45.74, + "learning_rate": 9.812623015877533e-05, + "loss": 0.0024, + "step": 10704 + }, + { + "epoch": 45.76, + "learning_rate": 9.812306826960566e-05, + "loss": 0.0007, + "step": 10708 + }, + { + "epoch": 45.78, + "learning_rate": 9.81199037659517e-05, + "loss": 0.0013, + "step": 10712 + }, + { + "epoch": 45.79, + "learning_rate": 9.811673664798538e-05, + "loss": 0.0008, + "step": 10716 + }, + { + "epoch": 45.81, + "learning_rate": 9.811356691587874e-05, + "loss": 0.0003, + "step": 10720 + }, + { + "epoch": 45.83, + "learning_rate": 9.8110394569804e-05, + "loss": 0.0026, + "step": 10724 + }, + { + "epoch": 45.85, + "learning_rate": 9.81072196099335e-05, + "loss": 0.0004, + "step": 10728 + }, + { + "epoch": 45.86, + "learning_rate": 9.810404203643977e-05, + "loss": 0.0007, + "step": 10732 + }, + { + "epoch": 45.88, + "learning_rate": 9.81008618494954e-05, + "loss": 0.0005, + "step": 10736 + }, + { + "epoch": 45.9, + "learning_rate": 9.809767904927321e-05, + "loss": 0.0029, + "step": 10740 + }, + { + "epoch": 45.91, + "learning_rate": 9.809449363594607e-05, + "loss": 0.0022, + "step": 10744 + }, + { + "epoch": 45.93, + "learning_rate": 9.809130560968709e-05, + "loss": 0.002, + "step": 10748 + }, + { + "epoch": 45.95, + "learning_rate": 9.808811497066943e-05, + "loss": 0.0028, + "step": 10752 + }, + { + "epoch": 45.97, + "learning_rate": 9.808492171906647e-05, + "loss": 0.0023, + "step": 10756 + }, + { + "epoch": 45.98, + "learning_rate": 9.808172585505167e-05, + "loss": 0.0015, + "step": 10760 + }, + { + "epoch": 46.0, + "learning_rate": 9.807852737879868e-05, + "loss": 0.0014, + "step": 10764 + }, + { + "epoch": 46.02, + "learning_rate": 9.807532629048125e-05, + "loss": 0.003, + "step": 10768 + }, + { + "epoch": 46.03, + "learning_rate": 9.807212259027329e-05, + "loss": 0.0003, + "step": 10772 + }, + { + "epoch": 46.05, + "learning_rate": 9.806891627834889e-05, + "loss": 0.002, + "step": 10776 + }, + { + "epoch": 46.07, + "learning_rate": 9.80657073548822e-05, + "loss": 0.001, + "step": 10780 + }, + { + "epoch": 46.09, + "learning_rate": 9.80624958200476e-05, + "loss": 0.0014, + "step": 10784 + }, + { + "epoch": 46.1, + "learning_rate": 9.805928167401955e-05, + "loss": 0.001, + "step": 10788 + }, + { + "epoch": 46.12, + "learning_rate": 9.805606491697267e-05, + "loss": 0.003, + "step": 10792 + }, + { + "epoch": 46.14, + "learning_rate": 9.805284554908172e-05, + "loss": 0.0012, + "step": 10796 + }, + { + "epoch": 46.15, + "learning_rate": 9.80496235705216e-05, + "loss": 0.0004, + "step": 10800 + }, + { + "epoch": 46.17, + "learning_rate": 9.804639898146739e-05, + "loss": 0.0006, + "step": 10804 + }, + { + "epoch": 46.19, + "learning_rate": 9.804317178209426e-05, + "loss": 0.0007, + "step": 10808 + }, + { + "epoch": 46.21, + "learning_rate": 9.803994197257751e-05, + "loss": 0.0003, + "step": 10812 + }, + { + "epoch": 46.22, + "learning_rate": 9.803670955309265e-05, + "loss": 0.0002, + "step": 10816 + }, + { + "epoch": 46.24, + "learning_rate": 9.80334745238153e-05, + "loss": 0.0038, + "step": 10820 + }, + { + "epoch": 46.26, + "learning_rate": 9.80302368849212e-05, + "loss": 0.0003, + "step": 10824 + }, + { + "epoch": 46.27, + "learning_rate": 9.802699663658625e-05, + "loss": 0.0006, + "step": 10828 + }, + { + "epoch": 46.29, + "learning_rate": 9.802375377898649e-05, + "loss": 0.0004, + "step": 10832 + }, + { + "epoch": 46.31, + "learning_rate": 9.802050831229808e-05, + "loss": 0.0003, + "step": 10836 + }, + { + "epoch": 46.32, + "learning_rate": 9.801726023669739e-05, + "loss": 0.0002, + "step": 10840 + }, + { + "epoch": 46.34, + "learning_rate": 9.801400955236084e-05, + "loss": 0.0009, + "step": 10844 + }, + { + "epoch": 46.36, + "learning_rate": 9.801075625946508e-05, + "loss": 0.0012, + "step": 10848 + }, + { + "epoch": 46.38, + "learning_rate": 9.800750035818682e-05, + "loss": 0.0005, + "step": 10852 + }, + { + "epoch": 46.39, + "learning_rate": 9.800424184870298e-05, + "loss": 0.0019, + "step": 10856 + }, + { + "epoch": 46.41, + "learning_rate": 9.800098073119055e-05, + "loss": 0.0015, + "step": 10860 + }, + { + "epoch": 46.43, + "learning_rate": 9.799771700582676e-05, + "loss": 0.0003, + "step": 10864 + }, + { + "epoch": 46.44, + "learning_rate": 9.799445067278888e-05, + "loss": 0.0005, + "step": 10868 + }, + { + "epoch": 46.46, + "learning_rate": 9.799118173225438e-05, + "loss": 0.0009, + "step": 10872 + }, + { + "epoch": 46.48, + "learning_rate": 9.798791018440088e-05, + "loss": 0.0005, + "step": 10876 + }, + { + "epoch": 46.5, + "learning_rate": 9.798463602940608e-05, + "loss": 0.0007, + "step": 10880 + }, + { + "epoch": 46.51, + "learning_rate": 9.798135926744791e-05, + "loss": 0.0009, + "step": 10884 + }, + { + "epoch": 46.53, + "learning_rate": 9.797807989870435e-05, + "loss": 0.0043, + "step": 10888 + }, + { + "epoch": 46.55, + "learning_rate": 9.79747979233536e-05, + "loss": 0.0027, + "step": 10892 + }, + { + "epoch": 46.56, + "learning_rate": 9.797151334157393e-05, + "loss": 0.0012, + "step": 10896 + }, + { + "epoch": 46.58, + "learning_rate": 9.796822615354383e-05, + "loss": 0.0004, + "step": 10900 + }, + { + "epoch": 46.6, + "learning_rate": 9.796493635944186e-05, + "loss": 0.0007, + "step": 10904 + }, + { + "epoch": 46.62, + "learning_rate": 9.796164395944675e-05, + "loss": 0.0009, + "step": 10908 + }, + { + "epoch": 46.63, + "learning_rate": 9.795834895373739e-05, + "loss": 0.0008, + "step": 10912 + }, + { + "epoch": 46.65, + "learning_rate": 9.79550513424928e-05, + "loss": 0.0002, + "step": 10916 + }, + { + "epoch": 46.67, + "learning_rate": 9.795175112589213e-05, + "loss": 0.0009, + "step": 10920 + }, + { + "epoch": 46.68, + "learning_rate": 9.794844830411465e-05, + "loss": 0.0002, + "step": 10924 + }, + { + "epoch": 46.7, + "learning_rate": 9.794514287733983e-05, + "loss": 0.0005, + "step": 10928 + }, + { + "epoch": 46.72, + "learning_rate": 9.794183484574724e-05, + "loss": 0.001, + "step": 10932 + }, + { + "epoch": 46.74, + "learning_rate": 9.793852420951661e-05, + "loss": 0.001, + "step": 10936 + }, + { + "epoch": 46.75, + "learning_rate": 9.79352109688278e-05, + "loss": 0.0018, + "step": 10940 + }, + { + "epoch": 46.77, + "learning_rate": 9.793189512386081e-05, + "loss": 0.0014, + "step": 10944 + }, + { + "epoch": 46.79, + "learning_rate": 9.792857667479581e-05, + "loss": 0.0002, + "step": 10948 + }, + { + "epoch": 46.8, + "learning_rate": 9.792525562181305e-05, + "loss": 0.0008, + "step": 10952 + }, + { + "epoch": 46.82, + "learning_rate": 9.792193196509299e-05, + "loss": 0.0017, + "step": 10956 + }, + { + "epoch": 46.84, + "learning_rate": 9.791860570481618e-05, + "loss": 0.0006, + "step": 10960 + }, + { + "epoch": 46.85, + "learning_rate": 9.791527684116336e-05, + "loss": 0.0017, + "step": 10964 + }, + { + "epoch": 46.87, + "learning_rate": 9.791194537431536e-05, + "loss": 0.0005, + "step": 10968 + }, + { + "epoch": 46.89, + "learning_rate": 9.79086113044532e-05, + "loss": 0.0002, + "step": 10972 + }, + { + "epoch": 46.91, + "learning_rate": 9.790527463175799e-05, + "loss": 0.0005, + "step": 10976 + }, + { + "epoch": 46.92, + "learning_rate": 9.790193535641101e-05, + "loss": 0.0008, + "step": 10980 + }, + { + "epoch": 46.94, + "learning_rate": 9.789859347859369e-05, + "loss": 0.0004, + "step": 10984 + }, + { + "epoch": 46.96, + "learning_rate": 9.789524899848761e-05, + "loss": 0.0016, + "step": 10988 + }, + { + "epoch": 46.97, + "learning_rate": 9.789190191627444e-05, + "loss": 0.0003, + "step": 10992 + }, + { + "epoch": 46.99, + "learning_rate": 9.788855223213605e-05, + "loss": 0.002, + "step": 10996 + }, + { + "epoch": 47.01, + "learning_rate": 9.788519994625441e-05, + "loss": 0.0004, + "step": 11000 + }, + { + "epoch": 47.01, + "eval_exact_match": 0.5207900207900208, + "eval_loss": 0.7863634824752808, + "eval_runtime": 130.5406, + "eval_samples_per_second": 7.369, + "step": 11000 + }, + { + "epoch": 47.03, + "learning_rate": 9.788184505881164e-05, + "loss": 0.0013, + "step": 11004 + }, + { + "epoch": 47.04, + "learning_rate": 9.787848756999002e-05, + "loss": 0.0003, + "step": 11008 + }, + { + "epoch": 47.06, + "learning_rate": 9.787512747997197e-05, + "loss": 0.0006, + "step": 11012 + }, + { + "epoch": 47.08, + "learning_rate": 9.787176478894002e-05, + "loss": 0.0008, + "step": 11016 + }, + { + "epoch": 47.09, + "learning_rate": 9.786839949707686e-05, + "loss": 0.0003, + "step": 11020 + }, + { + "epoch": 47.11, + "learning_rate": 9.786503160456536e-05, + "loss": 0.0004, + "step": 11024 + }, + { + "epoch": 47.13, + "learning_rate": 9.786166111158845e-05, + "loss": 0.0004, + "step": 11028 + }, + { + "epoch": 47.15, + "learning_rate": 9.785828801832928e-05, + "loss": 0.0006, + "step": 11032 + }, + { + "epoch": 47.16, + "learning_rate": 9.785491232497108e-05, + "loss": 0.0005, + "step": 11036 + }, + { + "epoch": 47.18, + "learning_rate": 9.785153403169726e-05, + "loss": 0.0003, + "step": 11040 + }, + { + "epoch": 47.2, + "learning_rate": 9.784815313869136e-05, + "loss": 0.0014, + "step": 11044 + }, + { + "epoch": 47.21, + "learning_rate": 9.784476964613707e-05, + "loss": 0.0006, + "step": 11048 + }, + { + "epoch": 47.23, + "learning_rate": 9.784138355421819e-05, + "loss": 0.0005, + "step": 11052 + }, + { + "epoch": 47.25, + "learning_rate": 9.78379948631187e-05, + "loss": 0.001, + "step": 11056 + }, + { + "epoch": 47.26, + "learning_rate": 9.783460357302271e-05, + "loss": 0.0006, + "step": 11060 + }, + { + "epoch": 47.28, + "learning_rate": 9.783120968411445e-05, + "loss": 0.0011, + "step": 11064 + }, + { + "epoch": 47.3, + "learning_rate": 9.782781319657832e-05, + "loss": 0.0002, + "step": 11068 + }, + { + "epoch": 47.32, + "learning_rate": 9.782441411059884e-05, + "loss": 0.0006, + "step": 11072 + }, + { + "epoch": 47.33, + "learning_rate": 9.782101242636068e-05, + "loss": 0.001, + "step": 11076 + }, + { + "epoch": 47.35, + "learning_rate": 9.781760814404864e-05, + "loss": 0.0009, + "step": 11080 + }, + { + "epoch": 47.37, + "learning_rate": 9.78142012638477e-05, + "loss": 0.0017, + "step": 11084 + }, + { + "epoch": 47.38, + "learning_rate": 9.781079178594294e-05, + "loss": 0.0029, + "step": 11088 + }, + { + "epoch": 47.4, + "learning_rate": 9.780737971051957e-05, + "loss": 0.0012, + "step": 11092 + }, + { + "epoch": 47.42, + "learning_rate": 9.780396503776301e-05, + "loss": 0.0015, + "step": 11096 + }, + { + "epoch": 47.44, + "learning_rate": 9.780054776785875e-05, + "loss": 0.0013, + "step": 11100 + }, + { + "epoch": 47.45, + "learning_rate": 9.779712790099242e-05, + "loss": 0.0045, + "step": 11104 + }, + { + "epoch": 47.47, + "learning_rate": 9.779370543734987e-05, + "loss": 0.0019, + "step": 11108 + }, + { + "epoch": 47.49, + "learning_rate": 9.779028037711701e-05, + "loss": 0.0006, + "step": 11112 + }, + { + "epoch": 47.5, + "learning_rate": 9.778685272047994e-05, + "loss": 0.0004, + "step": 11116 + }, + { + "epoch": 47.52, + "learning_rate": 9.778342246762486e-05, + "loss": 0.0006, + "step": 11120 + }, + { + "epoch": 47.54, + "learning_rate": 9.777998961873815e-05, + "loss": 0.0004, + "step": 11124 + }, + { + "epoch": 47.56, + "learning_rate": 9.77765541740063e-05, + "loss": 0.0003, + "step": 11128 + }, + { + "epoch": 47.57, + "learning_rate": 9.777311613361596e-05, + "loss": 0.0017, + "step": 11132 + }, + { + "epoch": 47.59, + "learning_rate": 9.776967549775392e-05, + "loss": 0.0008, + "step": 11136 + }, + { + "epoch": 47.61, + "learning_rate": 9.77662322666071e-05, + "loss": 0.0004, + "step": 11140 + }, + { + "epoch": 47.62, + "learning_rate": 9.776278644036257e-05, + "loss": 0.0006, + "step": 11144 + }, + { + "epoch": 47.64, + "learning_rate": 9.775933801920754e-05, + "loss": 0.0032, + "step": 11148 + }, + { + "epoch": 47.66, + "learning_rate": 9.775588700332935e-05, + "loss": 0.0021, + "step": 11152 + }, + { + "epoch": 47.68, + "learning_rate": 9.77524333929155e-05, + "loss": 0.0006, + "step": 11156 + }, + { + "epoch": 47.69, + "learning_rate": 9.774897718815364e-05, + "loss": 0.0006, + "step": 11160 + }, + { + "epoch": 47.71, + "learning_rate": 9.77455183892315e-05, + "loss": 0.0006, + "step": 11164 + }, + { + "epoch": 47.73, + "learning_rate": 9.774205699633704e-05, + "loss": 0.0004, + "step": 11168 + }, + { + "epoch": 47.74, + "learning_rate": 9.773859300965828e-05, + "loss": 0.0005, + "step": 11172 + }, + { + "epoch": 47.76, + "learning_rate": 9.773512642938343e-05, + "loss": 0.0003, + "step": 11176 + }, + { + "epoch": 47.78, + "learning_rate": 9.773165725570081e-05, + "loss": 0.0007, + "step": 11180 + }, + { + "epoch": 47.79, + "learning_rate": 9.772818548879893e-05, + "loss": 0.0021, + "step": 11184 + }, + { + "epoch": 47.81, + "learning_rate": 9.772471112886639e-05, + "loss": 0.0003, + "step": 11188 + }, + { + "epoch": 47.83, + "learning_rate": 9.772123417609194e-05, + "loss": 0.0006, + "step": 11192 + }, + { + "epoch": 47.85, + "learning_rate": 9.771775463066449e-05, + "loss": 0.0006, + "step": 11196 + }, + { + "epoch": 47.86, + "learning_rate": 9.771427249277306e-05, + "loss": 0.0028, + "step": 11200 + }, + { + "epoch": 47.88, + "learning_rate": 9.771078776260687e-05, + "loss": 0.0002, + "step": 11204 + }, + { + "epoch": 47.9, + "learning_rate": 9.77073004403552e-05, + "loss": 0.0003, + "step": 11208 + }, + { + "epoch": 47.91, + "learning_rate": 9.770381052620756e-05, + "loss": 0.0033, + "step": 11212 + }, + { + "epoch": 47.93, + "learning_rate": 9.770031802035353e-05, + "loss": 0.0002, + "step": 11216 + }, + { + "epoch": 47.95, + "learning_rate": 9.769682292298284e-05, + "loss": 0.0012, + "step": 11220 + }, + { + "epoch": 47.97, + "learning_rate": 9.76933252342854e-05, + "loss": 0.0012, + "step": 11224 + }, + { + "epoch": 47.98, + "learning_rate": 9.768982495445122e-05, + "loss": 0.0008, + "step": 11228 + }, + { + "epoch": 48.0, + "learning_rate": 9.768632208367048e-05, + "loss": 0.0015, + "step": 11232 + }, + { + "epoch": 48.02, + "learning_rate": 9.768281662213347e-05, + "loss": 0.001, + "step": 11236 + }, + { + "epoch": 48.03, + "learning_rate": 9.767930857003065e-05, + "loss": 0.0006, + "step": 11240 + }, + { + "epoch": 48.05, + "learning_rate": 9.767579792755262e-05, + "loss": 0.0005, + "step": 11244 + }, + { + "epoch": 48.07, + "learning_rate": 9.76722846948901e-05, + "loss": 0.0004, + "step": 11248 + }, + { + "epoch": 48.09, + "learning_rate": 9.766876887223393e-05, + "loss": 0.001, + "step": 11252 + }, + { + "epoch": 48.1, + "learning_rate": 9.766525045977519e-05, + "loss": 0.0016, + "step": 11256 + }, + { + "epoch": 48.12, + "learning_rate": 9.766172945770497e-05, + "loss": 0.0009, + "step": 11260 + }, + { + "epoch": 48.14, + "learning_rate": 9.76582058662146e-05, + "loss": 0.0044, + "step": 11264 + }, + { + "epoch": 48.15, + "learning_rate": 9.765467968549549e-05, + "loss": 0.0007, + "step": 11268 + }, + { + "epoch": 48.17, + "learning_rate": 9.765115091573923e-05, + "loss": 0.0008, + "step": 11272 + }, + { + "epoch": 48.19, + "learning_rate": 9.764761955713752e-05, + "loss": 0.0004, + "step": 11276 + }, + { + "epoch": 48.21, + "learning_rate": 9.764408560988225e-05, + "loss": 0.001, + "step": 11280 + }, + { + "epoch": 48.22, + "learning_rate": 9.764054907416537e-05, + "loss": 0.0012, + "step": 11284 + }, + { + "epoch": 48.24, + "learning_rate": 9.763700995017905e-05, + "loss": 0.0007, + "step": 11288 + }, + { + "epoch": 48.26, + "learning_rate": 9.763346823811555e-05, + "loss": 0.0012, + "step": 11292 + }, + { + "epoch": 48.27, + "learning_rate": 9.76299239381673e-05, + "loss": 0.0009, + "step": 11296 + }, + { + "epoch": 48.29, + "learning_rate": 9.762637705052684e-05, + "loss": 0.0007, + "step": 11300 + }, + { + "epoch": 48.31, + "learning_rate": 9.76228275753869e-05, + "loss": 0.0009, + "step": 11304 + }, + { + "epoch": 48.32, + "learning_rate": 9.76192755129403e-05, + "loss": 0.0014, + "step": 11308 + }, + { + "epoch": 48.34, + "learning_rate": 9.761572086338002e-05, + "loss": 0.0006, + "step": 11312 + }, + { + "epoch": 48.36, + "learning_rate": 9.761216362689918e-05, + "loss": 0.0006, + "step": 11316 + }, + { + "epoch": 48.38, + "learning_rate": 9.760860380369105e-05, + "loss": 0.001, + "step": 11320 + }, + { + "epoch": 48.39, + "learning_rate": 9.760504139394902e-05, + "loss": 0.0011, + "step": 11324 + }, + { + "epoch": 48.41, + "learning_rate": 9.760147639786665e-05, + "loss": 0.0001, + "step": 11328 + }, + { + "epoch": 48.43, + "learning_rate": 9.75979088156376e-05, + "loss": 0.0004, + "step": 11332 + }, + { + "epoch": 48.44, + "learning_rate": 9.759433864745571e-05, + "loss": 0.0002, + "step": 11336 + }, + { + "epoch": 48.46, + "learning_rate": 9.759076589351494e-05, + "loss": 0.0007, + "step": 11340 + }, + { + "epoch": 48.48, + "learning_rate": 9.75871905540094e-05, + "loss": 0.0013, + "step": 11344 + }, + { + "epoch": 48.5, + "learning_rate": 9.758361262913334e-05, + "loss": 0.0004, + "step": 11348 + }, + { + "epoch": 48.51, + "learning_rate": 9.758003211908111e-05, + "loss": 0.0007, + "step": 11352 + }, + { + "epoch": 48.53, + "learning_rate": 9.757644902404728e-05, + "loss": 0.0005, + "step": 11356 + }, + { + "epoch": 48.55, + "learning_rate": 9.75728633442265e-05, + "loss": 0.0002, + "step": 11360 + }, + { + "epoch": 48.56, + "learning_rate": 9.756927507981357e-05, + "loss": 0.0011, + "step": 11364 + }, + { + "epoch": 48.58, + "learning_rate": 9.756568423100344e-05, + "loss": 0.0003, + "step": 11368 + }, + { + "epoch": 48.6, + "learning_rate": 9.756209079799121e-05, + "loss": 0.001, + "step": 11372 + }, + { + "epoch": 48.62, + "learning_rate": 9.75584947809721e-05, + "loss": 0.0003, + "step": 11376 + }, + { + "epoch": 48.63, + "learning_rate": 9.755489618014147e-05, + "loss": 0.0002, + "step": 11380 + }, + { + "epoch": 48.65, + "learning_rate": 9.755129499569482e-05, + "loss": 0.0019, + "step": 11384 + }, + { + "epoch": 48.67, + "learning_rate": 9.754769122782784e-05, + "loss": 0.0008, + "step": 11388 + }, + { + "epoch": 48.68, + "learning_rate": 9.754408487673628e-05, + "loss": 0.0002, + "step": 11392 + }, + { + "epoch": 48.7, + "learning_rate": 9.754047594261609e-05, + "loss": 0.0004, + "step": 11396 + }, + { + "epoch": 48.72, + "learning_rate": 9.753686442566334e-05, + "loss": 0.0005, + "step": 11400 + }, + { + "epoch": 48.74, + "learning_rate": 9.753325032607423e-05, + "loss": 0.0001, + "step": 11404 + }, + { + "epoch": 48.75, + "learning_rate": 9.75296336440451e-05, + "loss": 0.0004, + "step": 11408 + }, + { + "epoch": 48.77, + "learning_rate": 9.752601437977248e-05, + "loss": 0.0018, + "step": 11412 + }, + { + "epoch": 48.79, + "learning_rate": 9.752239253345297e-05, + "loss": 0.0003, + "step": 11416 + }, + { + "epoch": 48.8, + "learning_rate": 9.751876810528335e-05, + "loss": 0.001, + "step": 11420 + }, + { + "epoch": 48.82, + "learning_rate": 9.751514109546054e-05, + "loss": 0.0031, + "step": 11424 + }, + { + "epoch": 48.84, + "learning_rate": 9.751151150418158e-05, + "loss": 0.0004, + "step": 11428 + }, + { + "epoch": 48.85, + "learning_rate": 9.750787933164366e-05, + "loss": 0.0027, + "step": 11432 + }, + { + "epoch": 48.87, + "learning_rate": 9.750424457804412e-05, + "loss": 0.0003, + "step": 11436 + }, + { + "epoch": 48.89, + "learning_rate": 9.750060724358045e-05, + "loss": 0.0021, + "step": 11440 + }, + { + "epoch": 48.91, + "learning_rate": 9.749696732845021e-05, + "loss": 0.0033, + "step": 11444 + }, + { + "epoch": 48.92, + "learning_rate": 9.749332483285123e-05, + "loss": 0.0011, + "step": 11448 + }, + { + "epoch": 48.94, + "learning_rate": 9.748967975698135e-05, + "loss": 0.0002, + "step": 11452 + }, + { + "epoch": 48.96, + "learning_rate": 9.748603210103862e-05, + "loss": 0.0012, + "step": 11456 + }, + { + "epoch": 48.97, + "learning_rate": 9.748238186522119e-05, + "loss": 0.0025, + "step": 11460 + }, + { + "epoch": 48.99, + "learning_rate": 9.747872904972741e-05, + "loss": 0.006, + "step": 11464 + }, + { + "epoch": 49.01, + "learning_rate": 9.747507365475573e-05, + "loss": 0.0002, + "step": 11468 + }, + { + "epoch": 49.03, + "learning_rate": 9.747141568050472e-05, + "loss": 0.0004, + "step": 11472 + }, + { + "epoch": 49.04, + "learning_rate": 9.746775512717312e-05, + "loss": 0.0005, + "step": 11476 + }, + { + "epoch": 49.06, + "learning_rate": 9.746409199495983e-05, + "loss": 0.0004, + "step": 11480 + }, + { + "epoch": 49.08, + "learning_rate": 9.746042628406384e-05, + "loss": 0.0016, + "step": 11484 + }, + { + "epoch": 49.09, + "learning_rate": 9.745675799468432e-05, + "loss": 0.0002, + "step": 11488 + }, + { + "epoch": 49.11, + "learning_rate": 9.745308712702055e-05, + "loss": 0.0008, + "step": 11492 + }, + { + "epoch": 49.13, + "learning_rate": 9.744941368127199e-05, + "loss": 0.0002, + "step": 11496 + }, + { + "epoch": 49.15, + "learning_rate": 9.744573765763816e-05, + "loss": 0.0006, + "step": 11500 + }, + { + "epoch": 49.16, + "learning_rate": 9.744205905631884e-05, + "loss": 0.0013, + "step": 11504 + }, + { + "epoch": 49.18, + "learning_rate": 9.743837787751386e-05, + "loss": 0.0009, + "step": 11508 + }, + { + "epoch": 49.2, + "learning_rate": 9.743469412142321e-05, + "loss": 0.0004, + "step": 11512 + }, + { + "epoch": 49.21, + "learning_rate": 9.743100778824703e-05, + "loss": 0.0003, + "step": 11516 + }, + { + "epoch": 49.23, + "learning_rate": 9.742731887818558e-05, + "loss": 0.0003, + "step": 11520 + }, + { + "epoch": 49.25, + "learning_rate": 9.74236273914393e-05, + "loss": 0.0002, + "step": 11524 + }, + { + "epoch": 49.26, + "learning_rate": 9.741993332820873e-05, + "loss": 0.0004, + "step": 11528 + }, + { + "epoch": 49.28, + "learning_rate": 9.741623668869457e-05, + "loss": 0.0002, + "step": 11532 + }, + { + "epoch": 49.3, + "learning_rate": 9.741253747309767e-05, + "loss": 0.0002, + "step": 11536 + }, + { + "epoch": 49.32, + "learning_rate": 9.740883568161897e-05, + "loss": 0.0003, + "step": 11540 + }, + { + "epoch": 49.33, + "learning_rate": 9.740513131445961e-05, + "loss": 0.0007, + "step": 11544 + }, + { + "epoch": 49.35, + "learning_rate": 9.740142437182084e-05, + "loss": 0.0024, + "step": 11548 + }, + { + "epoch": 49.37, + "learning_rate": 9.739771485390406e-05, + "loss": 0.0004, + "step": 11552 + }, + { + "epoch": 49.38, + "learning_rate": 9.73940027609108e-05, + "loss": 0.0014, + "step": 11556 + }, + { + "epoch": 49.4, + "learning_rate": 9.739028809304274e-05, + "loss": 0.001, + "step": 11560 + }, + { + "epoch": 49.42, + "learning_rate": 9.738657085050167e-05, + "loss": 0.0013, + "step": 11564 + }, + { + "epoch": 49.44, + "learning_rate": 9.738285103348958e-05, + "loss": 0.0003, + "step": 11568 + }, + { + "epoch": 49.45, + "learning_rate": 9.737912864220856e-05, + "loss": 0.0017, + "step": 11572 + }, + { + "epoch": 49.47, + "learning_rate": 9.73754036768608e-05, + "loss": 0.0009, + "step": 11576 + }, + { + "epoch": 49.49, + "learning_rate": 9.737167613764873e-05, + "loss": 0.0004, + "step": 11580 + }, + { + "epoch": 49.5, + "learning_rate": 9.736794602477483e-05, + "loss": 0.0008, + "step": 11584 + }, + { + "epoch": 49.52, + "learning_rate": 9.736421333844177e-05, + "loss": 0.0032, + "step": 11588 + }, + { + "epoch": 49.54, + "learning_rate": 9.736047807885233e-05, + "loss": 0.0004, + "step": 11592 + }, + { + "epoch": 49.56, + "learning_rate": 9.735674024620947e-05, + "loss": 0.0011, + "step": 11596 + }, + { + "epoch": 49.57, + "learning_rate": 9.735299984071621e-05, + "loss": 0.0023, + "step": 11600 + }, + { + "epoch": 49.59, + "learning_rate": 9.734925686257584e-05, + "loss": 0.001, + "step": 11604 + }, + { + "epoch": 49.61, + "learning_rate": 9.734551131199163e-05, + "loss": 0.0008, + "step": 11608 + }, + { + "epoch": 49.62, + "learning_rate": 9.734176318916715e-05, + "loss": 0.0002, + "step": 11612 + }, + { + "epoch": 49.64, + "learning_rate": 9.733801249430596e-05, + "loss": 0.0006, + "step": 11616 + }, + { + "epoch": 49.66, + "learning_rate": 9.73342592276119e-05, + "loss": 0.0016, + "step": 11620 + }, + { + "epoch": 49.68, + "learning_rate": 9.733050338928882e-05, + "loss": 0.0007, + "step": 11624 + }, + { + "epoch": 49.69, + "learning_rate": 9.732674497954082e-05, + "loss": 0.0009, + "step": 11628 + }, + { + "epoch": 49.71, + "learning_rate": 9.732298399857206e-05, + "loss": 0.0012, + "step": 11632 + }, + { + "epoch": 49.73, + "learning_rate": 9.73192204465869e-05, + "loss": 0.0019, + "step": 11636 + }, + { + "epoch": 49.74, + "learning_rate": 9.731545432378977e-05, + "loss": 0.0004, + "step": 11640 + }, + { + "epoch": 49.76, + "learning_rate": 9.73116856303853e-05, + "loss": 0.0004, + "step": 11644 + }, + { + "epoch": 49.78, + "learning_rate": 9.730791436657827e-05, + "loss": 0.001, + "step": 11648 + }, + { + "epoch": 49.79, + "learning_rate": 9.730414053257352e-05, + "loss": 0.0002, + "step": 11652 + }, + { + "epoch": 49.81, + "learning_rate": 9.730036412857611e-05, + "loss": 0.0007, + "step": 11656 + }, + { + "epoch": 49.83, + "learning_rate": 9.72965851547912e-05, + "loss": 0.0002, + "step": 11660 + }, + { + "epoch": 49.85, + "learning_rate": 9.72928036114241e-05, + "loss": 0.0021, + "step": 11664 + }, + { + "epoch": 49.86, + "learning_rate": 9.728901949868024e-05, + "loss": 0.0029, + "step": 11668 + }, + { + "epoch": 49.88, + "learning_rate": 9.728523281676524e-05, + "loss": 0.0008, + "step": 11672 + }, + { + "epoch": 49.9, + "learning_rate": 9.728144356588481e-05, + "loss": 0.0002, + "step": 11676 + }, + { + "epoch": 49.91, + "learning_rate": 9.727765174624482e-05, + "loss": 0.0004, + "step": 11680 + }, + { + "epoch": 49.93, + "learning_rate": 9.727385735805126e-05, + "loss": 0.0002, + "step": 11684 + }, + { + "epoch": 49.95, + "learning_rate": 9.72700604015103e-05, + "loss": 0.0006, + "step": 11688 + }, + { + "epoch": 49.97, + "learning_rate": 9.726626087682821e-05, + "loss": 0.0015, + "step": 11692 + }, + { + "epoch": 49.98, + "learning_rate": 9.726245878421142e-05, + "loss": 0.0017, + "step": 11696 + }, + { + "epoch": 50.0, + "learning_rate": 9.72586541238665e-05, + "loss": 0.0007, + "step": 11700 + }, + { + "epoch": 50.02, + "learning_rate": 9.725484689600013e-05, + "loss": 0.0005, + "step": 11704 + }, + { + "epoch": 50.03, + "learning_rate": 9.72510371008192e-05, + "loss": 0.0006, + "step": 11708 + }, + { + "epoch": 50.05, + "learning_rate": 9.724722473853065e-05, + "loss": 0.0004, + "step": 11712 + }, + { + "epoch": 50.07, + "learning_rate": 9.724340980934162e-05, + "loss": 0.0003, + "step": 11716 + }, + { + "epoch": 50.09, + "learning_rate": 9.723959231345936e-05, + "loss": 0.0008, + "step": 11720 + }, + { + "epoch": 50.1, + "learning_rate": 9.723577225109128e-05, + "loss": 0.0009, + "step": 11724 + }, + { + "epoch": 50.12, + "learning_rate": 9.723194962244493e-05, + "loss": 0.0015, + "step": 11728 + }, + { + "epoch": 50.14, + "learning_rate": 9.722812442772798e-05, + "loss": 0.0001, + "step": 11732 + }, + { + "epoch": 50.15, + "learning_rate": 9.722429666714823e-05, + "loss": 0.0045, + "step": 11736 + }, + { + "epoch": 50.17, + "learning_rate": 9.722046634091367e-05, + "loss": 0.0069, + "step": 11740 + }, + { + "epoch": 50.19, + "learning_rate": 9.72166334492324e-05, + "loss": 0.0003, + "step": 11744 + }, + { + "epoch": 50.21, + "learning_rate": 9.721279799231263e-05, + "loss": 0.0005, + "step": 11748 + }, + { + "epoch": 50.22, + "learning_rate": 9.720895997036275e-05, + "loss": 0.0008, + "step": 11752 + }, + { + "epoch": 50.24, + "learning_rate": 9.72051193835913e-05, + "loss": 0.0003, + "step": 11756 + }, + { + "epoch": 50.26, + "learning_rate": 9.720127623220689e-05, + "loss": 0.0002, + "step": 11760 + }, + { + "epoch": 50.27, + "learning_rate": 9.719743051641834e-05, + "loss": 0.0008, + "step": 11764 + }, + { + "epoch": 50.29, + "learning_rate": 9.719358223643459e-05, + "loss": 0.0003, + "step": 11768 + }, + { + "epoch": 50.31, + "learning_rate": 9.718973139246471e-05, + "loss": 0.0004, + "step": 11772 + }, + { + "epoch": 50.32, + "learning_rate": 9.71858779847179e-05, + "loss": 0.0002, + "step": 11776 + }, + { + "epoch": 50.34, + "learning_rate": 9.718202201340353e-05, + "loss": 0.0012, + "step": 11780 + }, + { + "epoch": 50.36, + "learning_rate": 9.717816347873108e-05, + "loss": 0.001, + "step": 11784 + }, + { + "epoch": 50.38, + "learning_rate": 9.717430238091019e-05, + "loss": 0.0002, + "step": 11788 + }, + { + "epoch": 50.39, + "learning_rate": 9.717043872015061e-05, + "loss": 0.0011, + "step": 11792 + }, + { + "epoch": 50.41, + "learning_rate": 9.716657249666228e-05, + "loss": 0.001, + "step": 11796 + }, + { + "epoch": 50.43, + "learning_rate": 9.716270371065523e-05, + "loss": 0.001, + "step": 11800 + }, + { + "epoch": 50.44, + "learning_rate": 9.715883236233965e-05, + "loss": 0.0002, + "step": 11804 + }, + { + "epoch": 50.46, + "learning_rate": 9.715495845192586e-05, + "loss": 0.0004, + "step": 11808 + }, + { + "epoch": 50.48, + "learning_rate": 9.715108197962435e-05, + "loss": 0.0017, + "step": 11812 + }, + { + "epoch": 50.5, + "learning_rate": 9.714720294564569e-05, + "loss": 0.0007, + "step": 11816 + }, + { + "epoch": 50.51, + "learning_rate": 9.714332135020066e-05, + "loss": 0.0006, + "step": 11820 + }, + { + "epoch": 50.53, + "learning_rate": 9.71394371935001e-05, + "loss": 0.0011, + "step": 11824 + }, + { + "epoch": 50.55, + "learning_rate": 9.713555047575509e-05, + "loss": 0.0003, + "step": 11828 + }, + { + "epoch": 50.56, + "learning_rate": 9.713166119717674e-05, + "loss": 0.0006, + "step": 11832 + }, + { + "epoch": 50.58, + "learning_rate": 9.712776935797638e-05, + "loss": 0.0009, + "step": 11836 + }, + { + "epoch": 50.6, + "learning_rate": 9.712387495836544e-05, + "loss": 0.0004, + "step": 11840 + }, + { + "epoch": 50.62, + "learning_rate": 9.711997799855552e-05, + "loss": 0.0002, + "step": 11844 + }, + { + "epoch": 50.63, + "learning_rate": 9.71160784787583e-05, + "loss": 0.0003, + "step": 11848 + }, + { + "epoch": 50.65, + "learning_rate": 9.711217639918567e-05, + "loss": 0.0008, + "step": 11852 + }, + { + "epoch": 50.67, + "learning_rate": 9.710827176004961e-05, + "loss": 0.0006, + "step": 11856 + }, + { + "epoch": 50.68, + "learning_rate": 9.710436456156226e-05, + "loss": 0.0004, + "step": 11860 + }, + { + "epoch": 50.7, + "learning_rate": 9.71004548039359e-05, + "loss": 0.0003, + "step": 11864 + }, + { + "epoch": 50.72, + "learning_rate": 9.709654248738293e-05, + "loss": 0.0018, + "step": 11868 + }, + { + "epoch": 50.74, + "learning_rate": 9.709262761211592e-05, + "loss": 0.0012, + "step": 11872 + }, + { + "epoch": 50.75, + "learning_rate": 9.708871017834756e-05, + "loss": 0.0006, + "step": 11876 + }, + { + "epoch": 50.77, + "learning_rate": 9.708479018629066e-05, + "loss": 0.0007, + "step": 11880 + }, + { + "epoch": 50.79, + "learning_rate": 9.70808676361582e-05, + "loss": 0.0006, + "step": 11884 + }, + { + "epoch": 50.8, + "learning_rate": 9.707694252816331e-05, + "loss": 0.0004, + "step": 11888 + }, + { + "epoch": 50.82, + "learning_rate": 9.707301486251921e-05, + "loss": 0.0002, + "step": 11892 + }, + { + "epoch": 50.84, + "learning_rate": 9.70690846394393e-05, + "loss": 0.0018, + "step": 11896 + }, + { + "epoch": 50.85, + "learning_rate": 9.70651518591371e-05, + "loss": 0.0038, + "step": 11900 + }, + { + "epoch": 50.87, + "learning_rate": 9.706121652182628e-05, + "loss": 0.0006, + "step": 11904 + }, + { + "epoch": 50.89, + "learning_rate": 9.705727862772064e-05, + "loss": 0.0008, + "step": 11908 + }, + { + "epoch": 50.91, + "learning_rate": 9.705333817703413e-05, + "loss": 0.0001, + "step": 11912 + }, + { + "epoch": 50.92, + "learning_rate": 9.704939516998082e-05, + "loss": 0.0004, + "step": 11916 + }, + { + "epoch": 50.94, + "learning_rate": 9.704544960677492e-05, + "loss": 0.0007, + "step": 11920 + }, + { + "epoch": 50.96, + "learning_rate": 9.704150148763082e-05, + "loss": 0.0006, + "step": 11924 + }, + { + "epoch": 50.97, + "learning_rate": 9.7037550812763e-05, + "loss": 0.0005, + "step": 11928 + }, + { + "epoch": 50.99, + "learning_rate": 9.703359758238609e-05, + "loss": 0.0013, + "step": 11932 + }, + { + "epoch": 51.01, + "learning_rate": 9.702964179671488e-05, + "loss": 0.0037, + "step": 11936 + }, + { + "epoch": 51.03, + "learning_rate": 9.702568345596428e-05, + "loss": 0.0006, + "step": 11940 + }, + { + "epoch": 51.04, + "learning_rate": 9.702172256034933e-05, + "loss": 0.004, + "step": 11944 + }, + { + "epoch": 51.06, + "learning_rate": 9.701775911008524e-05, + "loss": 0.0008, + "step": 11948 + }, + { + "epoch": 51.08, + "learning_rate": 9.701379310538733e-05, + "loss": 0.0007, + "step": 11952 + }, + { + "epoch": 51.09, + "learning_rate": 9.700982454647108e-05, + "loss": 0.0003, + "step": 11956 + }, + { + "epoch": 51.11, + "learning_rate": 9.700585343355208e-05, + "loss": 0.0029, + "step": 11960 + }, + { + "epoch": 51.13, + "learning_rate": 9.70018797668461e-05, + "loss": 0.0006, + "step": 11964 + }, + { + "epoch": 51.15, + "learning_rate": 9.699790354656902e-05, + "loss": 0.0013, + "step": 11968 + }, + { + "epoch": 51.16, + "learning_rate": 9.699392477293686e-05, + "loss": 0.0005, + "step": 11972 + }, + { + "epoch": 51.18, + "learning_rate": 9.698994344616577e-05, + "loss": 0.0003, + "step": 11976 + }, + { + "epoch": 51.2, + "learning_rate": 9.698595956647205e-05, + "loss": 0.0008, + "step": 11980 + }, + { + "epoch": 51.21, + "learning_rate": 9.698197313407218e-05, + "loss": 0.0002, + "step": 11984 + }, + { + "epoch": 51.23, + "learning_rate": 9.697798414918271e-05, + "loss": 0.0024, + "step": 11988 + }, + { + "epoch": 51.25, + "learning_rate": 9.697399261202036e-05, + "loss": 0.0003, + "step": 11992 + }, + { + "epoch": 51.26, + "learning_rate": 9.6969998522802e-05, + "loss": 0.0009, + "step": 11996 + }, + { + "epoch": 51.28, + "learning_rate": 9.696600188174459e-05, + "loss": 0.0019, + "step": 12000 + }, + { + "epoch": 51.28, + "eval_exact_match": 0.5166320166320166, + "eval_loss": 0.8132425546646118, + "eval_runtime": 140.4988, + "eval_samples_per_second": 6.847, + "step": 12000 + }, + { + "epoch": 51.3, + "learning_rate": 9.696200268906532e-05, + "loss": 0.0009, + "step": 12004 + }, + { + "epoch": 51.32, + "learning_rate": 9.69580009449814e-05, + "loss": 0.0016, + "step": 12008 + }, + { + "epoch": 51.33, + "learning_rate": 9.69539966497103e-05, + "loss": 0.0015, + "step": 12012 + }, + { + "epoch": 51.35, + "learning_rate": 9.694998980346952e-05, + "loss": 0.001, + "step": 12016 + }, + { + "epoch": 51.37, + "learning_rate": 9.694598040647679e-05, + "loss": 0.0005, + "step": 12020 + }, + { + "epoch": 51.38, + "learning_rate": 9.694196845894992e-05, + "loss": 0.0003, + "step": 12024 + }, + { + "epoch": 51.4, + "learning_rate": 9.693795396110686e-05, + "loss": 0.0008, + "step": 12028 + }, + { + "epoch": 51.42, + "learning_rate": 9.693393691316572e-05, + "loss": 0.0011, + "step": 12032 + }, + { + "epoch": 51.44, + "learning_rate": 9.692991731534477e-05, + "loss": 0.0025, + "step": 12036 + }, + { + "epoch": 51.45, + "learning_rate": 9.692589516786237e-05, + "loss": 0.0011, + "step": 12040 + }, + { + "epoch": 51.47, + "learning_rate": 9.692187047093703e-05, + "loss": 0.0015, + "step": 12044 + }, + { + "epoch": 51.49, + "learning_rate": 9.691784322478743e-05, + "loss": 0.0006, + "step": 12048 + }, + { + "epoch": 51.5, + "learning_rate": 9.691381342963235e-05, + "loss": 0.0016, + "step": 12052 + }, + { + "epoch": 51.52, + "learning_rate": 9.690978108569073e-05, + "loss": 0.0007, + "step": 12056 + }, + { + "epoch": 51.54, + "learning_rate": 9.690574619318166e-05, + "loss": 0.0009, + "step": 12060 + }, + { + "epoch": 51.56, + "learning_rate": 9.690170875232431e-05, + "loss": 0.0016, + "step": 12064 + }, + { + "epoch": 51.57, + "learning_rate": 9.689766876333809e-05, + "loss": 0.0001, + "step": 12068 + }, + { + "epoch": 51.59, + "learning_rate": 9.689362622644244e-05, + "loss": 0.0005, + "step": 12072 + }, + { + "epoch": 51.61, + "learning_rate": 9.688958114185702e-05, + "loss": 0.0002, + "step": 12076 + }, + { + "epoch": 51.62, + "learning_rate": 9.688553350980157e-05, + "loss": 0.002, + "step": 12080 + }, + { + "epoch": 51.64, + "learning_rate": 9.6881483330496e-05, + "loss": 0.0004, + "step": 12084 + }, + { + "epoch": 51.66, + "learning_rate": 9.687743060416036e-05, + "loss": 0.0014, + "step": 12088 + }, + { + "epoch": 51.68, + "learning_rate": 9.687337533101484e-05, + "loss": 0.0008, + "step": 12092 + }, + { + "epoch": 51.69, + "learning_rate": 9.686931751127974e-05, + "loss": 0.0001, + "step": 12096 + }, + { + "epoch": 51.71, + "learning_rate": 9.686525714517554e-05, + "loss": 0.0004, + "step": 12100 + }, + { + "epoch": 51.73, + "learning_rate": 9.68611942329228e-05, + "loss": 0.0002, + "step": 12104 + }, + { + "epoch": 51.74, + "learning_rate": 9.685712877474229e-05, + "loss": 0.0003, + "step": 12108 + }, + { + "epoch": 51.76, + "learning_rate": 9.685306077085488e-05, + "loss": 0.0014, + "step": 12112 + }, + { + "epoch": 51.78, + "learning_rate": 9.684899022148154e-05, + "loss": 0.0002, + "step": 12116 + }, + { + "epoch": 51.79, + "learning_rate": 9.684491712684347e-05, + "loss": 0.0005, + "step": 12120 + }, + { + "epoch": 51.81, + "learning_rate": 9.684084148716195e-05, + "loss": 0.0002, + "step": 12124 + }, + { + "epoch": 51.83, + "learning_rate": 9.683676330265838e-05, + "loss": 0.0002, + "step": 12128 + }, + { + "epoch": 51.85, + "learning_rate": 9.683268257355433e-05, + "loss": 0.0036, + "step": 12132 + }, + { + "epoch": 51.86, + "learning_rate": 9.682859930007153e-05, + "loss": 0.0003, + "step": 12136 + }, + { + "epoch": 51.88, + "learning_rate": 9.682451348243178e-05, + "loss": 0.0016, + "step": 12140 + }, + { + "epoch": 51.9, + "learning_rate": 9.68204251208571e-05, + "loss": 0.0014, + "step": 12144 + }, + { + "epoch": 51.91, + "learning_rate": 9.681633421556957e-05, + "loss": 0.0015, + "step": 12148 + }, + { + "epoch": 51.93, + "learning_rate": 9.681224076679147e-05, + "loss": 0.0002, + "step": 12152 + }, + { + "epoch": 51.95, + "learning_rate": 9.680814477474518e-05, + "loss": 0.0008, + "step": 12156 + }, + { + "epoch": 51.97, + "learning_rate": 9.680404623965325e-05, + "loss": 0.003, + "step": 12160 + }, + { + "epoch": 51.98, + "learning_rate": 9.679994516173834e-05, + "loss": 0.0016, + "step": 12164 + }, + { + "epoch": 52.0, + "learning_rate": 9.679584154122323e-05, + "loss": 0.0004, + "step": 12168 + }, + { + "epoch": 52.02, + "learning_rate": 9.679173537833092e-05, + "loss": 0.0037, + "step": 12172 + }, + { + "epoch": 52.03, + "learning_rate": 9.678762667328445e-05, + "loss": 0.0006, + "step": 12176 + }, + { + "epoch": 52.05, + "learning_rate": 9.678351542630706e-05, + "loss": 0.0008, + "step": 12180 + }, + { + "epoch": 52.07, + "learning_rate": 9.677940163762212e-05, + "loss": 0.0005, + "step": 12184 + }, + { + "epoch": 52.09, + "learning_rate": 9.67752853074531e-05, + "loss": 0.0003, + "step": 12188 + }, + { + "epoch": 52.1, + "learning_rate": 9.677116643602367e-05, + "loss": 0.0002, + "step": 12192 + }, + { + "epoch": 52.12, + "learning_rate": 9.676704502355757e-05, + "loss": 0.0004, + "step": 12196 + }, + { + "epoch": 52.14, + "learning_rate": 9.676292107027876e-05, + "loss": 0.0009, + "step": 12200 + }, + { + "epoch": 52.15, + "learning_rate": 9.675879457641124e-05, + "loss": 0.0003, + "step": 12204 + }, + { + "epoch": 52.17, + "learning_rate": 9.675466554217922e-05, + "loss": 0.0007, + "step": 12208 + }, + { + "epoch": 52.19, + "learning_rate": 9.675053396780704e-05, + "loss": 0.0003, + "step": 12212 + }, + { + "epoch": 52.21, + "learning_rate": 9.674639985351915e-05, + "loss": 0.0002, + "step": 12216 + }, + { + "epoch": 52.22, + "learning_rate": 9.674226319954014e-05, + "loss": 0.0001, + "step": 12220 + }, + { + "epoch": 52.24, + "learning_rate": 9.673812400609479e-05, + "loss": 0.0002, + "step": 12224 + }, + { + "epoch": 52.26, + "learning_rate": 9.673398227340794e-05, + "loss": 0.0002, + "step": 12228 + }, + { + "epoch": 52.27, + "learning_rate": 9.672983800170464e-05, + "loss": 0.0002, + "step": 12232 + }, + { + "epoch": 52.29, + "learning_rate": 9.672569119121e-05, + "loss": 0.0005, + "step": 12236 + }, + { + "epoch": 52.31, + "learning_rate": 9.672154184214934e-05, + "loss": 0.0017, + "step": 12240 + }, + { + "epoch": 52.32, + "learning_rate": 9.67173899547481e-05, + "loss": 0.0002, + "step": 12244 + }, + { + "epoch": 52.34, + "learning_rate": 9.671323552923184e-05, + "loss": 0.0009, + "step": 12248 + }, + { + "epoch": 52.36, + "learning_rate": 9.670907856582627e-05, + "loss": 0.0002, + "step": 12252 + }, + { + "epoch": 52.38, + "learning_rate": 9.670491906475722e-05, + "loss": 0.0002, + "step": 12256 + }, + { + "epoch": 52.39, + "learning_rate": 9.670075702625068e-05, + "loss": 0.0004, + "step": 12260 + }, + { + "epoch": 52.41, + "learning_rate": 9.669659245053278e-05, + "loss": 0.0017, + "step": 12264 + }, + { + "epoch": 52.43, + "learning_rate": 9.669242533782974e-05, + "loss": 0.0018, + "step": 12268 + }, + { + "epoch": 52.44, + "learning_rate": 9.668825568836801e-05, + "loss": 0.0011, + "step": 12272 + }, + { + "epoch": 52.46, + "learning_rate": 9.66840835023741e-05, + "loss": 0.0002, + "step": 12276 + }, + { + "epoch": 52.48, + "learning_rate": 9.667990878007466e-05, + "loss": 0.0014, + "step": 12280 + }, + { + "epoch": 52.5, + "learning_rate": 9.667573152169652e-05, + "loss": 0.0002, + "step": 12284 + }, + { + "epoch": 52.51, + "learning_rate": 9.667155172746663e-05, + "loss": 0.001, + "step": 12288 + }, + { + "epoch": 52.53, + "learning_rate": 9.666736939761207e-05, + "loss": 0.0007, + "step": 12292 + }, + { + "epoch": 52.55, + "learning_rate": 9.666318453236006e-05, + "loss": 0.0021, + "step": 12296 + }, + { + "epoch": 52.56, + "learning_rate": 9.665899713193797e-05, + "loss": 0.0002, + "step": 12300 + }, + { + "epoch": 52.58, + "learning_rate": 9.665480719657327e-05, + "loss": 0.0011, + "step": 12304 + }, + { + "epoch": 52.6, + "learning_rate": 9.665061472649364e-05, + "loss": 0.0013, + "step": 12308 + }, + { + "epoch": 52.62, + "learning_rate": 9.66464197219268e-05, + "loss": 0.0003, + "step": 12312 + }, + { + "epoch": 52.63, + "learning_rate": 9.664222218310071e-05, + "loss": 0.0006, + "step": 12316 + }, + { + "epoch": 52.65, + "learning_rate": 9.663802211024337e-05, + "loss": 0.0003, + "step": 12320 + }, + { + "epoch": 52.67, + "learning_rate": 9.663381950358302e-05, + "loss": 0.0003, + "step": 12324 + }, + { + "epoch": 52.68, + "learning_rate": 9.662961436334795e-05, + "loss": 0.0006, + "step": 12328 + }, + { + "epoch": 52.7, + "learning_rate": 9.662540668976662e-05, + "loss": 0.0002, + "step": 12332 + }, + { + "epoch": 52.72, + "learning_rate": 9.662119648306767e-05, + "loss": 0.0003, + "step": 12336 + }, + { + "epoch": 52.74, + "learning_rate": 9.661698374347978e-05, + "loss": 0.0009, + "step": 12340 + }, + { + "epoch": 52.75, + "learning_rate": 9.661276847123184e-05, + "loss": 0.0005, + "step": 12344 + }, + { + "epoch": 52.77, + "learning_rate": 9.66085506665529e-05, + "loss": 0.002, + "step": 12348 + }, + { + "epoch": 52.79, + "learning_rate": 9.660433032967205e-05, + "loss": 0.0039, + "step": 12352 + }, + { + "epoch": 52.8, + "learning_rate": 9.660010746081864e-05, + "loss": 0.0002, + "step": 12356 + }, + { + "epoch": 52.82, + "learning_rate": 9.659588206022203e-05, + "loss": 0.0014, + "step": 12360 + }, + { + "epoch": 52.84, + "learning_rate": 9.659165412811184e-05, + "loss": 0.0005, + "step": 12364 + }, + { + "epoch": 52.85, + "learning_rate": 9.658742366471773e-05, + "loss": 0.0002, + "step": 12368 + }, + { + "epoch": 52.87, + "learning_rate": 9.658319067026958e-05, + "loss": 0.0003, + "step": 12372 + }, + { + "epoch": 52.89, + "learning_rate": 9.657895514499731e-05, + "loss": 0.002, + "step": 12376 + }, + { + "epoch": 52.91, + "learning_rate": 9.657471708913108e-05, + "loss": 0.0008, + "step": 12380 + }, + { + "epoch": 52.92, + "learning_rate": 9.65704765029011e-05, + "loss": 0.0003, + "step": 12384 + }, + { + "epoch": 52.94, + "learning_rate": 9.656623338653778e-05, + "loss": 0.0002, + "step": 12388 + }, + { + "epoch": 52.96, + "learning_rate": 9.656198774027167e-05, + "loss": 0.0003, + "step": 12392 + }, + { + "epoch": 52.97, + "learning_rate": 9.655773956433339e-05, + "loss": 0.001, + "step": 12396 + }, + { + "epoch": 52.99, + "learning_rate": 9.655348885895375e-05, + "loss": 0.0004, + "step": 12400 + }, + { + "epoch": 53.01, + "learning_rate": 9.654923562436368e-05, + "loss": 0.0008, + "step": 12404 + }, + { + "epoch": 53.03, + "learning_rate": 9.654497986079427e-05, + "loss": 0.0006, + "step": 12408 + }, + { + "epoch": 53.04, + "learning_rate": 9.654072156847673e-05, + "loss": 0.0003, + "step": 12412 + }, + { + "epoch": 53.06, + "learning_rate": 9.653646074764242e-05, + "loss": 0.0026, + "step": 12416 + }, + { + "epoch": 53.08, + "learning_rate": 9.653219739852281e-05, + "loss": 0.0017, + "step": 12420 + }, + { + "epoch": 53.09, + "learning_rate": 9.652793152134951e-05, + "loss": 0.0005, + "step": 12424 + }, + { + "epoch": 53.11, + "learning_rate": 9.652366311635432e-05, + "loss": 0.0005, + "step": 12428 + }, + { + "epoch": 53.13, + "learning_rate": 9.65193921837691e-05, + "loss": 0.0003, + "step": 12432 + }, + { + "epoch": 53.15, + "learning_rate": 9.651511872382593e-05, + "loss": 0.0009, + "step": 12436 + }, + { + "epoch": 53.16, + "learning_rate": 9.651084273675695e-05, + "loss": 0.0003, + "step": 12440 + }, + { + "epoch": 53.18, + "learning_rate": 9.650656422279446e-05, + "loss": 0.0009, + "step": 12444 + }, + { + "epoch": 53.2, + "learning_rate": 9.650228318217094e-05, + "loss": 0.001, + "step": 12448 + }, + { + "epoch": 53.21, + "learning_rate": 9.649799961511895e-05, + "loss": 0.0011, + "step": 12452 + }, + { + "epoch": 53.23, + "learning_rate": 9.649371352187123e-05, + "loss": 0.0002, + "step": 12456 + }, + { + "epoch": 53.25, + "learning_rate": 9.648942490266064e-05, + "loss": 0.0004, + "step": 12460 + }, + { + "epoch": 53.26, + "learning_rate": 9.648513375772016e-05, + "loss": 0.0004, + "step": 12464 + }, + { + "epoch": 53.28, + "learning_rate": 9.648084008728293e-05, + "loss": 0.0008, + "step": 12468 + }, + { + "epoch": 53.3, + "learning_rate": 9.647654389158223e-05, + "loss": 0.0003, + "step": 12472 + }, + { + "epoch": 53.32, + "learning_rate": 9.647224517085147e-05, + "loss": 0.0012, + "step": 12476 + }, + { + "epoch": 53.33, + "learning_rate": 9.64679439253242e-05, + "loss": 0.0001, + "step": 12480 + }, + { + "epoch": 53.35, + "learning_rate": 9.646364015523408e-05, + "loss": 0.001, + "step": 12484 + }, + { + "epoch": 53.37, + "learning_rate": 9.645933386081494e-05, + "loss": 0.0009, + "step": 12488 + }, + { + "epoch": 53.38, + "learning_rate": 9.645502504230075e-05, + "loss": 0.0007, + "step": 12492 + }, + { + "epoch": 53.4, + "learning_rate": 9.645071369992559e-05, + "loss": 0.0005, + "step": 12496 + }, + { + "epoch": 53.42, + "learning_rate": 9.644639983392369e-05, + "loss": 0.0004, + "step": 12500 + }, + { + "epoch": 53.44, + "learning_rate": 9.644208344452943e-05, + "loss": 0.0002, + "step": 12504 + }, + { + "epoch": 53.45, + "learning_rate": 9.64377645319773e-05, + "loss": 0.0002, + "step": 12508 + }, + { + "epoch": 53.47, + "learning_rate": 9.643344309650197e-05, + "loss": 0.0003, + "step": 12512 + }, + { + "epoch": 53.49, + "learning_rate": 9.642911913833821e-05, + "loss": 0.0004, + "step": 12516 + }, + { + "epoch": 53.5, + "learning_rate": 9.642479265772091e-05, + "loss": 0.0001, + "step": 12520 + }, + { + "epoch": 53.52, + "learning_rate": 9.642046365488516e-05, + "loss": 0.0009, + "step": 12524 + }, + { + "epoch": 53.54, + "learning_rate": 9.64161321300661e-05, + "loss": 0.0019, + "step": 12528 + }, + { + "epoch": 53.56, + "learning_rate": 9.641179808349913e-05, + "loss": 0.0002, + "step": 12532 + }, + { + "epoch": 53.57, + "learning_rate": 9.640746151541966e-05, + "loss": 0.0009, + "step": 12536 + }, + { + "epoch": 53.59, + "learning_rate": 9.640312242606332e-05, + "loss": 0.0007, + "step": 12540 + }, + { + "epoch": 53.61, + "learning_rate": 9.639878081566582e-05, + "loss": 0.0006, + "step": 12544 + }, + { + "epoch": 53.62, + "learning_rate": 9.639443668446307e-05, + "loss": 0.0004, + "step": 12548 + }, + { + "epoch": 53.64, + "learning_rate": 9.639009003269108e-05, + "loss": 0.0008, + "step": 12552 + }, + { + "epoch": 53.66, + "learning_rate": 9.638574086058597e-05, + "loss": 0.0005, + "step": 12556 + }, + { + "epoch": 53.68, + "learning_rate": 9.638138916838404e-05, + "loss": 0.0005, + "step": 12560 + }, + { + "epoch": 53.69, + "learning_rate": 9.637703495632171e-05, + "loss": 0.0003, + "step": 12564 + }, + { + "epoch": 53.71, + "learning_rate": 9.637267822463556e-05, + "loss": 0.0019, + "step": 12568 + }, + { + "epoch": 53.73, + "learning_rate": 9.63683189735623e-05, + "loss": 0.002, + "step": 12572 + }, + { + "epoch": 53.74, + "learning_rate": 9.63639572033387e-05, + "loss": 0.0005, + "step": 12576 + }, + { + "epoch": 53.76, + "learning_rate": 9.635959291420178e-05, + "loss": 0.0002, + "step": 12580 + }, + { + "epoch": 53.78, + "learning_rate": 9.635522610638865e-05, + "loss": 0.0006, + "step": 12584 + }, + { + "epoch": 53.79, + "learning_rate": 9.635085678013653e-05, + "loss": 0.0015, + "step": 12588 + }, + { + "epoch": 53.81, + "learning_rate": 9.634648493568284e-05, + "loss": 0.001, + "step": 12592 + }, + { + "epoch": 53.83, + "learning_rate": 9.634211057326506e-05, + "loss": 0.0006, + "step": 12596 + }, + { + "epoch": 53.85, + "learning_rate": 9.633773369312085e-05, + "loss": 0.0018, + "step": 12600 + }, + { + "epoch": 53.86, + "learning_rate": 9.633335429548801e-05, + "loss": 0.0001, + "step": 12604 + }, + { + "epoch": 53.88, + "learning_rate": 9.632897238060448e-05, + "loss": 0.0004, + "step": 12608 + }, + { + "epoch": 53.9, + "learning_rate": 9.632458794870831e-05, + "loss": 0.0008, + "step": 12612 + }, + { + "epoch": 53.91, + "learning_rate": 9.632020100003771e-05, + "loss": 0.0008, + "step": 12616 + }, + { + "epoch": 53.93, + "learning_rate": 9.631581153483102e-05, + "loss": 0.0005, + "step": 12620 + }, + { + "epoch": 53.95, + "learning_rate": 9.631141955332673e-05, + "loss": 0.0011, + "step": 12624 + }, + { + "epoch": 53.97, + "learning_rate": 9.630702505576341e-05, + "loss": 0.0003, + "step": 12628 + }, + { + "epoch": 53.98, + "learning_rate": 9.630262804237984e-05, + "loss": 0.0006, + "step": 12632 + }, + { + "epoch": 54.0, + "learning_rate": 9.62982285134149e-05, + "loss": 0.0003, + "step": 12636 + }, + { + "epoch": 54.02, + "learning_rate": 9.629382646910762e-05, + "loss": 0.0003, + "step": 12640 + }, + { + "epoch": 54.03, + "learning_rate": 9.628942190969715e-05, + "loss": 0.0023, + "step": 12644 + }, + { + "epoch": 54.05, + "learning_rate": 9.628501483542278e-05, + "loss": 0.0008, + "step": 12648 + }, + { + "epoch": 54.07, + "learning_rate": 9.628060524652395e-05, + "loss": 0.0004, + "step": 12652 + }, + { + "epoch": 54.09, + "learning_rate": 9.627619314324025e-05, + "loss": 0.0001, + "step": 12656 + }, + { + "epoch": 54.1, + "learning_rate": 9.627177852581135e-05, + "loss": 0.0003, + "step": 12660 + }, + { + "epoch": 54.12, + "learning_rate": 9.626736139447711e-05, + "loss": 0.0003, + "step": 12664 + }, + { + "epoch": 54.14, + "learning_rate": 9.62629417494775e-05, + "loss": 0.0002, + "step": 12668 + }, + { + "epoch": 54.15, + "learning_rate": 9.625851959105265e-05, + "loss": 0.0006, + "step": 12672 + }, + { + "epoch": 54.17, + "learning_rate": 9.625409491944282e-05, + "loss": 0.0021, + "step": 12676 + }, + { + "epoch": 54.19, + "learning_rate": 9.624966773488834e-05, + "loss": 0.0002, + "step": 12680 + }, + { + "epoch": 54.21, + "learning_rate": 9.624523803762982e-05, + "loss": 0.0002, + "step": 12684 + }, + { + "epoch": 54.22, + "learning_rate": 9.624080582790786e-05, + "loss": 0.0003, + "step": 12688 + }, + { + "epoch": 54.24, + "learning_rate": 9.623637110596328e-05, + "loss": 0.0006, + "step": 12692 + }, + { + "epoch": 54.26, + "learning_rate": 9.623193387203701e-05, + "loss": 0.0001, + "step": 12696 + }, + { + "epoch": 54.27, + "learning_rate": 9.622749412637014e-05, + "loss": 0.0001, + "step": 12700 + }, + { + "epoch": 54.29, + "learning_rate": 9.622305186920384e-05, + "loss": 0.0003, + "step": 12704 + }, + { + "epoch": 54.31, + "learning_rate": 9.621860710077949e-05, + "loss": 0.0013, + "step": 12708 + }, + { + "epoch": 54.32, + "learning_rate": 9.621415982133855e-05, + "loss": 0.0003, + "step": 12712 + }, + { + "epoch": 54.34, + "learning_rate": 9.620971003112263e-05, + "loss": 0.0003, + "step": 12716 + }, + { + "epoch": 54.36, + "learning_rate": 9.62052577303735e-05, + "loss": 0.0001, + "step": 12720 + }, + { + "epoch": 54.38, + "learning_rate": 9.620080291933307e-05, + "loss": 0.0009, + "step": 12724 + }, + { + "epoch": 54.39, + "learning_rate": 9.619634559824331e-05, + "loss": 0.0008, + "step": 12728 + }, + { + "epoch": 54.41, + "learning_rate": 9.619188576734642e-05, + "loss": 0.0001, + "step": 12732 + }, + { + "epoch": 54.43, + "learning_rate": 9.618742342688469e-05, + "loss": 0.0011, + "step": 12736 + }, + { + "epoch": 54.44, + "learning_rate": 9.618295857710057e-05, + "loss": 0.002, + "step": 12740 + }, + { + "epoch": 54.46, + "learning_rate": 9.61784912182366e-05, + "loss": 0.0002, + "step": 12744 + }, + { + "epoch": 54.48, + "learning_rate": 9.617402135053552e-05, + "loss": 0.0001, + "step": 12748 + }, + { + "epoch": 54.5, + "learning_rate": 9.616954897424015e-05, + "loss": 0.0009, + "step": 12752 + }, + { + "epoch": 54.51, + "learning_rate": 9.61650740895935e-05, + "loss": 0.0002, + "step": 12756 + }, + { + "epoch": 54.53, + "learning_rate": 9.616059669683864e-05, + "loss": 0.0002, + "step": 12760 + }, + { + "epoch": 54.55, + "learning_rate": 9.615611679621887e-05, + "loss": 0.0007, + "step": 12764 + }, + { + "epoch": 54.56, + "learning_rate": 9.615163438797756e-05, + "loss": 0.0003, + "step": 12768 + }, + { + "epoch": 54.58, + "learning_rate": 9.614714947235821e-05, + "loss": 0.0008, + "step": 12772 + }, + { + "epoch": 54.6, + "learning_rate": 9.614266204960453e-05, + "loss": 0.0005, + "step": 12776 + }, + { + "epoch": 54.62, + "learning_rate": 9.613817211996027e-05, + "loss": 0.0013, + "step": 12780 + }, + { + "epoch": 54.63, + "learning_rate": 9.613367968366941e-05, + "loss": 0.0002, + "step": 12784 + }, + { + "epoch": 54.65, + "learning_rate": 9.612918474097599e-05, + "loss": 0.0007, + "step": 12788 + }, + { + "epoch": 54.67, + "learning_rate": 9.612468729212423e-05, + "loss": 0.0006, + "step": 12792 + }, + { + "epoch": 54.68, + "learning_rate": 9.612018733735844e-05, + "loss": 0.0001, + "step": 12796 + }, + { + "epoch": 54.7, + "learning_rate": 9.611568487692316e-05, + "loss": 0.0004, + "step": 12800 + }, + { + "epoch": 54.72, + "learning_rate": 9.611117991106295e-05, + "loss": 0.0002, + "step": 12804 + }, + { + "epoch": 54.74, + "learning_rate": 9.610667244002259e-05, + "loss": 0.0006, + "step": 12808 + }, + { + "epoch": 54.75, + "learning_rate": 9.610216246404694e-05, + "loss": 0.0022, + "step": 12812 + }, + { + "epoch": 54.77, + "learning_rate": 9.609764998338105e-05, + "loss": 0.0012, + "step": 12816 + }, + { + "epoch": 54.79, + "learning_rate": 9.609313499827008e-05, + "loss": 0.0018, + "step": 12820 + }, + { + "epoch": 54.8, + "learning_rate": 9.60886175089593e-05, + "loss": 0.0006, + "step": 12824 + }, + { + "epoch": 54.82, + "learning_rate": 9.608409751569417e-05, + "loss": 0.0012, + "step": 12828 + }, + { + "epoch": 54.84, + "learning_rate": 9.607957501872022e-05, + "loss": 0.0001, + "step": 12832 + }, + { + "epoch": 54.85, + "learning_rate": 9.607505001828321e-05, + "loss": 0.0002, + "step": 12836 + }, + { + "epoch": 54.87, + "learning_rate": 9.607052251462892e-05, + "loss": 0.0004, + "step": 12840 + }, + { + "epoch": 54.89, + "learning_rate": 9.606599250800336e-05, + "loss": 0.0002, + "step": 12844 + }, + { + "epoch": 54.91, + "learning_rate": 9.606145999865264e-05, + "loss": 0.0028, + "step": 12848 + }, + { + "epoch": 54.92, + "learning_rate": 9.6056924986823e-05, + "loss": 0.0002, + "step": 12852 + }, + { + "epoch": 54.94, + "learning_rate": 9.60523874727608e-05, + "loss": 0.0002, + "step": 12856 + }, + { + "epoch": 54.96, + "learning_rate": 9.60478474567126e-05, + "loss": 0.0014, + "step": 12860 + }, + { + "epoch": 54.97, + "learning_rate": 9.604330493892504e-05, + "loss": 0.0003, + "step": 12864 + }, + { + "epoch": 54.99, + "learning_rate": 9.60387599196449e-05, + "loss": 0.0014, + "step": 12868 + }, + { + "epoch": 55.01, + "learning_rate": 9.603421239911912e-05, + "loss": 0.0002, + "step": 12872 + }, + { + "epoch": 55.03, + "learning_rate": 9.602966237759475e-05, + "loss": 0.0001, + "step": 12876 + }, + { + "epoch": 55.04, + "learning_rate": 9.6025109855319e-05, + "loss": 0.0002, + "step": 12880 + }, + { + "epoch": 55.06, + "learning_rate": 9.602055483253923e-05, + "loss": 0.0002, + "step": 12884 + }, + { + "epoch": 55.08, + "learning_rate": 9.601599730950284e-05, + "loss": 0.0005, + "step": 12888 + }, + { + "epoch": 55.09, + "learning_rate": 9.601143728645749e-05, + "loss": 0.0013, + "step": 12892 + }, + { + "epoch": 55.11, + "learning_rate": 9.600687476365092e-05, + "loss": 0.0005, + "step": 12896 + }, + { + "epoch": 55.13, + "learning_rate": 9.600230974133099e-05, + "loss": 0.0002, + "step": 12900 + }, + { + "epoch": 55.15, + "learning_rate": 9.599774221974573e-05, + "loss": 0.0003, + "step": 12904 + }, + { + "epoch": 55.16, + "learning_rate": 9.599317219914328e-05, + "loss": 0.0005, + "step": 12908 + }, + { + "epoch": 55.18, + "learning_rate": 9.598859967977193e-05, + "loss": 0.002, + "step": 12912 + }, + { + "epoch": 55.2, + "learning_rate": 9.598402466188009e-05, + "loss": 0.0008, + "step": 12916 + }, + { + "epoch": 55.21, + "learning_rate": 9.597944714571631e-05, + "loss": 0.0002, + "step": 12920 + }, + { + "epoch": 55.23, + "learning_rate": 9.597486713152933e-05, + "loss": 0.0002, + "step": 12924 + }, + { + "epoch": 55.25, + "learning_rate": 9.597028461956792e-05, + "loss": 0.0011, + "step": 12928 + }, + { + "epoch": 55.26, + "learning_rate": 9.596569961008108e-05, + "loss": 0.0002, + "step": 12932 + }, + { + "epoch": 55.28, + "learning_rate": 9.596111210331788e-05, + "loss": 0.0002, + "step": 12936 + }, + { + "epoch": 55.3, + "learning_rate": 9.59565220995276e-05, + "loss": 0.0016, + "step": 12940 + }, + { + "epoch": 55.32, + "learning_rate": 9.595192959895958e-05, + "loss": 0.0009, + "step": 12944 + }, + { + "epoch": 55.33, + "learning_rate": 9.594733460186331e-05, + "loss": 0.0012, + "step": 12948 + }, + { + "epoch": 55.35, + "learning_rate": 9.594273710848846e-05, + "loss": 0.0018, + "step": 12952 + }, + { + "epoch": 55.37, + "learning_rate": 9.593813711908482e-05, + "loss": 0.001, + "step": 12956 + }, + { + "epoch": 55.38, + "learning_rate": 9.593353463390227e-05, + "loss": 0.0003, + "step": 12960 + }, + { + "epoch": 55.4, + "learning_rate": 9.592892965319088e-05, + "loss": 0.0011, + "step": 12964 + }, + { + "epoch": 55.42, + "learning_rate": 9.592432217720082e-05, + "loss": 0.0001, + "step": 12968 + }, + { + "epoch": 55.44, + "learning_rate": 9.59197122061824e-05, + "loss": 0.0009, + "step": 12972 + }, + { + "epoch": 55.45, + "learning_rate": 9.591509974038612e-05, + "loss": 0.0019, + "step": 12976 + }, + { + "epoch": 55.47, + "learning_rate": 9.591048478006255e-05, + "loss": 0.001, + "step": 12980 + }, + { + "epoch": 55.49, + "learning_rate": 9.590586732546241e-05, + "loss": 0.0003, + "step": 12984 + }, + { + "epoch": 55.5, + "learning_rate": 9.590124737683655e-05, + "loss": 0.0017, + "step": 12988 + }, + { + "epoch": 55.52, + "learning_rate": 9.589662493443599e-05, + "loss": 0.0005, + "step": 12992 + }, + { + "epoch": 55.54, + "learning_rate": 9.589199999851183e-05, + "loss": 0.0007, + "step": 12996 + }, + { + "epoch": 55.56, + "learning_rate": 9.58873725693154e-05, + "loss": 0.0002, + "step": 13000 + }, + { + "epoch": 55.56, + "eval_exact_match": 0.5145530145530145, + "eval_loss": 0.8539115190505981, + "eval_runtime": 141.8733, + "eval_samples_per_second": 6.781, + "step": 13000 + }, + { + "epoch": 55.57, + "learning_rate": 9.588274264709806e-05, + "loss": 0.0008, + "step": 13004 + }, + { + "epoch": 55.59, + "learning_rate": 9.587811023211135e-05, + "loss": 0.0002, + "step": 13008 + }, + { + "epoch": 55.61, + "learning_rate": 9.587347532460695e-05, + "loss": 0.0002, + "step": 13012 + }, + { + "epoch": 55.62, + "learning_rate": 9.586883792483668e-05, + "loss": 0.0003, + "step": 13016 + }, + { + "epoch": 55.64, + "learning_rate": 9.586419803305248e-05, + "loss": 0.0012, + "step": 13020 + }, + { + "epoch": 55.66, + "learning_rate": 9.585955564950643e-05, + "loss": 0.0015, + "step": 13024 + }, + { + "epoch": 55.68, + "learning_rate": 9.585491077445075e-05, + "loss": 0.0005, + "step": 13028 + }, + { + "epoch": 55.69, + "learning_rate": 9.585026340813776e-05, + "loss": 0.0013, + "step": 13032 + }, + { + "epoch": 55.71, + "learning_rate": 9.584561355082001e-05, + "loss": 0.0002, + "step": 13036 + }, + { + "epoch": 55.73, + "learning_rate": 9.584096120275007e-05, + "loss": 0.001, + "step": 13040 + }, + { + "epoch": 55.74, + "learning_rate": 9.583630636418074e-05, + "loss": 0.0005, + "step": 13044 + }, + { + "epoch": 55.76, + "learning_rate": 9.583164903536488e-05, + "loss": 0.0005, + "step": 13048 + }, + { + "epoch": 55.78, + "learning_rate": 9.582698921655551e-05, + "loss": 0.0014, + "step": 13052 + }, + { + "epoch": 55.79, + "learning_rate": 9.582232690800584e-05, + "loss": 0.0001, + "step": 13056 + }, + { + "epoch": 55.81, + "learning_rate": 9.581766210996911e-05, + "loss": 0.0007, + "step": 13060 + }, + { + "epoch": 55.83, + "learning_rate": 9.581299482269882e-05, + "loss": 0.0001, + "step": 13064 + }, + { + "epoch": 55.85, + "learning_rate": 9.58083250464485e-05, + "loss": 0.0005, + "step": 13068 + }, + { + "epoch": 55.86, + "learning_rate": 9.580365278147185e-05, + "loss": 0.0008, + "step": 13072 + }, + { + "epoch": 55.88, + "learning_rate": 9.579897802802273e-05, + "loss": 0.0006, + "step": 13076 + }, + { + "epoch": 55.9, + "learning_rate": 9.579430078635509e-05, + "loss": 0.0004, + "step": 13080 + }, + { + "epoch": 55.91, + "learning_rate": 9.578962105672307e-05, + "loss": 0.0019, + "step": 13084 + }, + { + "epoch": 55.93, + "learning_rate": 9.57849388393809e-05, + "loss": 0.0005, + "step": 13088 + }, + { + "epoch": 55.95, + "learning_rate": 9.578025413458296e-05, + "loss": 0.0002, + "step": 13092 + }, + { + "epoch": 55.97, + "learning_rate": 9.577556694258378e-05, + "loss": 0.0002, + "step": 13096 + }, + { + "epoch": 55.98, + "learning_rate": 9.577087726363798e-05, + "loss": 0.0028, + "step": 13100 + }, + { + "epoch": 56.0, + "learning_rate": 9.57661850980004e-05, + "loss": 0.0002, + "step": 13104 + }, + { + "epoch": 56.02, + "learning_rate": 9.576149044592589e-05, + "loss": 0.0006, + "step": 13108 + }, + { + "epoch": 56.03, + "learning_rate": 9.575679330766955e-05, + "loss": 0.0017, + "step": 13112 + }, + { + "epoch": 56.05, + "learning_rate": 9.575209368348659e-05, + "loss": 0.0006, + "step": 13116 + }, + { + "epoch": 56.07, + "learning_rate": 9.57473915736323e-05, + "loss": 0.0005, + "step": 13120 + }, + { + "epoch": 56.09, + "learning_rate": 9.574268697836216e-05, + "loss": 0.0003, + "step": 13124 + }, + { + "epoch": 56.1, + "learning_rate": 9.573797989793174e-05, + "loss": 0.0005, + "step": 13128 + }, + { + "epoch": 56.12, + "learning_rate": 9.57332703325968e-05, + "loss": 0.0001, + "step": 13132 + }, + { + "epoch": 56.14, + "learning_rate": 9.57285582826132e-05, + "loss": 0.0011, + "step": 13136 + }, + { + "epoch": 56.15, + "learning_rate": 9.572384374823695e-05, + "loss": 0.0011, + "step": 13140 + }, + { + "epoch": 56.17, + "learning_rate": 9.571912672972416e-05, + "loss": 0.0019, + "step": 13144 + }, + { + "epoch": 56.19, + "learning_rate": 9.571440722733112e-05, + "loss": 0.0002, + "step": 13148 + }, + { + "epoch": 56.21, + "learning_rate": 9.570968524131423e-05, + "loss": 0.002, + "step": 13152 + }, + { + "epoch": 56.22, + "learning_rate": 9.570496077193005e-05, + "loss": 0.0003, + "step": 13156 + }, + { + "epoch": 56.24, + "learning_rate": 9.570023381943523e-05, + "loss": 0.001, + "step": 13160 + }, + { + "epoch": 56.26, + "learning_rate": 9.56955043840866e-05, + "loss": 0.0004, + "step": 13164 + }, + { + "epoch": 56.27, + "learning_rate": 9.56907724661411e-05, + "loss": 0.0009, + "step": 13168 + }, + { + "epoch": 56.29, + "learning_rate": 9.568603806585581e-05, + "loss": 0.0002, + "step": 13172 + }, + { + "epoch": 56.31, + "learning_rate": 9.568130118348793e-05, + "loss": 0.0009, + "step": 13176 + }, + { + "epoch": 56.32, + "learning_rate": 9.567656181929485e-05, + "loss": 0.0005, + "step": 13180 + }, + { + "epoch": 56.34, + "learning_rate": 9.567181997353403e-05, + "loss": 0.0015, + "step": 13184 + }, + { + "epoch": 56.36, + "learning_rate": 9.566707564646307e-05, + "loss": 0.0003, + "step": 13188 + }, + { + "epoch": 56.38, + "learning_rate": 9.566232883833977e-05, + "loss": 0.0001, + "step": 13192 + }, + { + "epoch": 56.39, + "learning_rate": 9.565757954942198e-05, + "loss": 0.0012, + "step": 13196 + }, + { + "epoch": 56.41, + "learning_rate": 9.565282777996775e-05, + "loss": 0.0071, + "step": 13200 + }, + { + "epoch": 56.43, + "learning_rate": 9.564807353023523e-05, + "loss": 0.0007, + "step": 13204 + }, + { + "epoch": 56.44, + "learning_rate": 9.564331680048273e-05, + "loss": 0.0008, + "step": 13208 + }, + { + "epoch": 56.46, + "learning_rate": 9.563855759096864e-05, + "loss": 0.0009, + "step": 13212 + }, + { + "epoch": 56.48, + "learning_rate": 9.563379590195157e-05, + "loss": 0.0002, + "step": 13216 + }, + { + "epoch": 56.5, + "learning_rate": 9.56290317336902e-05, + "loss": 0.001, + "step": 13220 + }, + { + "epoch": 56.51, + "learning_rate": 9.562426508644336e-05, + "loss": 0.0009, + "step": 13224 + }, + { + "epoch": 56.53, + "learning_rate": 9.561949596046999e-05, + "loss": 0.0003, + "step": 13228 + }, + { + "epoch": 56.55, + "learning_rate": 9.561472435602924e-05, + "loss": 0.0023, + "step": 13232 + }, + { + "epoch": 56.56, + "learning_rate": 9.560995027338033e-05, + "loss": 0.0003, + "step": 13236 + }, + { + "epoch": 56.58, + "learning_rate": 9.560517371278262e-05, + "loss": 0.0014, + "step": 13240 + }, + { + "epoch": 56.6, + "learning_rate": 9.560039467449563e-05, + "loss": 0.0001, + "step": 13244 + }, + { + "epoch": 56.62, + "learning_rate": 9.5595613158779e-05, + "loss": 0.0003, + "step": 13248 + }, + { + "epoch": 56.63, + "learning_rate": 9.55908291658925e-05, + "loss": 0.0009, + "step": 13252 + }, + { + "epoch": 56.65, + "learning_rate": 9.558604269609604e-05, + "loss": 0.0009, + "step": 13256 + }, + { + "epoch": 56.67, + "learning_rate": 9.558125374964967e-05, + "loss": 0.001, + "step": 13260 + }, + { + "epoch": 56.68, + "learning_rate": 9.557646232681356e-05, + "loss": 0.0001, + "step": 13264 + }, + { + "epoch": 56.7, + "learning_rate": 9.557166842784804e-05, + "loss": 0.0002, + "step": 13268 + }, + { + "epoch": 56.72, + "learning_rate": 9.556687205301354e-05, + "loss": 0.0011, + "step": 13272 + }, + { + "epoch": 56.74, + "learning_rate": 9.556207320257065e-05, + "loss": 0.0011, + "step": 13276 + }, + { + "epoch": 56.75, + "learning_rate": 9.55572718767801e-05, + "loss": 0.0009, + "step": 13280 + }, + { + "epoch": 56.77, + "learning_rate": 9.555246807590273e-05, + "loss": 0.0006, + "step": 13284 + }, + { + "epoch": 56.79, + "learning_rate": 9.554766180019952e-05, + "loss": 0.0018, + "step": 13288 + }, + { + "epoch": 56.8, + "learning_rate": 9.55428530499316e-05, + "loss": 0.0004, + "step": 13292 + }, + { + "epoch": 56.82, + "learning_rate": 9.553804182536021e-05, + "loss": 0.0015, + "step": 13296 + }, + { + "epoch": 56.84, + "learning_rate": 9.553322812674676e-05, + "loss": 0.0004, + "step": 13300 + }, + { + "epoch": 56.85, + "learning_rate": 9.552841195435278e-05, + "loss": 0.0004, + "step": 13304 + }, + { + "epoch": 56.87, + "learning_rate": 9.552359330843991e-05, + "loss": 0.0004, + "step": 13308 + }, + { + "epoch": 56.89, + "learning_rate": 9.551877218926994e-05, + "loss": 0.0014, + "step": 13312 + }, + { + "epoch": 56.91, + "learning_rate": 9.55139485971048e-05, + "loss": 0.0002, + "step": 13316 + }, + { + "epoch": 56.92, + "learning_rate": 9.550912253220656e-05, + "loss": 0.0017, + "step": 13320 + }, + { + "epoch": 56.94, + "learning_rate": 9.550429399483743e-05, + "loss": 0.0005, + "step": 13324 + }, + { + "epoch": 56.96, + "learning_rate": 9.549946298525971e-05, + "loss": 0.0004, + "step": 13328 + }, + { + "epoch": 56.97, + "learning_rate": 9.549462950373587e-05, + "loss": 0.001, + "step": 13332 + }, + { + "epoch": 56.99, + "learning_rate": 9.548979355052853e-05, + "loss": 0.0013, + "step": 13336 + }, + { + "epoch": 57.01, + "learning_rate": 9.54849551259004e-05, + "loss": 0.0002, + "step": 13340 + }, + { + "epoch": 57.03, + "learning_rate": 9.548011423011435e-05, + "loss": 0.0002, + "step": 13344 + }, + { + "epoch": 57.04, + "learning_rate": 9.54752708634334e-05, + "loss": 0.0002, + "step": 13348 + }, + { + "epoch": 57.06, + "learning_rate": 9.547042502612067e-05, + "loss": 0.0004, + "step": 13352 + }, + { + "epoch": 57.08, + "learning_rate": 9.546557671843944e-05, + "loss": 0.0008, + "step": 13356 + }, + { + "epoch": 57.09, + "learning_rate": 9.54607259406531e-05, + "loss": 0.001, + "step": 13360 + }, + { + "epoch": 57.11, + "learning_rate": 9.545587269302521e-05, + "loss": 0.0007, + "step": 13364 + }, + { + "epoch": 57.13, + "learning_rate": 9.545101697581942e-05, + "loss": 0.0003, + "step": 13368 + }, + { + "epoch": 57.15, + "learning_rate": 9.544615878929955e-05, + "loss": 0.0014, + "step": 13372 + }, + { + "epoch": 57.16, + "learning_rate": 9.544129813372953e-05, + "loss": 0.0009, + "step": 13376 + }, + { + "epoch": 57.18, + "learning_rate": 9.543643500937345e-05, + "loss": 0.0004, + "step": 13380 + }, + { + "epoch": 57.2, + "learning_rate": 9.54315694164955e-05, + "loss": 0.0001, + "step": 13384 + }, + { + "epoch": 57.21, + "learning_rate": 9.542670135536005e-05, + "loss": 0.0054, + "step": 13388 + }, + { + "epoch": 57.23, + "learning_rate": 9.542183082623155e-05, + "loss": 0.0005, + "step": 13392 + }, + { + "epoch": 57.25, + "learning_rate": 9.541695782937463e-05, + "loss": 0.0027, + "step": 13396 + }, + { + "epoch": 57.26, + "learning_rate": 9.541208236505403e-05, + "loss": 0.0008, + "step": 13400 + }, + { + "epoch": 57.28, + "learning_rate": 9.540720443353464e-05, + "loss": 0.001, + "step": 13404 + }, + { + "epoch": 57.3, + "learning_rate": 9.540232403508146e-05, + "loss": 0.0014, + "step": 13408 + }, + { + "epoch": 57.32, + "learning_rate": 9.539744116995964e-05, + "loss": 0.0016, + "step": 13412 + }, + { + "epoch": 57.33, + "learning_rate": 9.539255583843445e-05, + "loss": 0.0004, + "step": 13416 + }, + { + "epoch": 57.35, + "learning_rate": 9.538766804077134e-05, + "loss": 0.0004, + "step": 13420 + }, + { + "epoch": 57.37, + "learning_rate": 9.538277777723583e-05, + "loss": 0.0001, + "step": 13424 + }, + { + "epoch": 57.38, + "learning_rate": 9.53778850480936e-05, + "loss": 0.0002, + "step": 13428 + }, + { + "epoch": 57.4, + "learning_rate": 9.537298985361049e-05, + "loss": 0.0002, + "step": 13432 + }, + { + "epoch": 57.42, + "learning_rate": 9.536809219405245e-05, + "loss": 0.0015, + "step": 13436 + }, + { + "epoch": 57.44, + "learning_rate": 9.536319206968556e-05, + "loss": 0.0014, + "step": 13440 + }, + { + "epoch": 57.45, + "learning_rate": 9.535828948077602e-05, + "loss": 0.0006, + "step": 13444 + }, + { + "epoch": 57.47, + "learning_rate": 9.53533844275902e-05, + "loss": 0.0005, + "step": 13448 + }, + { + "epoch": 57.49, + "learning_rate": 9.534847691039461e-05, + "loss": 0.0007, + "step": 13452 + }, + { + "epoch": 57.5, + "learning_rate": 9.534356692945583e-05, + "loss": 0.0002, + "step": 13456 + }, + { + "epoch": 57.52, + "learning_rate": 9.533865448504064e-05, + "loss": 0.0002, + "step": 13460 + }, + { + "epoch": 57.54, + "learning_rate": 9.533373957741594e-05, + "loss": 0.0012, + "step": 13464 + }, + { + "epoch": 57.56, + "learning_rate": 9.532882220684871e-05, + "loss": 0.0016, + "step": 13468 + }, + { + "epoch": 57.57, + "learning_rate": 9.532390237360613e-05, + "loss": 0.0004, + "step": 13472 + }, + { + "epoch": 57.59, + "learning_rate": 9.531898007795552e-05, + "loss": 0.0001, + "step": 13476 + }, + { + "epoch": 57.61, + "learning_rate": 9.531405532016424e-05, + "loss": 0.0002, + "step": 13480 + }, + { + "epoch": 57.62, + "learning_rate": 9.53091281004999e-05, + "loss": 0.0002, + "step": 13484 + }, + { + "epoch": 57.64, + "learning_rate": 9.530419841923018e-05, + "loss": 0.0002, + "step": 13488 + }, + { + "epoch": 57.66, + "learning_rate": 9.52992662766229e-05, + "loss": 0.001, + "step": 13492 + }, + { + "epoch": 57.68, + "learning_rate": 9.529433167294601e-05, + "loss": 0.0024, + "step": 13496 + }, + { + "epoch": 57.69, + "learning_rate": 9.528939460846764e-05, + "loss": 0.0004, + "step": 13500 + }, + { + "epoch": 57.71, + "learning_rate": 9.528445508345596e-05, + "loss": 0.0003, + "step": 13504 + }, + { + "epoch": 57.73, + "learning_rate": 9.527951309817936e-05, + "loss": 0.0004, + "step": 13508 + }, + { + "epoch": 57.74, + "learning_rate": 9.527456865290633e-05, + "loss": 0.0018, + "step": 13512 + }, + { + "epoch": 57.76, + "learning_rate": 9.526962174790552e-05, + "loss": 0.0003, + "step": 13516 + }, + { + "epoch": 57.78, + "learning_rate": 9.526467238344566e-05, + "loss": 0.0001, + "step": 13520 + }, + { + "epoch": 57.79, + "learning_rate": 9.525972055979566e-05, + "loss": 0.0003, + "step": 13524 + }, + { + "epoch": 57.81, + "learning_rate": 9.525476627722454e-05, + "loss": 0.0001, + "step": 13528 + }, + { + "epoch": 57.83, + "learning_rate": 9.524980953600146e-05, + "loss": 0.0003, + "step": 13532 + }, + { + "epoch": 57.85, + "learning_rate": 9.524485033639574e-05, + "loss": 0.0043, + "step": 13536 + }, + { + "epoch": 57.86, + "learning_rate": 9.523988867867676e-05, + "loss": 0.0001, + "step": 13540 + }, + { + "epoch": 57.88, + "learning_rate": 9.523492456311414e-05, + "loss": 0.0002, + "step": 13544 + }, + { + "epoch": 57.9, + "learning_rate": 9.522995798997754e-05, + "loss": 0.0003, + "step": 13548 + }, + { + "epoch": 57.91, + "learning_rate": 9.52249889595368e-05, + "loss": 0.0023, + "step": 13552 + }, + { + "epoch": 57.93, + "learning_rate": 9.522001747206186e-05, + "loss": 0.0001, + "step": 13556 + }, + { + "epoch": 57.95, + "learning_rate": 9.521504352782287e-05, + "loss": 0.0009, + "step": 13560 + }, + { + "epoch": 57.97, + "learning_rate": 9.521006712709001e-05, + "loss": 0.0022, + "step": 13564 + }, + { + "epoch": 57.98, + "learning_rate": 9.520508827013366e-05, + "loss": 0.0015, + "step": 13568 + }, + { + "epoch": 58.0, + "learning_rate": 9.520010695722432e-05, + "loss": 0.0005, + "step": 13572 + }, + { + "epoch": 58.02, + "learning_rate": 9.519512318863263e-05, + "loss": 0.0001, + "step": 13576 + }, + { + "epoch": 58.03, + "learning_rate": 9.519013696462934e-05, + "loss": 0.0006, + "step": 13580 + }, + { + "epoch": 58.05, + "learning_rate": 9.518514828548535e-05, + "loss": 0.0002, + "step": 13584 + }, + { + "epoch": 58.07, + "learning_rate": 9.518015715147168e-05, + "loss": 0.0044, + "step": 13588 + }, + { + "epoch": 58.09, + "learning_rate": 9.517516356285951e-05, + "loss": 0.0003, + "step": 13592 + }, + { + "epoch": 58.1, + "learning_rate": 9.517016751992013e-05, + "loss": 0.0006, + "step": 13596 + }, + { + "epoch": 58.12, + "learning_rate": 9.516516902292498e-05, + "loss": 0.0052, + "step": 13600 + }, + { + "epoch": 58.14, + "learning_rate": 9.51601680721456e-05, + "loss": 0.0003, + "step": 13604 + }, + { + "epoch": 58.15, + "learning_rate": 9.515516466785373e-05, + "loss": 0.0003, + "step": 13608 + }, + { + "epoch": 58.17, + "learning_rate": 9.515015881032116e-05, + "loss": 0.0005, + "step": 13612 + }, + { + "epoch": 58.19, + "learning_rate": 9.514515049981987e-05, + "loss": 0.0002, + "step": 13616 + }, + { + "epoch": 58.21, + "learning_rate": 9.514013973662195e-05, + "loss": 0.0002, + "step": 13620 + }, + { + "epoch": 58.22, + "learning_rate": 9.513512652099963e-05, + "loss": 0.0006, + "step": 13624 + }, + { + "epoch": 58.24, + "learning_rate": 9.51301108532253e-05, + "loss": 0.0015, + "step": 13628 + }, + { + "epoch": 58.26, + "learning_rate": 9.512509273357141e-05, + "loss": 0.0006, + "step": 13632 + }, + { + "epoch": 58.27, + "learning_rate": 9.512007216231063e-05, + "loss": 0.0003, + "step": 13636 + }, + { + "epoch": 58.29, + "learning_rate": 9.511504913971572e-05, + "loss": 0.0006, + "step": 13640 + }, + { + "epoch": 58.31, + "learning_rate": 9.511002366605954e-05, + "loss": 0.0003, + "step": 13644 + }, + { + "epoch": 58.32, + "learning_rate": 9.510499574161517e-05, + "loss": 0.0002, + "step": 13648 + }, + { + "epoch": 58.34, + "learning_rate": 9.509996536665573e-05, + "loss": 0.0019, + "step": 13652 + }, + { + "epoch": 58.36, + "learning_rate": 9.509493254145455e-05, + "loss": 0.0013, + "step": 13656 + }, + { + "epoch": 58.38, + "learning_rate": 9.508989726628503e-05, + "loss": 0.0002, + "step": 13660 + }, + { + "epoch": 58.39, + "learning_rate": 9.508485954142076e-05, + "loss": 0.0001, + "step": 13664 + }, + { + "epoch": 58.41, + "learning_rate": 9.507981936713541e-05, + "loss": 0.0002, + "step": 13668 + }, + { + "epoch": 58.43, + "learning_rate": 9.507477674370282e-05, + "loss": 0.0004, + "step": 13672 + }, + { + "epoch": 58.44, + "learning_rate": 9.506973167139694e-05, + "loss": 0.0002, + "step": 13676 + }, + { + "epoch": 58.46, + "learning_rate": 9.50646841504919e-05, + "loss": 0.0006, + "step": 13680 + }, + { + "epoch": 58.48, + "learning_rate": 9.505963418126188e-05, + "loss": 0.0001, + "step": 13684 + }, + { + "epoch": 58.5, + "learning_rate": 9.505458176398128e-05, + "loss": 0.0004, + "step": 13688 + }, + { + "epoch": 58.51, + "learning_rate": 9.504952689892456e-05, + "loss": 0.0009, + "step": 13692 + }, + { + "epoch": 58.53, + "learning_rate": 9.504446958636635e-05, + "loss": 0.0003, + "step": 13696 + }, + { + "epoch": 58.55, + "learning_rate": 9.503940982658145e-05, + "loss": 0.0006, + "step": 13700 + }, + { + "epoch": 58.56, + "learning_rate": 9.50343476198447e-05, + "loss": 0.0001, + "step": 13704 + }, + { + "epoch": 58.58, + "learning_rate": 9.502928296643117e-05, + "loss": 0.0003, + "step": 13708 + }, + { + "epoch": 58.6, + "learning_rate": 9.502421586661598e-05, + "loss": 0.0004, + "step": 13712 + }, + { + "epoch": 58.62, + "learning_rate": 9.501914632067446e-05, + "loss": 0.0009, + "step": 13716 + }, + { + "epoch": 58.63, + "learning_rate": 9.501407432888199e-05, + "loss": 0.0002, + "step": 13720 + }, + { + "epoch": 58.65, + "learning_rate": 9.500899989151416e-05, + "loss": 0.0003, + "step": 13724 + }, + { + "epoch": 58.67, + "learning_rate": 9.500392300884665e-05, + "loss": 0.0002, + "step": 13728 + }, + { + "epoch": 58.68, + "learning_rate": 9.499884368115528e-05, + "loss": 0.0002, + "step": 13732 + }, + { + "epoch": 58.7, + "learning_rate": 9.4993761908716e-05, + "loss": 0.0002, + "step": 13736 + }, + { + "epoch": 58.72, + "learning_rate": 9.498867769180491e-05, + "loss": 0.0001, + "step": 13740 + }, + { + "epoch": 58.74, + "learning_rate": 9.498359103069823e-05, + "loss": 0.0001, + "step": 13744 + }, + { + "epoch": 58.75, + "learning_rate": 9.49785019256723e-05, + "loss": 0.0005, + "step": 13748 + }, + { + "epoch": 58.77, + "learning_rate": 9.497341037700363e-05, + "loss": 0.0006, + "step": 13752 + }, + { + "epoch": 58.79, + "learning_rate": 9.496831638496883e-05, + "loss": 0.0017, + "step": 13756 + }, + { + "epoch": 58.8, + "learning_rate": 9.496321994984463e-05, + "loss": 0.0002, + "step": 13760 + }, + { + "epoch": 58.82, + "learning_rate": 9.495812107190795e-05, + "loss": 0.0007, + "step": 13764 + }, + { + "epoch": 58.84, + "learning_rate": 9.495301975143578e-05, + "loss": 0.0004, + "step": 13768 + }, + { + "epoch": 58.85, + "learning_rate": 9.494791598870528e-05, + "loss": 0.0016, + "step": 13772 + }, + { + "epoch": 58.87, + "learning_rate": 9.494280978399374e-05, + "loss": 0.0001, + "step": 13776 + }, + { + "epoch": 58.89, + "learning_rate": 9.493770113757855e-05, + "loss": 0.0004, + "step": 13780 + }, + { + "epoch": 58.91, + "learning_rate": 9.493259004973732e-05, + "loss": 0.0002, + "step": 13784 + }, + { + "epoch": 58.92, + "learning_rate": 9.492747652074766e-05, + "loss": 0.0003, + "step": 13788 + }, + { + "epoch": 58.94, + "learning_rate": 9.492236055088741e-05, + "loss": 0.0019, + "step": 13792 + }, + { + "epoch": 58.96, + "learning_rate": 9.491724214043453e-05, + "loss": 0.0003, + "step": 13796 + }, + { + "epoch": 58.97, + "learning_rate": 9.491212128966709e-05, + "loss": 0.0005, + "step": 13800 + }, + { + "epoch": 58.99, + "learning_rate": 9.49069979988633e-05, + "loss": 0.0002, + "step": 13804 + }, + { + "epoch": 59.01, + "learning_rate": 9.490187226830148e-05, + "loss": 0.0001, + "step": 13808 + }, + { + "epoch": 59.03, + "learning_rate": 9.489674409826015e-05, + "loss": 0.0018, + "step": 13812 + }, + { + "epoch": 59.04, + "learning_rate": 9.48916134890179e-05, + "loss": 0.0022, + "step": 13816 + }, + { + "epoch": 59.06, + "learning_rate": 9.488648044085345e-05, + "loss": 0.0004, + "step": 13820 + }, + { + "epoch": 59.08, + "learning_rate": 9.488134495404572e-05, + "loss": 0.0002, + "step": 13824 + }, + { + "epoch": 59.09, + "learning_rate": 9.487620702887368e-05, + "loss": 0.0005, + "step": 13828 + }, + { + "epoch": 59.11, + "learning_rate": 9.487106666561647e-05, + "loss": 0.0006, + "step": 13832 + }, + { + "epoch": 59.13, + "learning_rate": 9.486592386455338e-05, + "loss": 0.0002, + "step": 13836 + }, + { + "epoch": 59.15, + "learning_rate": 9.486077862596379e-05, + "loss": 0.0003, + "step": 13840 + }, + { + "epoch": 59.16, + "learning_rate": 9.485563095012727e-05, + "loss": 0.0006, + "step": 13844 + }, + { + "epoch": 59.18, + "learning_rate": 9.485048083732346e-05, + "loss": 0.0017, + "step": 13848 + }, + { + "epoch": 59.2, + "learning_rate": 9.484532828783217e-05, + "loss": 0.0001, + "step": 13852 + }, + { + "epoch": 59.21, + "learning_rate": 9.484017330193333e-05, + "loss": 0.0005, + "step": 13856 + }, + { + "epoch": 59.23, + "learning_rate": 9.483501587990703e-05, + "loss": 0.0004, + "step": 13860 + }, + { + "epoch": 59.25, + "learning_rate": 9.482985602203343e-05, + "loss": 0.0009, + "step": 13864 + }, + { + "epoch": 59.26, + "learning_rate": 9.482469372859288e-05, + "loss": 0.0009, + "step": 13868 + }, + { + "epoch": 59.28, + "learning_rate": 9.481952899986586e-05, + "loss": 0.0015, + "step": 13872 + }, + { + "epoch": 59.3, + "learning_rate": 9.481436183613293e-05, + "loss": 0.0009, + "step": 13876 + }, + { + "epoch": 59.32, + "learning_rate": 9.480919223767484e-05, + "loss": 0.0002, + "step": 13880 + }, + { + "epoch": 59.33, + "learning_rate": 9.480402020477243e-05, + "loss": 0.0015, + "step": 13884 + }, + { + "epoch": 59.35, + "learning_rate": 9.479884573770674e-05, + "loss": 0.0029, + "step": 13888 + }, + { + "epoch": 59.37, + "learning_rate": 9.479366883675884e-05, + "loss": 0.0001, + "step": 13892 + }, + { + "epoch": 59.38, + "learning_rate": 9.478848950221e-05, + "loss": 0.0019, + "step": 13896 + }, + { + "epoch": 59.4, + "learning_rate": 9.478330773434163e-05, + "loss": 0.0006, + "step": 13900 + }, + { + "epoch": 59.42, + "learning_rate": 9.477812353343522e-05, + "loss": 0.0002, + "step": 13904 + }, + { + "epoch": 59.44, + "learning_rate": 9.477293689977245e-05, + "loss": 0.0003, + "step": 13908 + }, + { + "epoch": 59.45, + "learning_rate": 9.476774783363511e-05, + "loss": 0.0022, + "step": 13912 + }, + { + "epoch": 59.47, + "learning_rate": 9.476255633530508e-05, + "loss": 0.0003, + "step": 13916 + }, + { + "epoch": 59.49, + "learning_rate": 9.475736240506444e-05, + "loss": 0.0001, + "step": 13920 + }, + { + "epoch": 59.5, + "learning_rate": 9.475216604319536e-05, + "loss": 0.0004, + "step": 13924 + }, + { + "epoch": 59.52, + "learning_rate": 9.474696724998017e-05, + "loss": 0.0004, + "step": 13928 + }, + { + "epoch": 59.54, + "learning_rate": 9.47417660257013e-05, + "loss": 0.0003, + "step": 13932 + }, + { + "epoch": 59.56, + "learning_rate": 9.473656237064133e-05, + "loss": 0.001, + "step": 13936 + }, + { + "epoch": 59.57, + "learning_rate": 9.473135628508296e-05, + "loss": 0.0003, + "step": 13940 + }, + { + "epoch": 59.59, + "learning_rate": 9.472614776930906e-05, + "loss": 0.0003, + "step": 13944 + }, + { + "epoch": 59.61, + "learning_rate": 9.47209368236026e-05, + "loss": 0.0006, + "step": 13948 + }, + { + "epoch": 59.62, + "learning_rate": 9.471572344824664e-05, + "loss": 0.0011, + "step": 13952 + }, + { + "epoch": 59.64, + "learning_rate": 9.471050764352447e-05, + "loss": 0.0007, + "step": 13956 + }, + { + "epoch": 59.66, + "learning_rate": 9.470528940971944e-05, + "loss": 0.0001, + "step": 13960 + }, + { + "epoch": 59.68, + "learning_rate": 9.470006874711505e-05, + "loss": 0.0019, + "step": 13964 + }, + { + "epoch": 59.69, + "learning_rate": 9.469484565599495e-05, + "loss": 0.0002, + "step": 13968 + }, + { + "epoch": 59.71, + "learning_rate": 9.468962013664288e-05, + "loss": 0.0001, + "step": 13972 + }, + { + "epoch": 59.73, + "learning_rate": 9.468439218934276e-05, + "loss": 0.0005, + "step": 13976 + }, + { + "epoch": 59.74, + "learning_rate": 9.467916181437862e-05, + "loss": 0.0017, + "step": 13980 + }, + { + "epoch": 59.76, + "learning_rate": 9.46739290120346e-05, + "loss": 0.0001, + "step": 13984 + }, + { + "epoch": 59.78, + "learning_rate": 9.466869378259502e-05, + "loss": 0.0001, + "step": 13988 + }, + { + "epoch": 59.79, + "learning_rate": 9.466345612634428e-05, + "loss": 0.0012, + "step": 13992 + }, + { + "epoch": 59.81, + "learning_rate": 9.465821604356696e-05, + "loss": 0.0004, + "step": 13996 + }, + { + "epoch": 59.83, + "learning_rate": 9.465297353454774e-05, + "loss": 0.0006, + "step": 14000 + }, + { + "epoch": 59.83, + "eval_exact_match": 0.524948024948025, + "eval_loss": 0.8439415097236633, + "eval_runtime": 156.3849, + "eval_samples_per_second": 6.151, + "step": 14000 + }, + { + "epoch": 59.85, + "learning_rate": 9.464772859957143e-05, + "loss": 0.0005, + "step": 14004 + }, + { + "epoch": 59.86, + "learning_rate": 9.4642481238923e-05, + "loss": 0.0002, + "step": 14008 + }, + { + "epoch": 59.88, + "learning_rate": 9.463723145288752e-05, + "loss": 0.0002, + "step": 14012 + }, + { + "epoch": 59.9, + "learning_rate": 9.463197924175022e-05, + "loss": 0.0005, + "step": 14016 + }, + { + "epoch": 59.91, + "learning_rate": 9.462672460579645e-05, + "loss": 0.0009, + "step": 14020 + }, + { + "epoch": 59.93, + "learning_rate": 9.462146754531167e-05, + "loss": 0.0002, + "step": 14024 + }, + { + "epoch": 59.95, + "learning_rate": 9.461620806058152e-05, + "loss": 0.001, + "step": 14028 + }, + { + "epoch": 59.97, + "learning_rate": 9.46109461518917e-05, + "loss": 0.0003, + "step": 14032 + }, + { + "epoch": 59.98, + "learning_rate": 9.460568181952813e-05, + "loss": 0.0004, + "step": 14036 + }, + { + "epoch": 60.0, + "learning_rate": 9.460041506377679e-05, + "loss": 0.0001, + "step": 14040 + }, + { + "epoch": 60.02, + "learning_rate": 9.459514588492382e-05, + "loss": 0.0002, + "step": 14044 + }, + { + "epoch": 60.03, + "learning_rate": 9.45898742832555e-05, + "loss": 0.0001, + "step": 14048 + }, + { + "epoch": 60.05, + "learning_rate": 9.458460025905824e-05, + "loss": 0.0016, + "step": 14052 + }, + { + "epoch": 60.07, + "learning_rate": 9.457932381261855e-05, + "loss": 0.0012, + "step": 14056 + }, + { + "epoch": 60.09, + "learning_rate": 9.45740449442231e-05, + "loss": 0.0002, + "step": 14060 + }, + { + "epoch": 60.1, + "learning_rate": 9.456876365415871e-05, + "loss": 0.0013, + "step": 14064 + }, + { + "epoch": 60.12, + "learning_rate": 9.456347994271229e-05, + "loss": 0.0001, + "step": 14068 + }, + { + "epoch": 60.14, + "learning_rate": 9.455819381017089e-05, + "loss": 0.0002, + "step": 14072 + }, + { + "epoch": 60.15, + "learning_rate": 9.45529052568217e-05, + "loss": 0.001, + "step": 14076 + }, + { + "epoch": 60.17, + "learning_rate": 9.454761428295206e-05, + "loss": 0.0001, + "step": 14080 + }, + { + "epoch": 60.19, + "learning_rate": 9.454232088884942e-05, + "loss": 0.0002, + "step": 14084 + }, + { + "epoch": 60.21, + "learning_rate": 9.453702507480136e-05, + "loss": 0.0003, + "step": 14088 + }, + { + "epoch": 60.22, + "learning_rate": 9.453172684109559e-05, + "loss": 0.0002, + "step": 14092 + }, + { + "epoch": 60.24, + "learning_rate": 9.452642618801997e-05, + "loss": 0.0007, + "step": 14096 + }, + { + "epoch": 60.26, + "learning_rate": 9.452112311586247e-05, + "loss": 0.001, + "step": 14100 + }, + { + "epoch": 60.27, + "learning_rate": 9.451581762491124e-05, + "loss": 0.0013, + "step": 14104 + }, + { + "epoch": 60.29, + "learning_rate": 9.451050971545447e-05, + "loss": 0.0014, + "step": 14108 + }, + { + "epoch": 60.31, + "learning_rate": 9.450519938778056e-05, + "loss": 0.0011, + "step": 14112 + }, + { + "epoch": 60.32, + "learning_rate": 9.449988664217799e-05, + "loss": 0.0006, + "step": 14116 + }, + { + "epoch": 60.34, + "learning_rate": 9.449457147893543e-05, + "loss": 0.001, + "step": 14120 + }, + { + "epoch": 60.36, + "learning_rate": 9.448925389834165e-05, + "loss": 0.0007, + "step": 14124 + }, + { + "epoch": 60.38, + "learning_rate": 9.448393390068553e-05, + "loss": 0.001, + "step": 14128 + }, + { + "epoch": 60.39, + "learning_rate": 9.447861148625609e-05, + "loss": 0.0005, + "step": 14132 + }, + { + "epoch": 60.41, + "learning_rate": 9.447328665534253e-05, + "loss": 0.0002, + "step": 14136 + }, + { + "epoch": 60.43, + "learning_rate": 9.446795940823411e-05, + "loss": 0.002, + "step": 14140 + }, + { + "epoch": 60.44, + "learning_rate": 9.446262974522026e-05, + "loss": 0.0011, + "step": 14144 + }, + { + "epoch": 60.46, + "learning_rate": 9.445729766659055e-05, + "loss": 0.0004, + "step": 14148 + }, + { + "epoch": 60.48, + "learning_rate": 9.445196317263467e-05, + "loss": 0.0003, + "step": 14152 + }, + { + "epoch": 60.5, + "learning_rate": 9.44466262636424e-05, + "loss": 0.0003, + "step": 14156 + }, + { + "epoch": 60.51, + "learning_rate": 9.444128693990375e-05, + "loss": 0.0001, + "step": 14160 + }, + { + "epoch": 60.53, + "learning_rate": 9.443594520170876e-05, + "loss": 0.0003, + "step": 14164 + }, + { + "epoch": 60.55, + "learning_rate": 9.443060104934762e-05, + "loss": 0.0013, + "step": 14168 + }, + { + "epoch": 60.56, + "learning_rate": 9.442525448311072e-05, + "loss": 0.0007, + "step": 14172 + }, + { + "epoch": 60.58, + "learning_rate": 9.441990550328854e-05, + "loss": 0.0001, + "step": 14176 + }, + { + "epoch": 60.6, + "learning_rate": 9.441455411017164e-05, + "loss": 0.0003, + "step": 14180 + }, + { + "epoch": 60.62, + "learning_rate": 9.440920030405078e-05, + "loss": 0.0002, + "step": 14184 + }, + { + "epoch": 60.63, + "learning_rate": 9.440384408521684e-05, + "loss": 0.0001, + "step": 14188 + }, + { + "epoch": 60.65, + "learning_rate": 9.439848545396079e-05, + "loss": 0.0005, + "step": 14192 + }, + { + "epoch": 60.67, + "learning_rate": 9.439312441057379e-05, + "loss": 0.0001, + "step": 14196 + }, + { + "epoch": 60.68, + "learning_rate": 9.438776095534708e-05, + "loss": 0.0004, + "step": 14200 + }, + { + "epoch": 60.7, + "learning_rate": 9.438239508857207e-05, + "loss": 0.0016, + "step": 14204 + }, + { + "epoch": 60.72, + "learning_rate": 9.437702681054025e-05, + "loss": 0.0005, + "step": 14208 + }, + { + "epoch": 60.74, + "learning_rate": 9.437165612154331e-05, + "loss": 0.0004, + "step": 14212 + }, + { + "epoch": 60.75, + "learning_rate": 9.4366283021873e-05, + "loss": 0.001, + "step": 14216 + }, + { + "epoch": 60.77, + "learning_rate": 9.436090751182128e-05, + "loss": 0.0016, + "step": 14220 + }, + { + "epoch": 60.79, + "learning_rate": 9.435552959168016e-05, + "loss": 0.0014, + "step": 14224 + }, + { + "epoch": 60.8, + "learning_rate": 9.435014926174183e-05, + "loss": 0.0003, + "step": 14228 + }, + { + "epoch": 60.82, + "learning_rate": 9.43447665222986e-05, + "loss": 0.0003, + "step": 14232 + }, + { + "epoch": 60.84, + "learning_rate": 9.43393813736429e-05, + "loss": 0.0004, + "step": 14236 + }, + { + "epoch": 60.85, + "learning_rate": 9.433399381606733e-05, + "loss": 0.0003, + "step": 14240 + }, + { + "epoch": 60.87, + "learning_rate": 9.432860384986456e-05, + "loss": 0.0008, + "step": 14244 + }, + { + "epoch": 60.89, + "learning_rate": 9.432321147532743e-05, + "loss": 0.0015, + "step": 14248 + }, + { + "epoch": 60.91, + "learning_rate": 9.431781669274891e-05, + "loss": 0.0003, + "step": 14252 + }, + { + "epoch": 60.92, + "learning_rate": 9.431241950242207e-05, + "loss": 0.0002, + "step": 14256 + }, + { + "epoch": 60.94, + "learning_rate": 9.430701990464017e-05, + "loss": 0.0003, + "step": 14260 + }, + { + "epoch": 60.96, + "learning_rate": 9.430161789969654e-05, + "loss": 0.0011, + "step": 14264 + }, + { + "epoch": 60.97, + "learning_rate": 9.429621348788468e-05, + "loss": 0.0001, + "step": 14268 + }, + { + "epoch": 60.99, + "learning_rate": 9.429080666949821e-05, + "loss": 0.0009, + "step": 14272 + }, + { + "epoch": 61.01, + "learning_rate": 9.428539744483085e-05, + "loss": 0.0005, + "step": 14276 + }, + { + "epoch": 61.03, + "learning_rate": 9.427998581417652e-05, + "loss": 0.001, + "step": 14280 + }, + { + "epoch": 61.04, + "learning_rate": 9.42745717778292e-05, + "loss": 0.0003, + "step": 14284 + }, + { + "epoch": 61.06, + "learning_rate": 9.426915533608304e-05, + "loss": 0.0011, + "step": 14288 + }, + { + "epoch": 61.08, + "learning_rate": 9.42637364892323e-05, + "loss": 0.0006, + "step": 14292 + }, + { + "epoch": 61.09, + "learning_rate": 9.425831523757139e-05, + "loss": 0.0002, + "step": 14296 + }, + { + "epoch": 61.11, + "learning_rate": 9.425289158139486e-05, + "loss": 0.0001, + "step": 14300 + }, + { + "epoch": 61.13, + "learning_rate": 9.424746552099733e-05, + "loss": 0.0002, + "step": 14304 + }, + { + "epoch": 61.15, + "learning_rate": 9.424203705667363e-05, + "loss": 0.0003, + "step": 14308 + }, + { + "epoch": 61.16, + "learning_rate": 9.423660618871866e-05, + "loss": 0.0001, + "step": 14312 + }, + { + "epoch": 61.18, + "learning_rate": 9.423117291742749e-05, + "loss": 0.0013, + "step": 14316 + }, + { + "epoch": 61.2, + "learning_rate": 9.42257372430953e-05, + "loss": 0.0006, + "step": 14320 + }, + { + "epoch": 61.21, + "learning_rate": 9.42202991660174e-05, + "loss": 0.0003, + "step": 14324 + }, + { + "epoch": 61.23, + "learning_rate": 9.421485868648925e-05, + "loss": 0.0002, + "step": 14328 + }, + { + "epoch": 61.25, + "learning_rate": 9.420941580480642e-05, + "loss": 0.0009, + "step": 14332 + }, + { + "epoch": 61.26, + "learning_rate": 9.42039705212646e-05, + "loss": 0.0004, + "step": 14336 + }, + { + "epoch": 61.28, + "learning_rate": 9.419852283615964e-05, + "loss": 0.0001, + "step": 14340 + }, + { + "epoch": 61.3, + "learning_rate": 9.419307274978753e-05, + "loss": 0.0004, + "step": 14344 + }, + { + "epoch": 61.32, + "learning_rate": 9.418762026244434e-05, + "loss": 0.0002, + "step": 14348 + }, + { + "epoch": 61.33, + "learning_rate": 9.41821653744263e-05, + "loss": 0.0008, + "step": 14352 + }, + { + "epoch": 61.35, + "learning_rate": 9.417670808602978e-05, + "loss": 0.0034, + "step": 14356 + }, + { + "epoch": 61.37, + "learning_rate": 9.417124839755127e-05, + "loss": 0.0001, + "step": 14360 + }, + { + "epoch": 61.38, + "learning_rate": 9.416578630928738e-05, + "loss": 0.0018, + "step": 14364 + }, + { + "epoch": 61.4, + "learning_rate": 9.416032182153488e-05, + "loss": 0.0002, + "step": 14368 + }, + { + "epoch": 61.42, + "learning_rate": 9.415485493459063e-05, + "loss": 0.0002, + "step": 14372 + }, + { + "epoch": 61.44, + "learning_rate": 9.414938564875164e-05, + "loss": 0.001, + "step": 14376 + }, + { + "epoch": 61.45, + "learning_rate": 9.414391396431507e-05, + "loss": 0.0015, + "step": 14380 + }, + { + "epoch": 61.47, + "learning_rate": 9.41384398815782e-05, + "loss": 0.0003, + "step": 14384 + }, + { + "epoch": 61.49, + "learning_rate": 9.41329634008384e-05, + "loss": 0.0021, + "step": 14388 + }, + { + "epoch": 61.5, + "learning_rate": 9.412748452239321e-05, + "loss": 0.0008, + "step": 14392 + }, + { + "epoch": 61.52, + "learning_rate": 9.41220032465403e-05, + "loss": 0.0006, + "step": 14396 + }, + { + "epoch": 61.54, + "learning_rate": 9.411651957357746e-05, + "loss": 0.0003, + "step": 14400 + }, + { + "epoch": 61.56, + "learning_rate": 9.411103350380263e-05, + "loss": 0.0031, + "step": 14404 + }, + { + "epoch": 61.57, + "learning_rate": 9.410554503751383e-05, + "loss": 0.0003, + "step": 14408 + }, + { + "epoch": 61.59, + "learning_rate": 9.410005417500928e-05, + "loss": 0.0004, + "step": 14412 + }, + { + "epoch": 61.61, + "learning_rate": 9.409456091658726e-05, + "loss": 0.0019, + "step": 14416 + }, + { + "epoch": 61.62, + "learning_rate": 9.408906526254624e-05, + "loss": 0.0001, + "step": 14420 + }, + { + "epoch": 61.64, + "learning_rate": 9.408356721318479e-05, + "loss": 0.0002, + "step": 14424 + }, + { + "epoch": 61.66, + "learning_rate": 9.40780667688016e-05, + "loss": 0.0003, + "step": 14428 + }, + { + "epoch": 61.68, + "learning_rate": 9.407256392969552e-05, + "loss": 0.0002, + "step": 14432 + }, + { + "epoch": 61.69, + "learning_rate": 9.40670586961655e-05, + "loss": 0.0013, + "step": 14436 + }, + { + "epoch": 61.71, + "learning_rate": 9.406155106851064e-05, + "loss": 0.0003, + "step": 14440 + }, + { + "epoch": 61.73, + "learning_rate": 9.405604104703019e-05, + "loss": 0.0003, + "step": 14444 + }, + { + "epoch": 61.74, + "learning_rate": 9.405052863202346e-05, + "loss": 0.0003, + "step": 14448 + }, + { + "epoch": 61.76, + "learning_rate": 9.404501382378996e-05, + "loss": 0.0004, + "step": 14452 + }, + { + "epoch": 61.78, + "learning_rate": 9.40394966226293e-05, + "loss": 0.0019, + "step": 14456 + }, + { + "epoch": 61.79, + "learning_rate": 9.403397702884123e-05, + "loss": 0.0004, + "step": 14460 + }, + { + "epoch": 61.81, + "learning_rate": 9.40284550427256e-05, + "loss": 0.0008, + "step": 14464 + }, + { + "epoch": 61.83, + "learning_rate": 9.402293066458246e-05, + "loss": 0.0001, + "step": 14468 + }, + { + "epoch": 61.85, + "learning_rate": 9.401740389471191e-05, + "loss": 0.0005, + "step": 14472 + }, + { + "epoch": 61.86, + "learning_rate": 9.401187473341422e-05, + "loss": 0.0002, + "step": 14476 + }, + { + "epoch": 61.88, + "learning_rate": 9.400634318098979e-05, + "loss": 0.0002, + "step": 14480 + }, + { + "epoch": 61.9, + "learning_rate": 9.400080923773915e-05, + "loss": 0.0005, + "step": 14484 + }, + { + "epoch": 61.91, + "learning_rate": 9.399527290396293e-05, + "loss": 0.0002, + "step": 14488 + }, + { + "epoch": 61.93, + "learning_rate": 9.398973417996195e-05, + "loss": 0.0007, + "step": 14492 + }, + { + "epoch": 61.95, + "learning_rate": 9.39841930660371e-05, + "loss": 0.0002, + "step": 14496 + }, + { + "epoch": 61.97, + "learning_rate": 9.397864956248942e-05, + "loss": 0.001, + "step": 14500 + }, + { + "epoch": 61.98, + "learning_rate": 9.39731036696201e-05, + "loss": 0.0002, + "step": 14504 + }, + { + "epoch": 62.0, + "learning_rate": 9.396755538773045e-05, + "loss": 0.0014, + "step": 14508 + }, + { + "epoch": 62.02, + "learning_rate": 9.396200471712188e-05, + "loss": 0.0001, + "step": 14512 + }, + { + "epoch": 62.03, + "learning_rate": 9.395645165809597e-05, + "loss": 0.0004, + "step": 14516 + }, + { + "epoch": 62.05, + "learning_rate": 9.395089621095439e-05, + "loss": 0.0003, + "step": 14520 + }, + { + "epoch": 62.07, + "learning_rate": 9.3945338375999e-05, + "loss": 0.0008, + "step": 14524 + }, + { + "epoch": 62.09, + "learning_rate": 9.393977815353169e-05, + "loss": 0.0001, + "step": 14528 + }, + { + "epoch": 62.1, + "learning_rate": 9.393421554385461e-05, + "loss": 0.0003, + "step": 14532 + }, + { + "epoch": 62.12, + "learning_rate": 9.392865054726996e-05, + "loss": 0.0004, + "step": 14536 + }, + { + "epoch": 62.14, + "learning_rate": 9.392308316408005e-05, + "loss": 0.0001, + "step": 14540 + }, + { + "epoch": 62.15, + "learning_rate": 9.391751339458737e-05, + "loss": 0.0014, + "step": 14544 + }, + { + "epoch": 62.17, + "learning_rate": 9.391194123909451e-05, + "loss": 0.0002, + "step": 14548 + }, + { + "epoch": 62.19, + "learning_rate": 9.390636669790422e-05, + "loss": 0.0001, + "step": 14552 + }, + { + "epoch": 62.21, + "learning_rate": 9.390078977131933e-05, + "loss": 0.0005, + "step": 14556 + }, + { + "epoch": 62.22, + "learning_rate": 9.389521045964287e-05, + "loss": 0.0005, + "step": 14560 + }, + { + "epoch": 62.24, + "learning_rate": 9.388962876317793e-05, + "loss": 0.0003, + "step": 14564 + }, + { + "epoch": 62.26, + "learning_rate": 9.388404468222775e-05, + "loss": 0.0002, + "step": 14568 + }, + { + "epoch": 62.27, + "learning_rate": 9.387845821709573e-05, + "loss": 0.0003, + "step": 14572 + }, + { + "epoch": 62.29, + "learning_rate": 9.387286936808539e-05, + "loss": 0.0001, + "step": 14576 + }, + { + "epoch": 62.31, + "learning_rate": 9.386727813550034e-05, + "loss": 0.001, + "step": 14580 + }, + { + "epoch": 62.32, + "learning_rate": 9.386168451964434e-05, + "loss": 0.0002, + "step": 14584 + }, + { + "epoch": 62.34, + "learning_rate": 9.38560885208213e-05, + "loss": 0.0004, + "step": 14588 + }, + { + "epoch": 62.36, + "learning_rate": 9.385049013933526e-05, + "loss": 0.0005, + "step": 14592 + }, + { + "epoch": 62.38, + "learning_rate": 9.384488937549036e-05, + "loss": 0.0004, + "step": 14596 + }, + { + "epoch": 62.39, + "learning_rate": 9.383928622959091e-05, + "loss": 0.0003, + "step": 14600 + }, + { + "epoch": 62.41, + "learning_rate": 9.383368070194127e-05, + "loss": 0.0001, + "step": 14604 + }, + { + "epoch": 62.43, + "learning_rate": 9.382807279284602e-05, + "loss": 0.0005, + "step": 14608 + }, + { + "epoch": 62.44, + "learning_rate": 9.382246250260984e-05, + "loss": 0.0026, + "step": 14612 + }, + { + "epoch": 62.46, + "learning_rate": 9.381684983153751e-05, + "loss": 0.0001, + "step": 14616 + }, + { + "epoch": 62.48, + "learning_rate": 9.381123477993398e-05, + "loss": 0.0002, + "step": 14620 + }, + { + "epoch": 62.5, + "learning_rate": 9.380561734810429e-05, + "loss": 0.0002, + "step": 14624 + }, + { + "epoch": 62.51, + "learning_rate": 9.379999753635365e-05, + "loss": 0.0001, + "step": 14628 + }, + { + "epoch": 62.53, + "learning_rate": 9.379437534498737e-05, + "loss": 0.0002, + "step": 14632 + }, + { + "epoch": 62.55, + "learning_rate": 9.37887507743109e-05, + "loss": 0.0002, + "step": 14636 + }, + { + "epoch": 62.56, + "learning_rate": 9.378312382462981e-05, + "loss": 0.0022, + "step": 14640 + }, + { + "epoch": 62.58, + "learning_rate": 9.377749449624984e-05, + "loss": 0.0001, + "step": 14644 + }, + { + "epoch": 62.6, + "learning_rate": 9.377186278947678e-05, + "loss": 0.002, + "step": 14648 + }, + { + "epoch": 62.62, + "learning_rate": 9.376622870461663e-05, + "loss": 0.0005, + "step": 14652 + }, + { + "epoch": 62.63, + "learning_rate": 9.376059224197547e-05, + "loss": 0.0003, + "step": 14656 + }, + { + "epoch": 62.65, + "learning_rate": 9.375495340185952e-05, + "loss": 0.0012, + "step": 14660 + }, + { + "epoch": 62.67, + "learning_rate": 9.374931218457515e-05, + "loss": 0.0001, + "step": 14664 + }, + { + "epoch": 62.68, + "learning_rate": 9.374366859042883e-05, + "loss": 0.0001, + "step": 14668 + }, + { + "epoch": 62.7, + "learning_rate": 9.373802261972717e-05, + "loss": 0.0002, + "step": 14672 + }, + { + "epoch": 62.72, + "learning_rate": 9.373237427277692e-05, + "loss": 0.0001, + "step": 14676 + }, + { + "epoch": 62.74, + "learning_rate": 9.372672354988494e-05, + "loss": 0.0003, + "step": 14680 + }, + { + "epoch": 62.75, + "learning_rate": 9.372107045135823e-05, + "loss": 0.0001, + "step": 14684 + }, + { + "epoch": 62.77, + "learning_rate": 9.371541497750393e-05, + "loss": 0.0016, + "step": 14688 + }, + { + "epoch": 62.79, + "learning_rate": 9.370975712862929e-05, + "loss": 0.0001, + "step": 14692 + }, + { + "epoch": 62.8, + "learning_rate": 9.370409690504169e-05, + "loss": 0.0005, + "step": 14696 + }, + { + "epoch": 62.82, + "learning_rate": 9.369843430704864e-05, + "loss": 0.0001, + "step": 14700 + }, + { + "epoch": 62.84, + "learning_rate": 9.369276933495781e-05, + "loss": 0.0009, + "step": 14704 + }, + { + "epoch": 62.85, + "learning_rate": 9.368710198907695e-05, + "loss": 0.0001, + "step": 14708 + }, + { + "epoch": 62.87, + "learning_rate": 9.368143226971395e-05, + "loss": 0.0001, + "step": 14712 + }, + { + "epoch": 62.89, + "learning_rate": 9.367576017717689e-05, + "loss": 0.0002, + "step": 14716 + }, + { + "epoch": 62.91, + "learning_rate": 9.367008571177388e-05, + "loss": 0.0023, + "step": 14720 + }, + { + "epoch": 62.92, + "learning_rate": 9.366440887381324e-05, + "loss": 0.0011, + "step": 14724 + }, + { + "epoch": 62.94, + "learning_rate": 9.365872966360336e-05, + "loss": 0.0002, + "step": 14728 + }, + { + "epoch": 62.96, + "learning_rate": 9.36530480814528e-05, + "loss": 0.0006, + "step": 14732 + }, + { + "epoch": 62.97, + "learning_rate": 9.364736412767026e-05, + "loss": 0.0002, + "step": 14736 + }, + { + "epoch": 62.99, + "learning_rate": 9.364167780256452e-05, + "loss": 0.0006, + "step": 14740 + }, + { + "epoch": 63.01, + "learning_rate": 9.363598910644449e-05, + "loss": 0.0007, + "step": 14744 + }, + { + "epoch": 63.03, + "learning_rate": 9.363029803961927e-05, + "loss": 0.0003, + "step": 14748 + }, + { + "epoch": 63.04, + "learning_rate": 9.362460460239804e-05, + "loss": 0.0001, + "step": 14752 + }, + { + "epoch": 63.06, + "learning_rate": 9.361890879509012e-05, + "loss": 0.0001, + "step": 14756 + }, + { + "epoch": 63.08, + "learning_rate": 9.361321061800494e-05, + "loss": 0.0003, + "step": 14760 + }, + { + "epoch": 63.09, + "learning_rate": 9.36075100714521e-05, + "loss": 0.0003, + "step": 14764 + }, + { + "epoch": 63.11, + "learning_rate": 9.36018071557413e-05, + "loss": 0.0007, + "step": 14768 + }, + { + "epoch": 63.13, + "learning_rate": 9.359610187118238e-05, + "loss": 0.0001, + "step": 14772 + }, + { + "epoch": 63.15, + "learning_rate": 9.359039421808529e-05, + "loss": 0.0002, + "step": 14776 + }, + { + "epoch": 63.16, + "learning_rate": 9.358468419676012e-05, + "loss": 0.0013, + "step": 14780 + }, + { + "epoch": 63.18, + "learning_rate": 9.357897180751709e-05, + "loss": 0.0001, + "step": 14784 + }, + { + "epoch": 63.2, + "learning_rate": 9.357325705066658e-05, + "loss": 0.0001, + "step": 14788 + }, + { + "epoch": 63.21, + "learning_rate": 9.356753992651903e-05, + "loss": 0.0008, + "step": 14792 + }, + { + "epoch": 63.23, + "learning_rate": 9.356182043538506e-05, + "loss": 0.0001, + "step": 14796 + }, + { + "epoch": 63.25, + "learning_rate": 9.355609857757541e-05, + "loss": 0.0001, + "step": 14800 + }, + { + "epoch": 63.26, + "learning_rate": 9.355037435340095e-05, + "loss": 0.0002, + "step": 14804 + }, + { + "epoch": 63.28, + "learning_rate": 9.354464776317265e-05, + "loss": 0.0002, + "step": 14808 + }, + { + "epoch": 63.3, + "learning_rate": 9.353891880720163e-05, + "loss": 0.0002, + "step": 14812 + }, + { + "epoch": 63.32, + "learning_rate": 9.353318748579916e-05, + "loss": 0.0008, + "step": 14816 + }, + { + "epoch": 63.33, + "learning_rate": 9.352745379927661e-05, + "loss": 0.0008, + "step": 14820 + }, + { + "epoch": 63.35, + "learning_rate": 9.35217177479455e-05, + "loss": 0.0002, + "step": 14824 + }, + { + "epoch": 63.37, + "learning_rate": 9.351597933211742e-05, + "loss": 0.0002, + "step": 14828 + }, + { + "epoch": 63.38, + "learning_rate": 9.35102385521042e-05, + "loss": 0.0001, + "step": 14832 + }, + { + "epoch": 63.4, + "learning_rate": 9.350449540821767e-05, + "loss": 0.0002, + "step": 14836 + }, + { + "epoch": 63.42, + "learning_rate": 9.349874990076988e-05, + "loss": 0.0002, + "step": 14840 + }, + { + "epoch": 63.44, + "learning_rate": 9.349300203007296e-05, + "loss": 0.0021, + "step": 14844 + }, + { + "epoch": 63.45, + "learning_rate": 9.348725179643922e-05, + "loss": 0.0005, + "step": 14848 + }, + { + "epoch": 63.47, + "learning_rate": 9.348149920018104e-05, + "loss": 0.0001, + "step": 14852 + }, + { + "epoch": 63.49, + "learning_rate": 9.347574424161093e-05, + "loss": 0.0002, + "step": 14856 + }, + { + "epoch": 63.5, + "learning_rate": 9.346998692104162e-05, + "loss": 0.0009, + "step": 14860 + }, + { + "epoch": 63.52, + "learning_rate": 9.346422723878585e-05, + "loss": 0.0012, + "step": 14864 + }, + { + "epoch": 63.54, + "learning_rate": 9.345846519515654e-05, + "loss": 0.0001, + "step": 14868 + }, + { + "epoch": 63.56, + "learning_rate": 9.345270079046675e-05, + "loss": 0.001, + "step": 14872 + }, + { + "epoch": 63.57, + "learning_rate": 9.344693402502964e-05, + "loss": 0.0013, + "step": 14876 + }, + { + "epoch": 63.59, + "learning_rate": 9.344116489915855e-05, + "loss": 0.0005, + "step": 14880 + }, + { + "epoch": 63.61, + "learning_rate": 9.343539341316687e-05, + "loss": 0.0007, + "step": 14884 + }, + { + "epoch": 63.62, + "learning_rate": 9.342961956736817e-05, + "loss": 0.0009, + "step": 14888 + }, + { + "epoch": 63.64, + "learning_rate": 9.342384336207614e-05, + "loss": 0.0002, + "step": 14892 + }, + { + "epoch": 63.66, + "learning_rate": 9.341806479760463e-05, + "loss": 0.0014, + "step": 14896 + }, + { + "epoch": 63.68, + "learning_rate": 9.341228387426752e-05, + "loss": 0.0001, + "step": 14900 + }, + { + "epoch": 63.69, + "learning_rate": 9.340650059237893e-05, + "loss": 0.0002, + "step": 14904 + }, + { + "epoch": 63.71, + "learning_rate": 9.340071495225305e-05, + "loss": 0.0008, + "step": 14908 + }, + { + "epoch": 63.73, + "learning_rate": 9.33949269542042e-05, + "loss": 0.001, + "step": 14912 + }, + { + "epoch": 63.74, + "learning_rate": 9.338913659854684e-05, + "loss": 0.0001, + "step": 14916 + }, + { + "epoch": 63.76, + "learning_rate": 9.338334388559557e-05, + "loss": 0.0004, + "step": 14920 + }, + { + "epoch": 63.78, + "learning_rate": 9.33775488156651e-05, + "loss": 0.0003, + "step": 14924 + }, + { + "epoch": 63.79, + "learning_rate": 9.337175138907024e-05, + "loss": 0.0007, + "step": 14928 + }, + { + "epoch": 63.81, + "learning_rate": 9.336595160612598e-05, + "loss": 0.0004, + "step": 14932 + }, + { + "epoch": 63.83, + "learning_rate": 9.336014946714743e-05, + "loss": 0.0002, + "step": 14936 + }, + { + "epoch": 63.85, + "learning_rate": 9.335434497244979e-05, + "loss": 0.0003, + "step": 14940 + }, + { + "epoch": 63.86, + "learning_rate": 9.334853812234845e-05, + "loss": 0.0003, + "step": 14944 + }, + { + "epoch": 63.88, + "learning_rate": 9.334272891715885e-05, + "loss": 0.0005, + "step": 14948 + }, + { + "epoch": 63.9, + "learning_rate": 9.333691735719662e-05, + "loss": 0.0001, + "step": 14952 + }, + { + "epoch": 63.91, + "learning_rate": 9.333110344277749e-05, + "loss": 0.0002, + "step": 14956 + }, + { + "epoch": 63.93, + "learning_rate": 9.332528717421734e-05, + "loss": 0.001, + "step": 14960 + }, + { + "epoch": 63.95, + "learning_rate": 9.331946855183215e-05, + "loss": 0.0001, + "step": 14964 + }, + { + "epoch": 63.97, + "learning_rate": 9.331364757593803e-05, + "loss": 0.0008, + "step": 14968 + }, + { + "epoch": 63.98, + "learning_rate": 9.330782424685125e-05, + "loss": 0.0002, + "step": 14972 + }, + { + "epoch": 64.0, + "learning_rate": 9.330199856488817e-05, + "loss": 0.0005, + "step": 14976 + }, + { + "epoch": 64.02, + "learning_rate": 9.329617053036531e-05, + "loss": 0.0001, + "step": 14980 + }, + { + "epoch": 64.03, + "learning_rate": 9.329034014359929e-05, + "loss": 0.0002, + "step": 14984 + }, + { + "epoch": 64.05, + "learning_rate": 9.328450740490687e-05, + "loss": 0.0001, + "step": 14988 + }, + { + "epoch": 64.07, + "learning_rate": 9.327867231460495e-05, + "loss": 0.0004, + "step": 14992 + }, + { + "epoch": 64.09, + "learning_rate": 9.327283487301052e-05, + "loss": 0.0003, + "step": 14996 + }, + { + "epoch": 64.1, + "learning_rate": 9.326699508044075e-05, + "loss": 0.0028, + "step": 15000 + }, + { + "epoch": 64.1, + "eval_exact_match": 0.501039501039501, + "eval_loss": 0.8530089259147644, + "eval_runtime": 139.8575, + "eval_samples_per_second": 6.878, + "step": 15000 + }, + { + "epoch": 64.12, + "learning_rate": 9.326115293721289e-05, + "loss": 0.0002, + "step": 15004 + }, + { + "epoch": 64.14, + "learning_rate": 9.325530844364437e-05, + "loss": 0.0003, + "step": 15008 + }, + { + "epoch": 64.15, + "learning_rate": 9.324946160005268e-05, + "loss": 0.0002, + "step": 15012 + }, + { + "epoch": 64.17, + "learning_rate": 9.324361240675548e-05, + "loss": 0.0002, + "step": 15016 + }, + { + "epoch": 64.19, + "learning_rate": 9.323776086407058e-05, + "loss": 0.0002, + "step": 15020 + }, + { + "epoch": 64.21, + "learning_rate": 9.323190697231586e-05, + "loss": 0.0001, + "step": 15024 + }, + { + "epoch": 64.22, + "learning_rate": 9.322605073180937e-05, + "loss": 0.0003, + "step": 15028 + }, + { + "epoch": 64.24, + "learning_rate": 9.322019214286926e-05, + "loss": 0.0001, + "step": 15032 + }, + { + "epoch": 64.26, + "learning_rate": 9.321433120581384e-05, + "loss": 0.0007, + "step": 15036 + }, + { + "epoch": 64.27, + "learning_rate": 9.320846792096152e-05, + "loss": 0.0002, + "step": 15040 + }, + { + "epoch": 64.29, + "learning_rate": 9.320260228863084e-05, + "loss": 0.0002, + "step": 15044 + }, + { + "epoch": 64.31, + "learning_rate": 9.31967343091405e-05, + "loss": 0.0007, + "step": 15048 + }, + { + "epoch": 64.32, + "learning_rate": 9.319086398280928e-05, + "loss": 0.0001, + "step": 15052 + }, + { + "epoch": 64.34, + "learning_rate": 9.318499130995613e-05, + "loss": 0.0007, + "step": 15056 + }, + { + "epoch": 64.36, + "learning_rate": 9.317911629090007e-05, + "loss": 0.0001, + "step": 15060 + }, + { + "epoch": 64.38, + "learning_rate": 9.317323892596033e-05, + "loss": 0.0003, + "step": 15064 + }, + { + "epoch": 64.39, + "learning_rate": 9.316735921545618e-05, + "loss": 0.0001, + "step": 15068 + }, + { + "epoch": 64.41, + "learning_rate": 9.316147715970709e-05, + "loss": 0.0001, + "step": 15072 + }, + { + "epoch": 64.43, + "learning_rate": 9.315559275903261e-05, + "loss": 0.0005, + "step": 15076 + }, + { + "epoch": 64.44, + "learning_rate": 9.314970601375245e-05, + "loss": 0.0005, + "step": 15080 + }, + { + "epoch": 64.46, + "learning_rate": 9.314381692418642e-05, + "loss": 0.0001, + "step": 15084 + }, + { + "epoch": 64.48, + "learning_rate": 9.313792549065446e-05, + "loss": 0.0003, + "step": 15088 + }, + { + "epoch": 64.5, + "learning_rate": 9.313203171347667e-05, + "loss": 0.0002, + "step": 15092 + }, + { + "epoch": 64.51, + "learning_rate": 9.312613559297325e-05, + "loss": 0.002, + "step": 15096 + }, + { + "epoch": 64.53, + "learning_rate": 9.312023712946452e-05, + "loss": 0.0001, + "step": 15100 + }, + { + "epoch": 64.55, + "learning_rate": 9.311433632327093e-05, + "loss": 0.0001, + "step": 15104 + }, + { + "epoch": 64.56, + "learning_rate": 9.310843317471308e-05, + "loss": 0.0005, + "step": 15108 + }, + { + "epoch": 64.58, + "learning_rate": 9.310252768411167e-05, + "loss": 0.0017, + "step": 15112 + }, + { + "epoch": 64.6, + "learning_rate": 9.309661985178756e-05, + "loss": 0.0002, + "step": 15116 + }, + { + "epoch": 64.62, + "learning_rate": 9.309070967806171e-05, + "loss": 0.0001, + "step": 15120 + }, + { + "epoch": 64.63, + "learning_rate": 9.308479716325522e-05, + "loss": 0.0001, + "step": 15124 + }, + { + "epoch": 64.65, + "learning_rate": 9.307888230768929e-05, + "loss": 0.0001, + "step": 15128 + }, + { + "epoch": 64.67, + "learning_rate": 9.307296511168528e-05, + "loss": 0.0001, + "step": 15132 + }, + { + "epoch": 64.68, + "learning_rate": 9.306704557556468e-05, + "loss": 0.0014, + "step": 15136 + }, + { + "epoch": 64.7, + "learning_rate": 9.306112369964908e-05, + "loss": 0.0001, + "step": 15140 + }, + { + "epoch": 64.72, + "learning_rate": 9.305519948426022e-05, + "loss": 0.0001, + "step": 15144 + }, + { + "epoch": 64.74, + "learning_rate": 9.304927292971994e-05, + "loss": 0.0002, + "step": 15148 + }, + { + "epoch": 64.75, + "learning_rate": 9.304334403635023e-05, + "loss": 0.0003, + "step": 15152 + }, + { + "epoch": 64.77, + "learning_rate": 9.303741280447322e-05, + "loss": 0.0007, + "step": 15156 + }, + { + "epoch": 64.79, + "learning_rate": 9.303147923441112e-05, + "loss": 0.0001, + "step": 15160 + }, + { + "epoch": 64.8, + "learning_rate": 9.302554332648633e-05, + "loss": 0.0001, + "step": 15164 + }, + { + "epoch": 64.82, + "learning_rate": 9.30196050810213e-05, + "loss": 0.0003, + "step": 15168 + }, + { + "epoch": 64.84, + "learning_rate": 9.301366449833868e-05, + "loss": 0.0007, + "step": 15172 + }, + { + "epoch": 64.85, + "learning_rate": 9.30077215787612e-05, + "loss": 0.0002, + "step": 15176 + }, + { + "epoch": 64.87, + "learning_rate": 9.300177632261176e-05, + "loss": 0.0001, + "step": 15180 + }, + { + "epoch": 64.89, + "learning_rate": 9.299582873021333e-05, + "loss": 0.0021, + "step": 15184 + }, + { + "epoch": 64.91, + "learning_rate": 9.298987880188907e-05, + "loss": 0.0002, + "step": 15188 + }, + { + "epoch": 64.92, + "learning_rate": 9.29839265379622e-05, + "loss": 0.0003, + "step": 15192 + }, + { + "epoch": 64.94, + "learning_rate": 9.297797193875612e-05, + "loss": 0.0003, + "step": 15196 + }, + { + "epoch": 64.96, + "learning_rate": 9.297201500459433e-05, + "loss": 0.0001, + "step": 15200 + }, + { + "epoch": 64.97, + "learning_rate": 9.296605573580049e-05, + "loss": 0.0009, + "step": 15204 + }, + { + "epoch": 64.99, + "learning_rate": 9.296009413269833e-05, + "loss": 0.0003, + "step": 15208 + }, + { + "epoch": 65.01, + "learning_rate": 9.295413019561174e-05, + "loss": 0.0018, + "step": 15212 + }, + { + "epoch": 65.03, + "learning_rate": 9.294816392486476e-05, + "loss": 0.0001, + "step": 15216 + }, + { + "epoch": 65.04, + "learning_rate": 9.294219532078151e-05, + "loss": 0.0002, + "step": 15220 + }, + { + "epoch": 65.06, + "learning_rate": 9.293622438368628e-05, + "loss": 0.0002, + "step": 15224 + }, + { + "epoch": 65.08, + "learning_rate": 9.293025111390343e-05, + "loss": 0.0001, + "step": 15228 + }, + { + "epoch": 65.09, + "learning_rate": 9.292427551175753e-05, + "loss": 0.0001, + "step": 15232 + }, + { + "epoch": 65.11, + "learning_rate": 9.291829757757319e-05, + "loss": 0.0003, + "step": 15236 + }, + { + "epoch": 65.13, + "learning_rate": 9.291231731167521e-05, + "loss": 0.0008, + "step": 15240 + }, + { + "epoch": 65.15, + "learning_rate": 9.290633471438847e-05, + "loss": 0.0004, + "step": 15244 + }, + { + "epoch": 65.16, + "learning_rate": 9.290034978603803e-05, + "loss": 0.0008, + "step": 15248 + }, + { + "epoch": 65.18, + "learning_rate": 9.289436252694902e-05, + "loss": 0.0009, + "step": 15252 + }, + { + "epoch": 65.2, + "learning_rate": 9.288837293744673e-05, + "loss": 0.0004, + "step": 15256 + }, + { + "epoch": 65.21, + "learning_rate": 9.288238101785658e-05, + "loss": 0.0007, + "step": 15260 + }, + { + "epoch": 65.23, + "learning_rate": 9.287638676850409e-05, + "loss": 0.0002, + "step": 15264 + }, + { + "epoch": 65.25, + "learning_rate": 9.287039018971493e-05, + "loss": 0.0006, + "step": 15268 + }, + { + "epoch": 65.26, + "learning_rate": 9.286439128181488e-05, + "loss": 0.0001, + "step": 15272 + }, + { + "epoch": 65.28, + "learning_rate": 9.285839004512986e-05, + "loss": 0.0006, + "step": 15276 + }, + { + "epoch": 65.3, + "learning_rate": 9.285238647998592e-05, + "loss": 0.0001, + "step": 15280 + }, + { + "epoch": 65.32, + "learning_rate": 9.284638058670924e-05, + "loss": 0.0003, + "step": 15284 + }, + { + "epoch": 65.33, + "learning_rate": 9.284037236562608e-05, + "loss": 0.0002, + "step": 15288 + }, + { + "epoch": 65.35, + "learning_rate": 9.283436181706288e-05, + "loss": 0.0003, + "step": 15292 + }, + { + "epoch": 65.37, + "learning_rate": 9.28283489413462e-05, + "loss": 0.0007, + "step": 15296 + }, + { + "epoch": 65.38, + "learning_rate": 9.28223337388027e-05, + "loss": 0.0004, + "step": 15300 + }, + { + "epoch": 65.4, + "learning_rate": 9.281631620975917e-05, + "loss": 0.0001, + "step": 15304 + }, + { + "epoch": 65.42, + "learning_rate": 9.281029635454256e-05, + "loss": 0.0004, + "step": 15308 + }, + { + "epoch": 65.44, + "learning_rate": 9.280427417347991e-05, + "loss": 0.0004, + "step": 15312 + }, + { + "epoch": 65.45, + "learning_rate": 9.27982496668984e-05, + "loss": 0.0006, + "step": 15316 + }, + { + "epoch": 65.47, + "learning_rate": 9.279222283512534e-05, + "loss": 0.0002, + "step": 15320 + }, + { + "epoch": 65.49, + "learning_rate": 9.278619367848818e-05, + "loss": 0.0008, + "step": 15324 + }, + { + "epoch": 65.5, + "learning_rate": 9.278016219731442e-05, + "loss": 0.0005, + "step": 15328 + }, + { + "epoch": 65.52, + "learning_rate": 9.277412839193183e-05, + "loss": 0.0011, + "step": 15332 + }, + { + "epoch": 65.54, + "learning_rate": 9.276809226266814e-05, + "loss": 0.0004, + "step": 15336 + }, + { + "epoch": 65.56, + "learning_rate": 9.276205380985134e-05, + "loss": 0.0008, + "step": 15340 + }, + { + "epoch": 65.57, + "learning_rate": 9.27560130338095e-05, + "loss": 0.0004, + "step": 15344 + }, + { + "epoch": 65.59, + "learning_rate": 9.274996993487074e-05, + "loss": 0.0003, + "step": 15348 + }, + { + "epoch": 65.61, + "learning_rate": 9.274392451336346e-05, + "loss": 0.0013, + "step": 15352 + }, + { + "epoch": 65.62, + "learning_rate": 9.273787676961607e-05, + "loss": 0.0002, + "step": 15356 + }, + { + "epoch": 65.64, + "learning_rate": 9.273182670395713e-05, + "loss": 0.0005, + "step": 15360 + }, + { + "epoch": 65.66, + "learning_rate": 9.272577431671534e-05, + "loss": 0.0003, + "step": 15364 + }, + { + "epoch": 65.68, + "learning_rate": 9.271971960821952e-05, + "loss": 0.0006, + "step": 15368 + }, + { + "epoch": 65.69, + "learning_rate": 9.271366257879861e-05, + "loss": 0.0004, + "step": 15372 + }, + { + "epoch": 65.71, + "learning_rate": 9.27076032287817e-05, + "loss": 0.0003, + "step": 15376 + }, + { + "epoch": 65.73, + "learning_rate": 9.270154155849799e-05, + "loss": 0.0013, + "step": 15380 + }, + { + "epoch": 65.74, + "learning_rate": 9.269547756827679e-05, + "loss": 0.001, + "step": 15384 + }, + { + "epoch": 65.76, + "learning_rate": 9.268941125844755e-05, + "loss": 0.0001, + "step": 15388 + }, + { + "epoch": 65.78, + "learning_rate": 9.268334262933986e-05, + "loss": 0.0002, + "step": 15392 + }, + { + "epoch": 65.79, + "learning_rate": 9.267727168128342e-05, + "loss": 0.0002, + "step": 15396 + }, + { + "epoch": 65.81, + "learning_rate": 9.267119841460805e-05, + "loss": 0.0002, + "step": 15400 + }, + { + "epoch": 65.83, + "learning_rate": 9.266512282964372e-05, + "loss": 0.0006, + "step": 15404 + }, + { + "epoch": 65.85, + "learning_rate": 9.26590449267205e-05, + "loss": 0.0001, + "step": 15408 + }, + { + "epoch": 65.86, + "learning_rate": 9.265296470616861e-05, + "loss": 0.0001, + "step": 15412 + }, + { + "epoch": 65.88, + "learning_rate": 9.264688216831836e-05, + "loss": 0.0002, + "step": 15416 + }, + { + "epoch": 65.9, + "learning_rate": 9.264079731350024e-05, + "loss": 0.0001, + "step": 15420 + }, + { + "epoch": 65.91, + "learning_rate": 9.263471014204481e-05, + "loss": 0.0001, + "step": 15424 + }, + { + "epoch": 65.93, + "learning_rate": 9.262862065428278e-05, + "loss": 0.0001, + "step": 15428 + }, + { + "epoch": 65.95, + "learning_rate": 9.2622528850545e-05, + "loss": 0.0001, + "step": 15432 + }, + { + "epoch": 65.97, + "learning_rate": 9.261643473116244e-05, + "loss": 0.0003, + "step": 15436 + }, + { + "epoch": 65.98, + "learning_rate": 9.261033829646617e-05, + "loss": 0.0002, + "step": 15440 + }, + { + "epoch": 66.0, + "learning_rate": 9.26042395467874e-05, + "loss": 0.0001, + "step": 15444 + }, + { + "epoch": 66.02, + "learning_rate": 9.259813848245749e-05, + "loss": 0.0004, + "step": 15448 + }, + { + "epoch": 66.03, + "learning_rate": 9.259203510380789e-05, + "loss": 0.0001, + "step": 15452 + }, + { + "epoch": 66.05, + "learning_rate": 9.25859294111702e-05, + "loss": 0.0008, + "step": 15456 + }, + { + "epoch": 66.07, + "learning_rate": 9.257982140487613e-05, + "loss": 0.0002, + "step": 15460 + }, + { + "epoch": 66.09, + "learning_rate": 9.257371108525753e-05, + "loss": 0.0001, + "step": 15464 + }, + { + "epoch": 66.1, + "learning_rate": 9.256759845264636e-05, + "loss": 0.0004, + "step": 15468 + }, + { + "epoch": 66.12, + "learning_rate": 9.256148350737472e-05, + "loss": 0.0002, + "step": 15472 + }, + { + "epoch": 66.14, + "learning_rate": 9.255536624977484e-05, + "loss": 0.0001, + "step": 15476 + }, + { + "epoch": 66.15, + "learning_rate": 9.254924668017903e-05, + "loss": 0.0004, + "step": 15480 + }, + { + "epoch": 66.17, + "learning_rate": 9.25431247989198e-05, + "loss": 0.0001, + "step": 15484 + }, + { + "epoch": 66.19, + "learning_rate": 9.253700060632972e-05, + "loss": 0.0005, + "step": 15488 + }, + { + "epoch": 66.21, + "learning_rate": 9.253087410274153e-05, + "loss": 0.0001, + "step": 15492 + }, + { + "epoch": 66.22, + "learning_rate": 9.252474528848807e-05, + "loss": 0.0001, + "step": 15496 + }, + { + "epoch": 66.24, + "learning_rate": 9.251861416390232e-05, + "loss": 0.0002, + "step": 15500 + }, + { + "epoch": 66.26, + "learning_rate": 9.251248072931737e-05, + "loss": 0.0005, + "step": 15504 + }, + { + "epoch": 66.27, + "learning_rate": 9.250634498506643e-05, + "loss": 0.0001, + "step": 15508 + }, + { + "epoch": 66.29, + "learning_rate": 9.25002069314829e-05, + "loss": 0.0011, + "step": 15512 + }, + { + "epoch": 66.31, + "learning_rate": 9.249406656890019e-05, + "loss": 0.0002, + "step": 15516 + }, + { + "epoch": 66.32, + "learning_rate": 9.248792389765193e-05, + "loss": 0.0, + "step": 15520 + }, + { + "epoch": 66.34, + "learning_rate": 9.248177891807186e-05, + "loss": 0.0002, + "step": 15524 + }, + { + "epoch": 66.36, + "learning_rate": 9.247563163049381e-05, + "loss": 0.0012, + "step": 15528 + }, + { + "epoch": 66.38, + "learning_rate": 9.246948203525176e-05, + "loss": 0.0001, + "step": 15532 + }, + { + "epoch": 66.39, + "learning_rate": 9.246333013267983e-05, + "loss": 0.0001, + "step": 15536 + }, + { + "epoch": 66.41, + "learning_rate": 9.245717592311225e-05, + "loss": 0.0003, + "step": 15540 + }, + { + "epoch": 66.43, + "learning_rate": 9.245101940688333e-05, + "loss": 0.0001, + "step": 15544 + }, + { + "epoch": 66.44, + "learning_rate": 9.24448605843276e-05, + "loss": 0.0014, + "step": 15548 + }, + { + "epoch": 66.46, + "learning_rate": 9.243869945577962e-05, + "loss": 0.0013, + "step": 15552 + }, + { + "epoch": 66.48, + "learning_rate": 9.243253602157413e-05, + "loss": 0.0001, + "step": 15556 + }, + { + "epoch": 66.5, + "learning_rate": 9.242637028204602e-05, + "loss": 0.0004, + "step": 15560 + }, + { + "epoch": 66.51, + "learning_rate": 9.242020223753023e-05, + "loss": 0.0001, + "step": 15564 + }, + { + "epoch": 66.53, + "learning_rate": 9.241403188836188e-05, + "loss": 0.0011, + "step": 15568 + }, + { + "epoch": 66.55, + "learning_rate": 9.240785923487619e-05, + "loss": 0.0002, + "step": 15572 + }, + { + "epoch": 66.56, + "learning_rate": 9.240168427740852e-05, + "loss": 0.0004, + "step": 15576 + }, + { + "epoch": 66.58, + "learning_rate": 9.239550701629436e-05, + "loss": 0.0001, + "step": 15580 + }, + { + "epoch": 66.6, + "learning_rate": 9.23893274518693e-05, + "loss": 0.0001, + "step": 15584 + }, + { + "epoch": 66.62, + "learning_rate": 9.238314558446907e-05, + "loss": 0.0012, + "step": 15588 + }, + { + "epoch": 66.63, + "learning_rate": 9.237696141442952e-05, + "loss": 0.0015, + "step": 15592 + }, + { + "epoch": 66.65, + "learning_rate": 9.237077494208668e-05, + "loss": 0.0014, + "step": 15596 + }, + { + "epoch": 66.67, + "learning_rate": 9.236458616777659e-05, + "loss": 0.0001, + "step": 15600 + }, + { + "epoch": 66.68, + "learning_rate": 9.235839509183551e-05, + "loss": 0.0001, + "step": 15604 + }, + { + "epoch": 66.7, + "learning_rate": 9.23522017145998e-05, + "loss": 0.0002, + "step": 15608 + }, + { + "epoch": 66.72, + "learning_rate": 9.234600603640594e-05, + "loss": 0.0013, + "step": 15612 + }, + { + "epoch": 66.74, + "learning_rate": 9.233980805759052e-05, + "loss": 0.0002, + "step": 15616 + }, + { + "epoch": 66.75, + "learning_rate": 9.23336077784903e-05, + "loss": 0.0004, + "step": 15620 + }, + { + "epoch": 66.77, + "learning_rate": 9.23274051994421e-05, + "loss": 0.0002, + "step": 15624 + }, + { + "epoch": 66.79, + "learning_rate": 9.232120032078292e-05, + "loss": 0.0003, + "step": 15628 + }, + { + "epoch": 66.8, + "learning_rate": 9.231499314284987e-05, + "loss": 0.0001, + "step": 15632 + }, + { + "epoch": 66.82, + "learning_rate": 9.230878366598017e-05, + "loss": 0.0001, + "step": 15636 + }, + { + "epoch": 66.84, + "learning_rate": 9.230257189051118e-05, + "loss": 0.0003, + "step": 15640 + }, + { + "epoch": 66.85, + "learning_rate": 9.229635781678038e-05, + "loss": 0.0003, + "step": 15644 + }, + { + "epoch": 66.87, + "learning_rate": 9.229014144512537e-05, + "loss": 0.0002, + "step": 15648 + }, + { + "epoch": 66.89, + "learning_rate": 9.22839227758839e-05, + "loss": 0.0004, + "step": 15652 + }, + { + "epoch": 66.91, + "learning_rate": 9.22777018093938e-05, + "loss": 0.0004, + "step": 15656 + }, + { + "epoch": 66.92, + "learning_rate": 9.227147854599306e-05, + "loss": 0.0017, + "step": 15660 + }, + { + "epoch": 66.94, + "learning_rate": 9.226525298601979e-05, + "loss": 0.0001, + "step": 15664 + }, + { + "epoch": 66.96, + "learning_rate": 9.225902512981221e-05, + "loss": 0.0003, + "step": 15668 + }, + { + "epoch": 66.97, + "learning_rate": 9.225279497770867e-05, + "loss": 0.0001, + "step": 15672 + }, + { + "epoch": 66.99, + "learning_rate": 9.224656253004766e-05, + "loss": 0.0015, + "step": 15676 + }, + { + "epoch": 67.01, + "learning_rate": 9.224032778716781e-05, + "loss": 0.0005, + "step": 15680 + }, + { + "epoch": 67.03, + "learning_rate": 9.223409074940777e-05, + "loss": 0.0001, + "step": 15684 + }, + { + "epoch": 67.04, + "learning_rate": 9.222785141710646e-05, + "loss": 0.0002, + "step": 15688 + }, + { + "epoch": 67.06, + "learning_rate": 9.222160979060286e-05, + "loss": 0.0003, + "step": 15692 + }, + { + "epoch": 67.08, + "learning_rate": 9.221536587023603e-05, + "loss": 0.0005, + "step": 15696 + }, + { + "epoch": 67.09, + "learning_rate": 9.220911965634521e-05, + "loss": 0.0008, + "step": 15700 + }, + { + "epoch": 67.11, + "learning_rate": 9.220287114926978e-05, + "loss": 0.0001, + "step": 15704 + }, + { + "epoch": 67.13, + "learning_rate": 9.219662034934917e-05, + "loss": 0.0, + "step": 15708 + }, + { + "epoch": 67.15, + "learning_rate": 9.219036725692302e-05, + "loss": 0.0002, + "step": 15712 + }, + { + "epoch": 67.16, + "learning_rate": 9.218411187233103e-05, + "loss": 0.0, + "step": 15716 + }, + { + "epoch": 67.18, + "learning_rate": 9.217785419591307e-05, + "loss": 0.0012, + "step": 15720 + }, + { + "epoch": 67.2, + "learning_rate": 9.217159422800909e-05, + "loss": 0.0002, + "step": 15724 + }, + { + "epoch": 67.21, + "learning_rate": 9.216533196895922e-05, + "loss": 0.0004, + "step": 15728 + }, + { + "epoch": 67.23, + "learning_rate": 9.215906741910365e-05, + "loss": 0.0005, + "step": 15732 + }, + { + "epoch": 67.25, + "learning_rate": 9.215280057878276e-05, + "loss": 0.0001, + "step": 15736 + }, + { + "epoch": 67.26, + "learning_rate": 9.214653144833699e-05, + "loss": 0.0001, + "step": 15740 + }, + { + "epoch": 67.28, + "learning_rate": 9.214026002810697e-05, + "loss": 0.0001, + "step": 15744 + }, + { + "epoch": 67.3, + "learning_rate": 9.213398631843337e-05, + "loss": 0.0005, + "step": 15748 + }, + { + "epoch": 67.32, + "learning_rate": 9.212771031965709e-05, + "loss": 0.0008, + "step": 15752 + }, + { + "epoch": 67.33, + "learning_rate": 9.212143203211907e-05, + "loss": 0.0003, + "step": 15756 + }, + { + "epoch": 67.35, + "learning_rate": 9.21151514561604e-05, + "loss": 0.0001, + "step": 15760 + }, + { + "epoch": 67.37, + "learning_rate": 9.210886859212233e-05, + "loss": 0.0001, + "step": 15764 + }, + { + "epoch": 67.38, + "learning_rate": 9.210258344034617e-05, + "loss": 0.0001, + "step": 15768 + }, + { + "epoch": 67.4, + "learning_rate": 9.209629600117341e-05, + "loss": 0.0001, + "step": 15772 + }, + { + "epoch": 67.42, + "learning_rate": 9.209000627494562e-05, + "loss": 0.0001, + "step": 15776 + }, + { + "epoch": 67.44, + "learning_rate": 9.208371426200453e-05, + "loss": 0.0002, + "step": 15780 + }, + { + "epoch": 67.45, + "learning_rate": 9.207741996269197e-05, + "loss": 0.0001, + "step": 15784 + }, + { + "epoch": 67.47, + "learning_rate": 9.207112337734989e-05, + "loss": 0.0014, + "step": 15788 + }, + { + "epoch": 67.49, + "learning_rate": 9.206482450632041e-05, + "loss": 0.0001, + "step": 15792 + }, + { + "epoch": 67.5, + "learning_rate": 9.205852334994574e-05, + "loss": 0.0001, + "step": 15796 + }, + { + "epoch": 67.52, + "learning_rate": 9.20522199085682e-05, + "loss": 0.0005, + "step": 15800 + }, + { + "epoch": 67.54, + "learning_rate": 9.204591418253023e-05, + "loss": 0.0004, + "step": 15804 + }, + { + "epoch": 67.56, + "learning_rate": 9.203960617217446e-05, + "loss": 0.0001, + "step": 15808 + }, + { + "epoch": 67.57, + "learning_rate": 9.203329587784359e-05, + "loss": 0.0003, + "step": 15812 + }, + { + "epoch": 67.59, + "learning_rate": 9.202698329988042e-05, + "loss": 0.0002, + "step": 15816 + }, + { + "epoch": 67.61, + "learning_rate": 9.202066843862794e-05, + "loss": 0.0003, + "step": 15820 + }, + { + "epoch": 67.62, + "learning_rate": 9.201435129442922e-05, + "loss": 0.0006, + "step": 15824 + }, + { + "epoch": 67.64, + "learning_rate": 9.200803186762745e-05, + "loss": 0.0001, + "step": 15828 + }, + { + "epoch": 67.66, + "learning_rate": 9.200171015856597e-05, + "loss": 0.0001, + "step": 15832 + }, + { + "epoch": 67.68, + "learning_rate": 9.199538616758827e-05, + "loss": 0.0003, + "step": 15836 + }, + { + "epoch": 67.69, + "learning_rate": 9.198905989503785e-05, + "loss": 0.0001, + "step": 15840 + }, + { + "epoch": 67.71, + "learning_rate": 9.198273134125848e-05, + "loss": 0.0005, + "step": 15844 + }, + { + "epoch": 67.73, + "learning_rate": 9.197640050659396e-05, + "loss": 0.0006, + "step": 15848 + }, + { + "epoch": 67.74, + "learning_rate": 9.197006739138822e-05, + "loss": 0.0003, + "step": 15852 + }, + { + "epoch": 67.76, + "learning_rate": 9.196373199598536e-05, + "loss": 0.0006, + "step": 15856 + }, + { + "epoch": 67.78, + "learning_rate": 9.195739432072955e-05, + "loss": 0.0002, + "step": 15860 + }, + { + "epoch": 67.79, + "learning_rate": 9.195105436596514e-05, + "loss": 0.0003, + "step": 15864 + }, + { + "epoch": 67.81, + "learning_rate": 9.194471213203655e-05, + "loss": 0.0001, + "step": 15868 + }, + { + "epoch": 67.83, + "learning_rate": 9.193836761928838e-05, + "loss": 0.0001, + "step": 15872 + }, + { + "epoch": 67.85, + "learning_rate": 9.193202082806528e-05, + "loss": 0.0003, + "step": 15876 + }, + { + "epoch": 67.86, + "learning_rate": 9.192567175871209e-05, + "loss": 0.0001, + "step": 15880 + }, + { + "epoch": 67.88, + "learning_rate": 9.191932041157375e-05, + "loss": 0.0001, + "step": 15884 + }, + { + "epoch": 67.9, + "learning_rate": 9.191296678699532e-05, + "loss": 0.0005, + "step": 15888 + }, + { + "epoch": 67.91, + "learning_rate": 9.190661088532198e-05, + "loss": 0.0006, + "step": 15892 + }, + { + "epoch": 67.93, + "learning_rate": 9.190025270689905e-05, + "loss": 0.0001, + "step": 15896 + }, + { + "epoch": 67.95, + "learning_rate": 9.189389225207196e-05, + "loss": 0.0001, + "step": 15900 + }, + { + "epoch": 67.97, + "learning_rate": 9.188752952118627e-05, + "loss": 0.0002, + "step": 15904 + }, + { + "epoch": 67.98, + "learning_rate": 9.188116451458765e-05, + "loss": 0.0024, + "step": 15908 + }, + { + "epoch": 68.0, + "learning_rate": 9.187479723262193e-05, + "loss": 0.0001, + "step": 15912 + }, + { + "epoch": 68.02, + "learning_rate": 9.186842767563501e-05, + "loss": 0.0005, + "step": 15916 + }, + { + "epoch": 68.03, + "learning_rate": 9.186205584397296e-05, + "loss": 0.0006, + "step": 15920 + }, + { + "epoch": 68.05, + "learning_rate": 9.185568173798195e-05, + "loss": 0.0001, + "step": 15924 + }, + { + "epoch": 68.07, + "learning_rate": 9.184930535800828e-05, + "loss": 0.0001, + "step": 15928 + }, + { + "epoch": 68.09, + "learning_rate": 9.184292670439839e-05, + "loss": 0.0004, + "step": 15932 + }, + { + "epoch": 68.1, + "learning_rate": 9.18365457774988e-05, + "loss": 0.0004, + "step": 15936 + }, + { + "epoch": 68.12, + "learning_rate": 9.183016257765619e-05, + "loss": 0.0001, + "step": 15940 + }, + { + "epoch": 68.14, + "learning_rate": 9.182377710521737e-05, + "loss": 0.0001, + "step": 15944 + }, + { + "epoch": 68.15, + "learning_rate": 9.181738936052924e-05, + "loss": 0.0001, + "step": 15948 + }, + { + "epoch": 68.17, + "learning_rate": 9.181099934393884e-05, + "loss": 0.0005, + "step": 15952 + }, + { + "epoch": 68.19, + "learning_rate": 9.180460705579333e-05, + "loss": 0.0003, + "step": 15956 + }, + { + "epoch": 68.21, + "learning_rate": 9.179821249644e-05, + "loss": 0.0001, + "step": 15960 + }, + { + "epoch": 68.22, + "learning_rate": 9.179181566622628e-05, + "loss": 0.0001, + "step": 15964 + }, + { + "epoch": 68.24, + "learning_rate": 9.178541656549971e-05, + "loss": 0.0001, + "step": 15968 + }, + { + "epoch": 68.26, + "learning_rate": 9.17790151946079e-05, + "loss": 0.0004, + "step": 15972 + }, + { + "epoch": 68.27, + "learning_rate": 9.177261155389864e-05, + "loss": 0.0006, + "step": 15976 + }, + { + "epoch": 68.29, + "learning_rate": 9.17662056437199e-05, + "loss": 0.001, + "step": 15980 + }, + { + "epoch": 68.31, + "learning_rate": 9.175979746441963e-05, + "loss": 0.0002, + "step": 15984 + }, + { + "epoch": 68.32, + "learning_rate": 9.175338701634601e-05, + "loss": 0.0002, + "step": 15988 + }, + { + "epoch": 68.34, + "learning_rate": 9.174697429984732e-05, + "loss": 0.0001, + "step": 15992 + }, + { + "epoch": 68.36, + "learning_rate": 9.174055931527197e-05, + "loss": 0.0008, + "step": 15996 + }, + { + "epoch": 68.38, + "learning_rate": 9.173414206296844e-05, + "loss": 0.0014, + "step": 16000 + }, + { + "epoch": 68.38, + "eval_exact_match": 0.5135135135135135, + "eval_loss": 0.9037817120552063, + "eval_runtime": 142.7035, + "eval_samples_per_second": 6.741, + "step": 16000 + }, + { + "epoch": 68.39, + "learning_rate": 9.17277225432854e-05, + "loss": 0.0002, + "step": 16004 + }, + { + "epoch": 68.41, + "learning_rate": 9.172130075657161e-05, + "loss": 0.0001, + "step": 16008 + }, + { + "epoch": 68.43, + "learning_rate": 9.171487670317598e-05, + "loss": 0.0015, + "step": 16012 + }, + { + "epoch": 68.44, + "learning_rate": 9.170845038344749e-05, + "loss": 0.0004, + "step": 16016 + }, + { + "epoch": 68.46, + "learning_rate": 9.17020217977353e-05, + "loss": 0.0005, + "step": 16020 + }, + { + "epoch": 68.48, + "learning_rate": 9.169559094638866e-05, + "loss": 0.0002, + "step": 16024 + }, + { + "epoch": 68.5, + "learning_rate": 9.168915782975696e-05, + "loss": 0.0001, + "step": 16028 + }, + { + "epoch": 68.51, + "learning_rate": 9.168272244818971e-05, + "loss": 0.0002, + "step": 16032 + }, + { + "epoch": 68.53, + "learning_rate": 9.167628480203652e-05, + "loss": 0.0009, + "step": 16036 + }, + { + "epoch": 68.55, + "learning_rate": 9.166984489164714e-05, + "loss": 0.0, + "step": 16040 + }, + { + "epoch": 68.56, + "learning_rate": 9.166340271737146e-05, + "loss": 0.0012, + "step": 16044 + }, + { + "epoch": 68.58, + "learning_rate": 9.16569582795595e-05, + "loss": 0.0001, + "step": 16048 + }, + { + "epoch": 68.6, + "learning_rate": 9.165051157856132e-05, + "loss": 0.0002, + "step": 16052 + }, + { + "epoch": 68.62, + "learning_rate": 9.164406261472721e-05, + "loss": 0.0001, + "step": 16056 + }, + { + "epoch": 68.63, + "learning_rate": 9.163761138840752e-05, + "loss": 0.0002, + "step": 16060 + }, + { + "epoch": 68.65, + "learning_rate": 9.163115789995276e-05, + "loss": 0.0002, + "step": 16064 + }, + { + "epoch": 68.67, + "learning_rate": 9.162470214971351e-05, + "loss": 0.002, + "step": 16068 + }, + { + "epoch": 68.68, + "learning_rate": 9.161824413804052e-05, + "loss": 0.0009, + "step": 16072 + }, + { + "epoch": 68.7, + "learning_rate": 9.161178386528467e-05, + "loss": 0.0001, + "step": 16076 + }, + { + "epoch": 68.72, + "learning_rate": 9.160532133179689e-05, + "loss": 0.0012, + "step": 16080 + }, + { + "epoch": 68.74, + "learning_rate": 9.159885653792834e-05, + "loss": 0.0001, + "step": 16084 + }, + { + "epoch": 68.75, + "learning_rate": 9.159238948403022e-05, + "loss": 0.0001, + "step": 16088 + }, + { + "epoch": 68.77, + "learning_rate": 9.158592017045387e-05, + "loss": 0.0002, + "step": 16092 + }, + { + "epoch": 68.79, + "learning_rate": 9.157944859755076e-05, + "loss": 0.0003, + "step": 16096 + }, + { + "epoch": 68.8, + "learning_rate": 9.157297476567253e-05, + "loss": 0.0002, + "step": 16100 + }, + { + "epoch": 68.82, + "learning_rate": 9.156649867517084e-05, + "loss": 0.0002, + "step": 16104 + }, + { + "epoch": 68.84, + "learning_rate": 9.156002032639757e-05, + "loss": 0.0007, + "step": 16108 + }, + { + "epoch": 68.85, + "learning_rate": 9.155353971970466e-05, + "loss": 0.0001, + "step": 16112 + }, + { + "epoch": 68.87, + "learning_rate": 9.15470568554442e-05, + "loss": 0.0042, + "step": 16116 + }, + { + "epoch": 68.89, + "learning_rate": 9.154057173396842e-05, + "loss": 0.0009, + "step": 16120 + }, + { + "epoch": 68.91, + "learning_rate": 9.153408435562961e-05, + "loss": 0.0009, + "step": 16124 + }, + { + "epoch": 68.92, + "learning_rate": 9.152759472078027e-05, + "loss": 0.0022, + "step": 16128 + }, + { + "epoch": 68.94, + "learning_rate": 9.152110282977293e-05, + "loss": 0.0005, + "step": 16132 + }, + { + "epoch": 68.96, + "learning_rate": 9.151460868296034e-05, + "loss": 0.0013, + "step": 16136 + }, + { + "epoch": 68.97, + "learning_rate": 9.150811228069527e-05, + "loss": 0.0002, + "step": 16140 + }, + { + "epoch": 68.99, + "learning_rate": 9.15016136233307e-05, + "loss": 0.0009, + "step": 16144 + }, + { + "epoch": 69.01, + "learning_rate": 9.149511271121967e-05, + "loss": 0.0008, + "step": 16148 + }, + { + "epoch": 69.03, + "learning_rate": 9.14886095447154e-05, + "loss": 0.0001, + "step": 16152 + }, + { + "epoch": 69.04, + "learning_rate": 9.148210412417117e-05, + "loss": 0.0003, + "step": 16156 + }, + { + "epoch": 69.06, + "learning_rate": 9.147559644994044e-05, + "loss": 0.0031, + "step": 16160 + }, + { + "epoch": 69.08, + "learning_rate": 9.146908652237676e-05, + "loss": 0.0001, + "step": 16164 + }, + { + "epoch": 69.09, + "learning_rate": 9.146257434183379e-05, + "loss": 0.0004, + "step": 16168 + }, + { + "epoch": 69.11, + "learning_rate": 9.145605990866534e-05, + "loss": 0.0001, + "step": 16172 + }, + { + "epoch": 69.13, + "learning_rate": 9.144954322322534e-05, + "loss": 0.0001, + "step": 16176 + }, + { + "epoch": 69.15, + "learning_rate": 9.144302428586783e-05, + "loss": 0.0004, + "step": 16180 + }, + { + "epoch": 69.16, + "learning_rate": 9.143650309694699e-05, + "loss": 0.0001, + "step": 16184 + }, + { + "epoch": 69.18, + "learning_rate": 9.14299796568171e-05, + "loss": 0.0002, + "step": 16188 + }, + { + "epoch": 69.2, + "learning_rate": 9.142345396583257e-05, + "loss": 0.0007, + "step": 16192 + }, + { + "epoch": 69.21, + "learning_rate": 9.141692602434795e-05, + "loss": 0.0001, + "step": 16196 + }, + { + "epoch": 69.23, + "learning_rate": 9.141039583271787e-05, + "loss": 0.0001, + "step": 16200 + }, + { + "epoch": 69.25, + "learning_rate": 9.140386339129714e-05, + "loss": 0.0018, + "step": 16204 + }, + { + "epoch": 69.26, + "learning_rate": 9.139732870044064e-05, + "loss": 0.0001, + "step": 16208 + }, + { + "epoch": 69.28, + "learning_rate": 9.139079176050341e-05, + "loss": 0.0004, + "step": 16212 + }, + { + "epoch": 69.3, + "learning_rate": 9.138425257184059e-05, + "loss": 0.0019, + "step": 16216 + }, + { + "epoch": 69.32, + "learning_rate": 9.137771113480744e-05, + "loss": 0.0013, + "step": 16220 + }, + { + "epoch": 69.33, + "learning_rate": 9.137116744975937e-05, + "loss": 0.0011, + "step": 16224 + }, + { + "epoch": 69.35, + "learning_rate": 9.136462151705187e-05, + "loss": 0.0002, + "step": 16228 + }, + { + "epoch": 69.37, + "learning_rate": 9.135807333704058e-05, + "loss": 0.0001, + "step": 16232 + }, + { + "epoch": 69.38, + "learning_rate": 9.135152291008128e-05, + "loss": 0.0001, + "step": 16236 + }, + { + "epoch": 69.4, + "learning_rate": 9.134497023652982e-05, + "loss": 0.0002, + "step": 16240 + }, + { + "epoch": 69.42, + "learning_rate": 9.13384153167422e-05, + "loss": 0.0001, + "step": 16244 + }, + { + "epoch": 69.44, + "learning_rate": 9.133185815107458e-05, + "loss": 0.0001, + "step": 16248 + }, + { + "epoch": 69.45, + "learning_rate": 9.132529873988317e-05, + "loss": 0.0, + "step": 16252 + }, + { + "epoch": 69.47, + "learning_rate": 9.131873708352434e-05, + "loss": 0.0017, + "step": 16256 + }, + { + "epoch": 69.49, + "learning_rate": 9.13121731823546e-05, + "loss": 0.0003, + "step": 16260 + }, + { + "epoch": 69.5, + "learning_rate": 9.130560703673054e-05, + "loss": 0.0002, + "step": 16264 + }, + { + "epoch": 69.52, + "learning_rate": 9.12990386470089e-05, + "loss": 0.0001, + "step": 16268 + }, + { + "epoch": 69.54, + "learning_rate": 9.129246801354654e-05, + "loss": 0.0002, + "step": 16272 + }, + { + "epoch": 69.56, + "learning_rate": 9.128589513670043e-05, + "loss": 0.0001, + "step": 16276 + }, + { + "epoch": 69.57, + "learning_rate": 9.127932001682766e-05, + "loss": 0.0003, + "step": 16280 + }, + { + "epoch": 69.59, + "learning_rate": 9.127274265428548e-05, + "loss": 0.0001, + "step": 16284 + }, + { + "epoch": 69.61, + "learning_rate": 9.12661630494312e-05, + "loss": 0.0004, + "step": 16288 + }, + { + "epoch": 69.62, + "learning_rate": 9.12595812026223e-05, + "loss": 0.0006, + "step": 16292 + }, + { + "epoch": 69.64, + "learning_rate": 9.125299711421638e-05, + "loss": 0.0001, + "step": 16296 + }, + { + "epoch": 69.66, + "learning_rate": 9.12464107845711e-05, + "loss": 0.0002, + "step": 16300 + }, + { + "epoch": 69.68, + "learning_rate": 9.123982221404435e-05, + "loss": 0.0001, + "step": 16304 + }, + { + "epoch": 69.69, + "learning_rate": 9.123323140299404e-05, + "loss": 0.0002, + "step": 16308 + }, + { + "epoch": 69.71, + "learning_rate": 9.122663835177825e-05, + "loss": 0.001, + "step": 16312 + }, + { + "epoch": 69.73, + "learning_rate": 9.12200430607552e-05, + "loss": 0.0004, + "step": 16316 + }, + { + "epoch": 69.74, + "learning_rate": 9.121344553028317e-05, + "loss": 0.0001, + "step": 16320 + }, + { + "epoch": 69.76, + "learning_rate": 9.120684576072062e-05, + "loss": 0.0017, + "step": 16324 + }, + { + "epoch": 69.78, + "learning_rate": 9.12002437524261e-05, + "loss": 0.0008, + "step": 16328 + }, + { + "epoch": 69.79, + "learning_rate": 9.11936395057583e-05, + "loss": 0.0001, + "step": 16332 + }, + { + "epoch": 69.81, + "learning_rate": 9.118703302107603e-05, + "loss": 0.0006, + "step": 16336 + }, + { + "epoch": 69.83, + "learning_rate": 9.118042429873818e-05, + "loss": 0.0005, + "step": 16340 + }, + { + "epoch": 69.85, + "learning_rate": 9.117381333910385e-05, + "loss": 0.0001, + "step": 16344 + }, + { + "epoch": 69.86, + "learning_rate": 9.116720014253215e-05, + "loss": 0.0001, + "step": 16348 + }, + { + "epoch": 69.88, + "learning_rate": 9.116058470938241e-05, + "loss": 0.0021, + "step": 16352 + }, + { + "epoch": 69.9, + "learning_rate": 9.115396704001401e-05, + "loss": 0.0004, + "step": 16356 + }, + { + "epoch": 69.91, + "learning_rate": 9.11473471347865e-05, + "loss": 0.0001, + "step": 16360 + }, + { + "epoch": 69.93, + "learning_rate": 9.114072499405955e-05, + "loss": 0.0002, + "step": 16364 + }, + { + "epoch": 69.95, + "learning_rate": 9.113410061819292e-05, + "loss": 0.0001, + "step": 16368 + }, + { + "epoch": 69.97, + "learning_rate": 9.112747400754648e-05, + "loss": 0.0001, + "step": 16372 + }, + { + "epoch": 69.98, + "learning_rate": 9.112084516248029e-05, + "loss": 0.0002, + "step": 16376 + }, + { + "epoch": 70.0, + "learning_rate": 9.111421408335447e-05, + "loss": 0.0002, + "step": 16380 + }, + { + "epoch": 70.02, + "learning_rate": 9.11075807705293e-05, + "loss": 0.0007, + "step": 16384 + }, + { + "epoch": 70.03, + "learning_rate": 9.110094522436512e-05, + "loss": 0.0002, + "step": 16388 + }, + { + "epoch": 70.05, + "learning_rate": 9.109430744522247e-05, + "loss": 0.0017, + "step": 16392 + }, + { + "epoch": 70.07, + "learning_rate": 9.108766743346195e-05, + "loss": 0.0003, + "step": 16396 + }, + { + "epoch": 70.09, + "learning_rate": 9.108102518944433e-05, + "loss": 0.0001, + "step": 16400 + }, + { + "epoch": 70.1, + "learning_rate": 9.107438071353047e-05, + "loss": 0.0004, + "step": 16404 + }, + { + "epoch": 70.12, + "learning_rate": 9.106773400608135e-05, + "loss": 0.0003, + "step": 16408 + }, + { + "epoch": 70.14, + "learning_rate": 9.106108506745807e-05, + "loss": 0.0002, + "step": 16412 + }, + { + "epoch": 70.15, + "learning_rate": 9.105443389802191e-05, + "loss": 0.0003, + "step": 16416 + }, + { + "epoch": 70.17, + "learning_rate": 9.104778049813417e-05, + "loss": 0.0001, + "step": 16420 + }, + { + "epoch": 70.19, + "learning_rate": 9.104112486815633e-05, + "loss": 0.0019, + "step": 16424 + }, + { + "epoch": 70.21, + "learning_rate": 9.103446700845e-05, + "loss": 0.0001, + "step": 16428 + }, + { + "epoch": 70.22, + "learning_rate": 9.102780691937689e-05, + "loss": 0.0015, + "step": 16432 + }, + { + "epoch": 70.24, + "learning_rate": 9.102114460129885e-05, + "loss": 0.0001, + "step": 16436 + }, + { + "epoch": 70.26, + "learning_rate": 9.101448005457783e-05, + "loss": 0.0002, + "step": 16440 + }, + { + "epoch": 70.27, + "learning_rate": 9.100781327957588e-05, + "loss": 0.0001, + "step": 16444 + }, + { + "epoch": 70.29, + "learning_rate": 9.100114427665524e-05, + "loss": 0.0001, + "step": 16448 + }, + { + "epoch": 70.31, + "learning_rate": 9.099447304617823e-05, + "loss": 0.0001, + "step": 16452 + }, + { + "epoch": 70.32, + "learning_rate": 9.098779958850728e-05, + "loss": 0.0001, + "step": 16456 + }, + { + "epoch": 70.34, + "learning_rate": 9.098112390400492e-05, + "loss": 0.0012, + "step": 16460 + }, + { + "epoch": 70.36, + "learning_rate": 9.097444599303389e-05, + "loss": 0.0002, + "step": 16464 + }, + { + "epoch": 70.38, + "learning_rate": 9.096776585595697e-05, + "loss": 0.0001, + "step": 16468 + }, + { + "epoch": 70.39, + "learning_rate": 9.096108349313707e-05, + "loss": 0.0006, + "step": 16472 + }, + { + "epoch": 70.41, + "learning_rate": 9.095439890493728e-05, + "loss": 0.0002, + "step": 16476 + }, + { + "epoch": 70.43, + "learning_rate": 9.094771209172072e-05, + "loss": 0.0003, + "step": 16480 + }, + { + "epoch": 70.44, + "learning_rate": 9.094102305385072e-05, + "loss": 0.0003, + "step": 16484 + }, + { + "epoch": 70.46, + "learning_rate": 9.093433179169065e-05, + "loss": 0.0002, + "step": 16488 + }, + { + "epoch": 70.48, + "learning_rate": 9.092763830560407e-05, + "loss": 0.0002, + "step": 16492 + }, + { + "epoch": 70.5, + "learning_rate": 9.092094259595462e-05, + "loss": 0.0001, + "step": 16496 + }, + { + "epoch": 70.51, + "learning_rate": 9.091424466310608e-05, + "loss": 0.0002, + "step": 16500 + }, + { + "epoch": 70.53, + "learning_rate": 9.090754450742233e-05, + "loss": 0.0001, + "step": 16504 + }, + { + "epoch": 70.55, + "learning_rate": 9.090084212926739e-05, + "loss": 0.0001, + "step": 16508 + }, + { + "epoch": 70.56, + "learning_rate": 9.08941375290054e-05, + "loss": 0.0002, + "step": 16512 + }, + { + "epoch": 70.58, + "learning_rate": 9.088743070700061e-05, + "loss": 0.0001, + "step": 16516 + }, + { + "epoch": 70.6, + "learning_rate": 9.088072166361738e-05, + "loss": 0.0001, + "step": 16520 + }, + { + "epoch": 70.62, + "learning_rate": 9.087401039922025e-05, + "loss": 0.0002, + "step": 16524 + }, + { + "epoch": 70.63, + "learning_rate": 9.08672969141738e-05, + "loss": 0.0001, + "step": 16528 + }, + { + "epoch": 70.65, + "learning_rate": 9.086058120884275e-05, + "loss": 0.0005, + "step": 16532 + }, + { + "epoch": 70.67, + "learning_rate": 9.085386328359202e-05, + "loss": 0.0001, + "step": 16536 + }, + { + "epoch": 70.68, + "learning_rate": 9.084714313878653e-05, + "loss": 0.0007, + "step": 16540 + }, + { + "epoch": 70.7, + "learning_rate": 9.084042077479142e-05, + "loss": 0.0003, + "step": 16544 + }, + { + "epoch": 70.72, + "learning_rate": 9.083369619197189e-05, + "loss": 0.0, + "step": 16548 + }, + { + "epoch": 70.74, + "learning_rate": 9.082696939069329e-05, + "loss": 0.0001, + "step": 16552 + }, + { + "epoch": 70.75, + "learning_rate": 9.082024037132106e-05, + "loss": 0.0005, + "step": 16556 + }, + { + "epoch": 70.77, + "learning_rate": 9.08135091342208e-05, + "loss": 0.0002, + "step": 16560 + }, + { + "epoch": 70.79, + "learning_rate": 9.080677567975823e-05, + "loss": 0.0002, + "step": 16564 + }, + { + "epoch": 70.8, + "learning_rate": 9.080004000829913e-05, + "loss": 0.0002, + "step": 16568 + }, + { + "epoch": 70.82, + "learning_rate": 9.079330212020949e-05, + "loss": 0.0001, + "step": 16572 + }, + { + "epoch": 70.84, + "learning_rate": 9.078656201585533e-05, + "loss": 0.0001, + "step": 16576 + }, + { + "epoch": 70.85, + "learning_rate": 9.077981969560285e-05, + "loss": 0.0012, + "step": 16580 + }, + { + "epoch": 70.87, + "learning_rate": 9.077307515981837e-05, + "loss": 0.0005, + "step": 16584 + }, + { + "epoch": 70.89, + "learning_rate": 9.07663284088683e-05, + "loss": 0.0001, + "step": 16588 + }, + { + "epoch": 70.91, + "learning_rate": 9.07595794431192e-05, + "loss": 0.0007, + "step": 16592 + }, + { + "epoch": 70.92, + "learning_rate": 9.075282826293769e-05, + "loss": 0.0006, + "step": 16596 + }, + { + "epoch": 70.94, + "learning_rate": 9.074607486869062e-05, + "loss": 0.0001, + "step": 16600 + }, + { + "epoch": 70.96, + "learning_rate": 9.073931926074486e-05, + "loss": 0.0003, + "step": 16604 + }, + { + "epoch": 70.97, + "learning_rate": 9.073256143946742e-05, + "loss": 0.0008, + "step": 16608 + }, + { + "epoch": 70.99, + "learning_rate": 9.072580140522549e-05, + "loss": 0.0003, + "step": 16612 + }, + { + "epoch": 71.01, + "learning_rate": 9.071903915838631e-05, + "loss": 0.0003, + "step": 16616 + }, + { + "epoch": 71.03, + "learning_rate": 9.071227469931727e-05, + "loss": 0.0003, + "step": 16620 + }, + { + "epoch": 71.04, + "learning_rate": 9.070550802838587e-05, + "loss": 0.0001, + "step": 16624 + }, + { + "epoch": 71.06, + "learning_rate": 9.069873914595975e-05, + "loss": 0.0001, + "step": 16628 + }, + { + "epoch": 71.08, + "learning_rate": 9.069196805240666e-05, + "loss": 0.0016, + "step": 16632 + }, + { + "epoch": 71.09, + "learning_rate": 9.068519474809446e-05, + "loss": 0.0001, + "step": 16636 + }, + { + "epoch": 71.11, + "learning_rate": 9.067841923339113e-05, + "loss": 0.0001, + "step": 16640 + }, + { + "epoch": 71.13, + "learning_rate": 9.06716415086648e-05, + "loss": 0.0001, + "step": 16644 + }, + { + "epoch": 71.15, + "learning_rate": 9.066486157428368e-05, + "loss": 0.0003, + "step": 16648 + }, + { + "epoch": 71.16, + "learning_rate": 9.065807943061612e-05, + "loss": 0.0004, + "step": 16652 + }, + { + "epoch": 71.18, + "learning_rate": 9.065129507803058e-05, + "loss": 0.0003, + "step": 16656 + }, + { + "epoch": 71.2, + "learning_rate": 9.064450851689567e-05, + "loss": 0.0007, + "step": 16660 + }, + { + "epoch": 71.21, + "learning_rate": 9.063771974758009e-05, + "loss": 0.0005, + "step": 16664 + }, + { + "epoch": 71.23, + "learning_rate": 9.063092877045265e-05, + "loss": 0.0034, + "step": 16668 + }, + { + "epoch": 71.25, + "learning_rate": 9.062413558588232e-05, + "loss": 0.0002, + "step": 16672 + }, + { + "epoch": 71.26, + "learning_rate": 9.061734019423817e-05, + "loss": 0.0001, + "step": 16676 + }, + { + "epoch": 71.28, + "learning_rate": 9.061054259588936e-05, + "loss": 0.0001, + "step": 16680 + }, + { + "epoch": 71.3, + "learning_rate": 9.060374279120523e-05, + "loss": 0.0001, + "step": 16684 + }, + { + "epoch": 71.32, + "learning_rate": 9.059694078055517e-05, + "loss": 0.0001, + "step": 16688 + }, + { + "epoch": 71.33, + "learning_rate": 9.059013656430877e-05, + "loss": 0.0001, + "step": 16692 + }, + { + "epoch": 71.35, + "learning_rate": 9.058333014283566e-05, + "loss": 0.0001, + "step": 16696 + }, + { + "epoch": 71.37, + "learning_rate": 9.057652151650566e-05, + "loss": 0.0013, + "step": 16700 + }, + { + "epoch": 71.38, + "learning_rate": 9.056971068568866e-05, + "loss": 0.0001, + "step": 16704 + }, + { + "epoch": 71.4, + "learning_rate": 9.056289765075468e-05, + "loss": 0.0003, + "step": 16708 + }, + { + "epoch": 71.42, + "learning_rate": 9.055608241207387e-05, + "loss": 0.0003, + "step": 16712 + }, + { + "epoch": 71.44, + "learning_rate": 9.054926497001651e-05, + "loss": 0.0001, + "step": 16716 + }, + { + "epoch": 71.45, + "learning_rate": 9.054244532495297e-05, + "loss": 0.0001, + "step": 16720 + }, + { + "epoch": 71.47, + "learning_rate": 9.053562347725377e-05, + "loss": 0.0003, + "step": 16724 + }, + { + "epoch": 71.49, + "learning_rate": 9.05287994272895e-05, + "loss": 0.0001, + "step": 16728 + }, + { + "epoch": 71.5, + "learning_rate": 9.052197317543096e-05, + "loss": 0.0008, + "step": 16732 + }, + { + "epoch": 71.52, + "learning_rate": 9.051514472204896e-05, + "loss": 0.002, + "step": 16736 + }, + { + "epoch": 71.54, + "learning_rate": 9.050831406751452e-05, + "loss": 0.0001, + "step": 16740 + }, + { + "epoch": 71.56, + "learning_rate": 9.050148121219873e-05, + "loss": 0.0001, + "step": 16744 + }, + { + "epoch": 71.57, + "learning_rate": 9.04946461564728e-05, + "loss": 0.0002, + "step": 16748 + }, + { + "epoch": 71.59, + "learning_rate": 9.048780890070811e-05, + "loss": 0.0007, + "step": 16752 + }, + { + "epoch": 71.61, + "learning_rate": 9.048096944527609e-05, + "loss": 0.0001, + "step": 16756 + }, + { + "epoch": 71.62, + "learning_rate": 9.047412779054834e-05, + "loss": 0.0001, + "step": 16760 + }, + { + "epoch": 71.64, + "learning_rate": 9.046728393689654e-05, + "loss": 0.0007, + "step": 16764 + }, + { + "epoch": 71.66, + "learning_rate": 9.046043788469252e-05, + "loss": 0.0005, + "step": 16768 + }, + { + "epoch": 71.68, + "learning_rate": 9.045358963430824e-05, + "loss": 0.0006, + "step": 16772 + }, + { + "epoch": 71.69, + "learning_rate": 9.044673918611573e-05, + "loss": 0.0002, + "step": 16776 + }, + { + "epoch": 71.71, + "learning_rate": 9.043988654048719e-05, + "loss": 0.0004, + "step": 16780 + }, + { + "epoch": 71.73, + "learning_rate": 9.04330316977949e-05, + "loss": 0.0006, + "step": 16784 + }, + { + "epoch": 71.74, + "learning_rate": 9.042617465841127e-05, + "loss": 0.0009, + "step": 16788 + }, + { + "epoch": 71.76, + "learning_rate": 9.041931542270888e-05, + "loss": 0.0001, + "step": 16792 + }, + { + "epoch": 71.78, + "learning_rate": 9.041245399106036e-05, + "loss": 0.0003, + "step": 16796 + }, + { + "epoch": 71.79, + "learning_rate": 9.040559036383848e-05, + "loss": 0.0001, + "step": 16800 + }, + { + "epoch": 71.81, + "learning_rate": 9.039872454141613e-05, + "loss": 0.0001, + "step": 16804 + }, + { + "epoch": 71.83, + "learning_rate": 9.039185652416635e-05, + "loss": 0.0002, + "step": 16808 + }, + { + "epoch": 71.85, + "learning_rate": 9.038498631246227e-05, + "loss": 0.0003, + "step": 16812 + }, + { + "epoch": 71.86, + "learning_rate": 9.037811390667711e-05, + "loss": 0.0002, + "step": 16816 + }, + { + "epoch": 71.88, + "learning_rate": 9.037123930718426e-05, + "loss": 0.0001, + "step": 16820 + }, + { + "epoch": 71.9, + "learning_rate": 9.036436251435723e-05, + "loss": 0.0001, + "step": 16824 + }, + { + "epoch": 71.91, + "learning_rate": 9.03574835285696e-05, + "loss": 0.0002, + "step": 16828 + }, + { + "epoch": 71.93, + "learning_rate": 9.035060235019513e-05, + "loss": 0.0001, + "step": 16832 + }, + { + "epoch": 71.95, + "learning_rate": 9.034371897960763e-05, + "loss": 0.0011, + "step": 16836 + }, + { + "epoch": 71.97, + "learning_rate": 9.033683341718112e-05, + "loss": 0.0001, + "step": 16840 + }, + { + "epoch": 71.98, + "learning_rate": 9.032994566328963e-05, + "loss": 0.0005, + "step": 16844 + }, + { + "epoch": 72.0, + "learning_rate": 9.032305571830739e-05, + "loss": 0.0001, + "step": 16848 + }, + { + "epoch": 72.02, + "learning_rate": 9.031616358260873e-05, + "loss": 0.0006, + "step": 16852 + }, + { + "epoch": 72.03, + "learning_rate": 9.03092692565681e-05, + "loss": 0.0006, + "step": 16856 + }, + { + "epoch": 72.05, + "learning_rate": 9.030237274056003e-05, + "loss": 0.0001, + "step": 16860 + }, + { + "epoch": 72.07, + "learning_rate": 9.029547403495925e-05, + "loss": 0.0001, + "step": 16864 + }, + { + "epoch": 72.09, + "learning_rate": 9.028857314014052e-05, + "loss": 0.0001, + "step": 16868 + }, + { + "epoch": 72.1, + "learning_rate": 9.028167005647878e-05, + "loss": 0.0001, + "step": 16872 + }, + { + "epoch": 72.12, + "learning_rate": 9.027476478434906e-05, + "loss": 0.001, + "step": 16876 + }, + { + "epoch": 72.14, + "learning_rate": 9.026785732412652e-05, + "loss": 0.0002, + "step": 16880 + }, + { + "epoch": 72.15, + "learning_rate": 9.026094767618645e-05, + "loss": 0.0001, + "step": 16884 + }, + { + "epoch": 72.17, + "learning_rate": 9.025403584090421e-05, + "loss": 0.0007, + "step": 16888 + }, + { + "epoch": 72.19, + "learning_rate": 9.024712181865536e-05, + "loss": 0.0001, + "step": 16892 + }, + { + "epoch": 72.21, + "learning_rate": 9.02402056098155e-05, + "loss": 0.0001, + "step": 16896 + }, + { + "epoch": 72.22, + "learning_rate": 9.023328721476037e-05, + "loss": 0.0006, + "step": 16900 + }, + { + "epoch": 72.24, + "learning_rate": 9.022636663386587e-05, + "loss": 0.004, + "step": 16904 + }, + { + "epoch": 72.26, + "learning_rate": 9.0219443867508e-05, + "loss": 0.0001, + "step": 16908 + }, + { + "epoch": 72.27, + "learning_rate": 9.021251891606284e-05, + "loss": 0.0001, + "step": 16912 + }, + { + "epoch": 72.29, + "learning_rate": 9.020559177990662e-05, + "loss": 0.0001, + "step": 16916 + }, + { + "epoch": 72.31, + "learning_rate": 9.019866245941569e-05, + "loss": 0.0001, + "step": 16920 + }, + { + "epoch": 72.32, + "learning_rate": 9.01917309549665e-05, + "loss": 0.0001, + "step": 16924 + }, + { + "epoch": 72.34, + "learning_rate": 9.018479726693565e-05, + "loss": 0.0001, + "step": 16928 + }, + { + "epoch": 72.36, + "learning_rate": 9.017786139569985e-05, + "loss": 0.0001, + "step": 16932 + }, + { + "epoch": 72.38, + "learning_rate": 9.017092334163591e-05, + "loss": 0.0001, + "step": 16936 + }, + { + "epoch": 72.39, + "learning_rate": 9.016398310512075e-05, + "loss": 0.0001, + "step": 16940 + }, + { + "epoch": 72.41, + "learning_rate": 9.015704068653144e-05, + "loss": 0.0002, + "step": 16944 + }, + { + "epoch": 72.43, + "learning_rate": 9.015009608624516e-05, + "loss": 0.0001, + "step": 16948 + }, + { + "epoch": 72.44, + "learning_rate": 9.01431493046392e-05, + "loss": 0.0001, + "step": 16952 + }, + { + "epoch": 72.46, + "learning_rate": 9.0136200342091e-05, + "loss": 0.0006, + "step": 16956 + }, + { + "epoch": 72.48, + "learning_rate": 9.012924919897805e-05, + "loss": 0.0003, + "step": 16960 + }, + { + "epoch": 72.5, + "learning_rate": 9.0122295875678e-05, + "loss": 0.0002, + "step": 16964 + }, + { + "epoch": 72.51, + "learning_rate": 9.011534037256866e-05, + "loss": 0.0002, + "step": 16968 + }, + { + "epoch": 72.53, + "learning_rate": 9.010838269002787e-05, + "loss": 0.0004, + "step": 16972 + }, + { + "epoch": 72.55, + "learning_rate": 9.010142282843365e-05, + "loss": 0.0009, + "step": 16976 + }, + { + "epoch": 72.56, + "learning_rate": 9.009446078816414e-05, + "loss": 0.0002, + "step": 16980 + }, + { + "epoch": 72.58, + "learning_rate": 9.008749656959758e-05, + "loss": 0.0004, + "step": 16984 + }, + { + "epoch": 72.6, + "learning_rate": 9.00805301731123e-05, + "loss": 0.0, + "step": 16988 + }, + { + "epoch": 72.62, + "learning_rate": 9.00735615990868e-05, + "loss": 0.0005, + "step": 16992 + }, + { + "epoch": 72.63, + "learning_rate": 9.006659084789967e-05, + "loss": 0.0002, + "step": 16996 + }, + { + "epoch": 72.65, + "learning_rate": 9.005961791992965e-05, + "loss": 0.0018, + "step": 17000 + }, + { + "epoch": 72.65, + "eval_exact_match": 0.5197505197505198, + "eval_loss": 0.90228271484375, + "eval_runtime": 140.6077, + "eval_samples_per_second": 6.842, + "step": 17000 + }, + { + "epoch": 72.67, + "learning_rate": 9.005264281555554e-05, + "loss": 0.0001, + "step": 17004 + }, + { + "epoch": 72.68, + "learning_rate": 9.004566553515629e-05, + "loss": 0.0001, + "step": 17008 + }, + { + "epoch": 72.7, + "learning_rate": 9.0038686079111e-05, + "loss": 0.0002, + "step": 17012 + }, + { + "epoch": 72.72, + "learning_rate": 9.003170444779882e-05, + "loss": 0.0006, + "step": 17016 + }, + { + "epoch": 72.74, + "learning_rate": 9.00247206415991e-05, + "loss": 0.0011, + "step": 17020 + }, + { + "epoch": 72.75, + "learning_rate": 9.001773466089123e-05, + "loss": 0.0013, + "step": 17024 + }, + { + "epoch": 72.77, + "learning_rate": 9.001074650605477e-05, + "loss": 0.0001, + "step": 17028 + }, + { + "epoch": 72.79, + "learning_rate": 9.000375617746937e-05, + "loss": 0.0012, + "step": 17032 + }, + { + "epoch": 72.8, + "learning_rate": 8.999676367551479e-05, + "loss": 0.0004, + "step": 17036 + }, + { + "epoch": 72.82, + "learning_rate": 8.998976900057097e-05, + "loss": 0.0001, + "step": 17040 + }, + { + "epoch": 72.84, + "learning_rate": 8.99827721530179e-05, + "loss": 0.0001, + "step": 17044 + }, + { + "epoch": 72.85, + "learning_rate": 8.997577313323571e-05, + "loss": 0.0001, + "step": 17048 + }, + { + "epoch": 72.87, + "learning_rate": 8.996877194160466e-05, + "loss": 0.0002, + "step": 17052 + }, + { + "epoch": 72.89, + "learning_rate": 8.996176857850511e-05, + "loss": 0.0001, + "step": 17056 + }, + { + "epoch": 72.91, + "learning_rate": 8.995476304431756e-05, + "loss": 0.0002, + "step": 17060 + }, + { + "epoch": 72.92, + "learning_rate": 8.99477553394226e-05, + "loss": 0.0008, + "step": 17064 + }, + { + "epoch": 72.94, + "learning_rate": 8.994074546420096e-05, + "loss": 0.0001, + "step": 17068 + }, + { + "epoch": 72.96, + "learning_rate": 8.993373341903348e-05, + "loss": 0.0, + "step": 17072 + }, + { + "epoch": 72.97, + "learning_rate": 8.992671920430111e-05, + "loss": 0.0011, + "step": 17076 + }, + { + "epoch": 72.99, + "learning_rate": 8.991970282038493e-05, + "loss": 0.0002, + "step": 17080 + }, + { + "epoch": 73.01, + "learning_rate": 8.991268426766616e-05, + "loss": 0.0002, + "step": 17084 + }, + { + "epoch": 73.03, + "learning_rate": 8.990566354652606e-05, + "loss": 0.0002, + "step": 17088 + }, + { + "epoch": 73.04, + "learning_rate": 8.98986406573461e-05, + "loss": 0.0001, + "step": 17092 + }, + { + "epoch": 73.06, + "learning_rate": 8.989161560050782e-05, + "loss": 0.0015, + "step": 17096 + }, + { + "epoch": 73.08, + "learning_rate": 8.988458837639289e-05, + "loss": 0.0009, + "step": 17100 + }, + { + "epoch": 73.09, + "learning_rate": 8.987755898538307e-05, + "loss": 0.0001, + "step": 17104 + }, + { + "epoch": 73.11, + "learning_rate": 8.987052742786028e-05, + "loss": 0.0001, + "step": 17108 + }, + { + "epoch": 73.13, + "learning_rate": 8.986349370420652e-05, + "loss": 0.0029, + "step": 17112 + }, + { + "epoch": 73.15, + "learning_rate": 8.985645781480396e-05, + "loss": 0.0005, + "step": 17116 + }, + { + "epoch": 73.16, + "learning_rate": 8.984941976003481e-05, + "loss": 0.0, + "step": 17120 + }, + { + "epoch": 73.18, + "learning_rate": 8.984237954028148e-05, + "loss": 0.0001, + "step": 17124 + }, + { + "epoch": 73.2, + "learning_rate": 8.983533715592645e-05, + "loss": 0.0002, + "step": 17128 + }, + { + "epoch": 73.21, + "learning_rate": 8.982829260735231e-05, + "loss": 0.0008, + "step": 17132 + }, + { + "epoch": 73.23, + "learning_rate": 8.982124589494178e-05, + "loss": 0.0001, + "step": 17136 + }, + { + "epoch": 73.25, + "learning_rate": 8.981419701907773e-05, + "loss": 0.0015, + "step": 17140 + }, + { + "epoch": 73.26, + "learning_rate": 8.980714598014311e-05, + "loss": 0.0002, + "step": 17144 + }, + { + "epoch": 73.28, + "learning_rate": 8.980009277852099e-05, + "loss": 0.0001, + "step": 17148 + }, + { + "epoch": 73.3, + "learning_rate": 8.979303741459457e-05, + "loss": 0.0001, + "step": 17152 + }, + { + "epoch": 73.32, + "learning_rate": 8.978597988874715e-05, + "loss": 0.0001, + "step": 17156 + }, + { + "epoch": 73.33, + "learning_rate": 8.977892020136216e-05, + "loss": 0.0, + "step": 17160 + }, + { + "epoch": 73.35, + "learning_rate": 8.977185835282316e-05, + "loss": 0.0001, + "step": 17164 + }, + { + "epoch": 73.37, + "learning_rate": 8.976479434351382e-05, + "loss": 0.0003, + "step": 17168 + }, + { + "epoch": 73.38, + "learning_rate": 8.975772817381789e-05, + "loss": 0.0001, + "step": 17172 + }, + { + "epoch": 73.4, + "learning_rate": 8.97506598441193e-05, + "loss": 0.0002, + "step": 17176 + }, + { + "epoch": 73.42, + "learning_rate": 8.974358935480205e-05, + "loss": 0.0002, + "step": 17180 + }, + { + "epoch": 73.44, + "learning_rate": 8.973651670625028e-05, + "loss": 0.0001, + "step": 17184 + }, + { + "epoch": 73.45, + "learning_rate": 8.972944189884824e-05, + "loss": 0.0001, + "step": 17188 + }, + { + "epoch": 73.47, + "learning_rate": 8.97223649329803e-05, + "loss": 0.0001, + "step": 17192 + }, + { + "epoch": 73.49, + "learning_rate": 8.971528580903093e-05, + "loss": 0.0001, + "step": 17196 + }, + { + "epoch": 73.5, + "learning_rate": 8.970820452738475e-05, + "loss": 0.0003, + "step": 17200 + }, + { + "epoch": 73.52, + "learning_rate": 8.970112108842649e-05, + "loss": 0.0001, + "step": 17204 + }, + { + "epoch": 73.54, + "learning_rate": 8.969403549254097e-05, + "loss": 0.0001, + "step": 17208 + }, + { + "epoch": 73.56, + "learning_rate": 8.968694774011312e-05, + "loss": 0.0001, + "step": 17212 + }, + { + "epoch": 73.57, + "learning_rate": 8.967985783152805e-05, + "loss": 0.0, + "step": 17216 + }, + { + "epoch": 73.59, + "learning_rate": 8.967276576717094e-05, + "loss": 0.0001, + "step": 17220 + }, + { + "epoch": 73.61, + "learning_rate": 8.966567154742709e-05, + "loss": 0.0001, + "step": 17224 + }, + { + "epoch": 73.62, + "learning_rate": 8.965857517268193e-05, + "loss": 0.0, + "step": 17228 + }, + { + "epoch": 73.64, + "learning_rate": 8.965147664332099e-05, + "loss": 0.0005, + "step": 17232 + }, + { + "epoch": 73.66, + "learning_rate": 8.964437595972993e-05, + "loss": 0.0001, + "step": 17236 + }, + { + "epoch": 73.68, + "learning_rate": 8.963727312229452e-05, + "loss": 0.0005, + "step": 17240 + }, + { + "epoch": 73.69, + "learning_rate": 8.963016813140066e-05, + "loss": 0.0002, + "step": 17244 + }, + { + "epoch": 73.71, + "learning_rate": 8.962306098743435e-05, + "loss": 0.0003, + "step": 17248 + }, + { + "epoch": 73.73, + "learning_rate": 8.961595169078172e-05, + "loss": 0.0001, + "step": 17252 + }, + { + "epoch": 73.74, + "learning_rate": 8.960884024182902e-05, + "loss": 0.0001, + "step": 17256 + }, + { + "epoch": 73.76, + "learning_rate": 8.96017266409626e-05, + "loss": 0.0003, + "step": 17260 + }, + { + "epoch": 73.78, + "learning_rate": 8.959461088856893e-05, + "loss": 0.0, + "step": 17264 + }, + { + "epoch": 73.79, + "learning_rate": 8.95874929850346e-05, + "loss": 0.0, + "step": 17268 + }, + { + "epoch": 73.81, + "learning_rate": 8.958037293074634e-05, + "loss": 0.0004, + "step": 17272 + }, + { + "epoch": 73.83, + "learning_rate": 8.957325072609095e-05, + "loss": 0.0001, + "step": 17276 + }, + { + "epoch": 73.85, + "learning_rate": 8.95661263714554e-05, + "loss": 0.0001, + "step": 17280 + }, + { + "epoch": 73.86, + "learning_rate": 8.955899986722673e-05, + "loss": 0.001, + "step": 17284 + }, + { + "epoch": 73.88, + "learning_rate": 8.955187121379213e-05, + "loss": 0.0003, + "step": 17288 + }, + { + "epoch": 73.9, + "learning_rate": 8.954474041153889e-05, + "loss": 0.0001, + "step": 17292 + }, + { + "epoch": 73.91, + "learning_rate": 8.95376074608544e-05, + "loss": 0.0007, + "step": 17296 + }, + { + "epoch": 73.93, + "learning_rate": 8.953047236212621e-05, + "loss": 0.0001, + "step": 17300 + }, + { + "epoch": 73.95, + "learning_rate": 8.952333511574196e-05, + "loss": 0.0001, + "step": 17304 + }, + { + "epoch": 73.97, + "learning_rate": 8.951619572208942e-05, + "loss": 0.0001, + "step": 17308 + }, + { + "epoch": 73.98, + "learning_rate": 8.950905418155644e-05, + "loss": 0.0024, + "step": 17312 + }, + { + "epoch": 74.0, + "learning_rate": 8.950191049453104e-05, + "loss": 0.0003, + "step": 17316 + }, + { + "epoch": 74.02, + "learning_rate": 8.949476466140132e-05, + "loss": 0.0022, + "step": 17320 + }, + { + "epoch": 74.03, + "learning_rate": 8.94876166825555e-05, + "loss": 0.0001, + "step": 17324 + }, + { + "epoch": 74.05, + "learning_rate": 8.948046655838193e-05, + "loss": 0.0001, + "step": 17328 + }, + { + "epoch": 74.07, + "learning_rate": 8.947331428926906e-05, + "loss": 0.0008, + "step": 17332 + }, + { + "epoch": 74.09, + "learning_rate": 8.94661598756055e-05, + "loss": 0.0004, + "step": 17336 + }, + { + "epoch": 74.1, + "learning_rate": 8.94590033177799e-05, + "loss": 0.0002, + "step": 17340 + }, + { + "epoch": 74.12, + "learning_rate": 8.945184461618111e-05, + "loss": 0.0008, + "step": 17344 + }, + { + "epoch": 74.14, + "learning_rate": 8.944468377119801e-05, + "loss": 0.0002, + "step": 17348 + }, + { + "epoch": 74.15, + "learning_rate": 8.943752078321968e-05, + "loss": 0.0005, + "step": 17352 + }, + { + "epoch": 74.17, + "learning_rate": 8.943035565263526e-05, + "loss": 0.0011, + "step": 17356 + }, + { + "epoch": 74.19, + "learning_rate": 8.942318837983403e-05, + "loss": 0.0002, + "step": 17360 + }, + { + "epoch": 74.21, + "learning_rate": 8.94160189652054e-05, + "loss": 0.0031, + "step": 17364 + }, + { + "epoch": 74.22, + "learning_rate": 8.940884740913884e-05, + "loss": 0.0001, + "step": 17368 + }, + { + "epoch": 74.24, + "learning_rate": 8.940167371202401e-05, + "loss": 0.0001, + "step": 17372 + }, + { + "epoch": 74.26, + "learning_rate": 8.939449787425062e-05, + "loss": 0.0028, + "step": 17376 + }, + { + "epoch": 74.27, + "learning_rate": 8.938731989620857e-05, + "loss": 0.0, + "step": 17380 + }, + { + "epoch": 74.29, + "learning_rate": 8.938013977828778e-05, + "loss": 0.0, + "step": 17384 + }, + { + "epoch": 74.31, + "learning_rate": 8.937295752087839e-05, + "loss": 0.0001, + "step": 17388 + }, + { + "epoch": 74.32, + "learning_rate": 8.936577312437056e-05, + "loss": 0.0016, + "step": 17392 + }, + { + "epoch": 74.34, + "learning_rate": 8.935858658915466e-05, + "loss": 0.0001, + "step": 17396 + }, + { + "epoch": 74.36, + "learning_rate": 8.935139791562109e-05, + "loss": 0.0, + "step": 17400 + }, + { + "epoch": 74.38, + "learning_rate": 8.93442071041604e-05, + "loss": 0.0002, + "step": 17404 + }, + { + "epoch": 74.39, + "learning_rate": 8.933701415516329e-05, + "loss": 0.0018, + "step": 17408 + }, + { + "epoch": 74.41, + "learning_rate": 8.932981906902053e-05, + "loss": 0.0, + "step": 17412 + }, + { + "epoch": 74.43, + "learning_rate": 8.932262184612304e-05, + "loss": 0.0001, + "step": 17416 + }, + { + "epoch": 74.44, + "learning_rate": 8.93154224868618e-05, + "loss": 0.0001, + "step": 17420 + }, + { + "epoch": 74.46, + "learning_rate": 8.930822099162798e-05, + "loss": 0.0002, + "step": 17424 + }, + { + "epoch": 74.48, + "learning_rate": 8.930101736081284e-05, + "loss": 0.0001, + "step": 17428 + }, + { + "epoch": 74.5, + "learning_rate": 8.929381159480772e-05, + "loss": 0.0001, + "step": 17432 + }, + { + "epoch": 74.51, + "learning_rate": 8.92866036940041e-05, + "loss": 0.0001, + "step": 17436 + }, + { + "epoch": 74.53, + "learning_rate": 8.927939365879359e-05, + "loss": 0.0003, + "step": 17440 + }, + { + "epoch": 74.55, + "learning_rate": 8.92721814895679e-05, + "loss": 0.0001, + "step": 17444 + }, + { + "epoch": 74.56, + "learning_rate": 8.926496718671889e-05, + "loss": 0.0005, + "step": 17448 + }, + { + "epoch": 74.58, + "learning_rate": 8.925775075063847e-05, + "loss": 0.0005, + "step": 17452 + }, + { + "epoch": 74.6, + "learning_rate": 8.925053218171871e-05, + "loss": 0.0001, + "step": 17456 + }, + { + "epoch": 74.62, + "learning_rate": 8.924331148035179e-05, + "loss": 0.0001, + "step": 17460 + }, + { + "epoch": 74.63, + "learning_rate": 8.923608864693003e-05, + "loss": 0.0001, + "step": 17464 + }, + { + "epoch": 74.65, + "learning_rate": 8.92288636818458e-05, + "loss": 0.0001, + "step": 17468 + }, + { + "epoch": 74.67, + "learning_rate": 8.922163658549164e-05, + "loss": 0.0001, + "step": 17472 + }, + { + "epoch": 74.68, + "learning_rate": 8.92144073582602e-05, + "loss": 0.0001, + "step": 17476 + }, + { + "epoch": 74.7, + "learning_rate": 8.920717600054425e-05, + "loss": 0.0005, + "step": 17480 + }, + { + "epoch": 74.72, + "learning_rate": 8.919994251273664e-05, + "loss": 0.0001, + "step": 17484 + }, + { + "epoch": 74.74, + "learning_rate": 8.919270689523036e-05, + "loss": 0.0001, + "step": 17488 + }, + { + "epoch": 74.75, + "learning_rate": 8.918546914841853e-05, + "loss": 0.0001, + "step": 17492 + }, + { + "epoch": 74.77, + "learning_rate": 8.917822927269434e-05, + "loss": 0.0009, + "step": 17496 + }, + { + "epoch": 74.79, + "learning_rate": 8.917098726845117e-05, + "loss": 0.0, + "step": 17500 + }, + { + "epoch": 74.8, + "learning_rate": 8.916374313608244e-05, + "loss": 0.0009, + "step": 17504 + }, + { + "epoch": 74.82, + "learning_rate": 8.915649687598174e-05, + "loss": 0.0001, + "step": 17508 + }, + { + "epoch": 74.84, + "learning_rate": 8.914924848854271e-05, + "loss": 0.0001, + "step": 17512 + }, + { + "epoch": 74.85, + "learning_rate": 8.914199797415921e-05, + "loss": 0.0004, + "step": 17516 + }, + { + "epoch": 74.87, + "learning_rate": 8.91347453332251e-05, + "loss": 0.0001, + "step": 17520 + }, + { + "epoch": 74.89, + "learning_rate": 8.912749056613444e-05, + "loss": 0.0023, + "step": 17524 + }, + { + "epoch": 74.91, + "learning_rate": 8.912023367328137e-05, + "loss": 0.0002, + "step": 17528 + }, + { + "epoch": 74.92, + "learning_rate": 8.911297465506015e-05, + "loss": 0.0001, + "step": 17532 + }, + { + "epoch": 74.94, + "learning_rate": 8.910571351186515e-05, + "loss": 0.0003, + "step": 17536 + }, + { + "epoch": 74.96, + "learning_rate": 8.909845024409086e-05, + "loss": 0.0011, + "step": 17540 + }, + { + "epoch": 74.97, + "learning_rate": 8.90911848521319e-05, + "loss": 0.0001, + "step": 17544 + }, + { + "epoch": 74.99, + "learning_rate": 8.908391733638299e-05, + "loss": 0.0001, + "step": 17548 + }, + { + "epoch": 75.01, + "learning_rate": 8.907664769723895e-05, + "loss": 0.0001, + "step": 17552 + }, + { + "epoch": 75.03, + "learning_rate": 8.906937593509476e-05, + "loss": 0.0001, + "step": 17556 + }, + { + "epoch": 75.04, + "learning_rate": 8.906210205034546e-05, + "loss": 0.0003, + "step": 17560 + }, + { + "epoch": 75.06, + "learning_rate": 8.905482604338626e-05, + "loss": 0.0012, + "step": 17564 + }, + { + "epoch": 75.08, + "learning_rate": 8.904754791461245e-05, + "loss": 0.0001, + "step": 17568 + }, + { + "epoch": 75.09, + "learning_rate": 8.904026766441944e-05, + "loss": 0.0002, + "step": 17572 + }, + { + "epoch": 75.11, + "learning_rate": 8.903298529320275e-05, + "loss": 0.0004, + "step": 17576 + }, + { + "epoch": 75.13, + "learning_rate": 8.902570080135805e-05, + "loss": 0.0003, + "step": 17580 + }, + { + "epoch": 75.15, + "learning_rate": 8.901841418928108e-05, + "loss": 0.0, + "step": 17584 + }, + { + "epoch": 75.16, + "learning_rate": 8.901112545736771e-05, + "loss": 0.0002, + "step": 17588 + }, + { + "epoch": 75.18, + "learning_rate": 8.900383460601395e-05, + "loss": 0.0002, + "step": 17592 + }, + { + "epoch": 75.2, + "learning_rate": 8.899654163561592e-05, + "loss": 0.0013, + "step": 17596 + }, + { + "epoch": 75.21, + "learning_rate": 8.898924654656979e-05, + "loss": 0.0003, + "step": 17600 + }, + { + "epoch": 75.23, + "learning_rate": 8.898194933927194e-05, + "loss": 0.0001, + "step": 17604 + }, + { + "epoch": 75.25, + "learning_rate": 8.89746500141188e-05, + "loss": 0.0002, + "step": 17608 + }, + { + "epoch": 75.26, + "learning_rate": 8.896734857150695e-05, + "loss": 0.0004, + "step": 17612 + }, + { + "epoch": 75.28, + "learning_rate": 8.896004501183305e-05, + "loss": 0.0003, + "step": 17616 + }, + { + "epoch": 75.3, + "learning_rate": 8.895273933549391e-05, + "loss": 0.0006, + "step": 17620 + }, + { + "epoch": 75.32, + "learning_rate": 8.894543154288644e-05, + "loss": 0.0005, + "step": 17624 + }, + { + "epoch": 75.33, + "learning_rate": 8.893812163440767e-05, + "loss": 0.001, + "step": 17628 + }, + { + "epoch": 75.35, + "learning_rate": 8.893080961045472e-05, + "loss": 0.0002, + "step": 17632 + }, + { + "epoch": 75.37, + "learning_rate": 8.892349547142488e-05, + "loss": 0.0001, + "step": 17636 + }, + { + "epoch": 75.38, + "learning_rate": 8.891617921771548e-05, + "loss": 0.0001, + "step": 17640 + }, + { + "epoch": 75.4, + "learning_rate": 8.890886084972406e-05, + "loss": 0.001, + "step": 17644 + }, + { + "epoch": 75.42, + "learning_rate": 8.890154036784817e-05, + "loss": 0.0003, + "step": 17648 + }, + { + "epoch": 75.44, + "learning_rate": 8.889421777248556e-05, + "loss": 0.0002, + "step": 17652 + }, + { + "epoch": 75.45, + "learning_rate": 8.888689306403402e-05, + "loss": 0.001, + "step": 17656 + }, + { + "epoch": 75.47, + "learning_rate": 8.887956624289154e-05, + "loss": 0.0001, + "step": 17660 + }, + { + "epoch": 75.49, + "learning_rate": 8.887223730945616e-05, + "loss": 0.0001, + "step": 17664 + }, + { + "epoch": 75.5, + "learning_rate": 8.886490626412604e-05, + "loss": 0.0004, + "step": 17668 + }, + { + "epoch": 75.52, + "learning_rate": 8.885757310729948e-05, + "loss": 0.0002, + "step": 17672 + }, + { + "epoch": 75.54, + "learning_rate": 8.885023783937491e-05, + "loss": 0.0007, + "step": 17676 + }, + { + "epoch": 75.56, + "learning_rate": 8.88429004607508e-05, + "loss": 0.0001, + "step": 17680 + }, + { + "epoch": 75.57, + "learning_rate": 8.883556097182582e-05, + "loss": 0.0003, + "step": 17684 + }, + { + "epoch": 75.59, + "learning_rate": 8.882821937299873e-05, + "loss": 0.0004, + "step": 17688 + }, + { + "epoch": 75.61, + "learning_rate": 8.882087566466833e-05, + "loss": 0.0008, + "step": 17692 + }, + { + "epoch": 75.62, + "learning_rate": 8.881352984723365e-05, + "loss": 0.0001, + "step": 17696 + }, + { + "epoch": 75.64, + "learning_rate": 8.880618192109379e-05, + "loss": 0.0001, + "step": 17700 + }, + { + "epoch": 75.66, + "learning_rate": 8.879883188664793e-05, + "loss": 0.0003, + "step": 17704 + }, + { + "epoch": 75.68, + "learning_rate": 8.879147974429538e-05, + "loss": 0.0001, + "step": 17708 + }, + { + "epoch": 75.69, + "learning_rate": 8.87841254944356e-05, + "loss": 0.0013, + "step": 17712 + }, + { + "epoch": 75.71, + "learning_rate": 8.877676913746813e-05, + "loss": 0.0001, + "step": 17716 + }, + { + "epoch": 75.73, + "learning_rate": 8.876941067379264e-05, + "loss": 0.0001, + "step": 17720 + }, + { + "epoch": 75.74, + "learning_rate": 8.876205010380891e-05, + "loss": 0.0002, + "step": 17724 + }, + { + "epoch": 75.76, + "learning_rate": 8.875468742791682e-05, + "loss": 0.0002, + "step": 17728 + }, + { + "epoch": 75.78, + "learning_rate": 8.874732264651639e-05, + "loss": 0.0001, + "step": 17732 + }, + { + "epoch": 75.79, + "learning_rate": 8.873995576000774e-05, + "loss": 0.0004, + "step": 17736 + }, + { + "epoch": 75.81, + "learning_rate": 8.87325867687911e-05, + "loss": 0.0001, + "step": 17740 + }, + { + "epoch": 75.83, + "learning_rate": 8.872521567326683e-05, + "loss": 0.0001, + "step": 17744 + }, + { + "epoch": 75.85, + "learning_rate": 8.871784247383539e-05, + "loss": 0.0002, + "step": 17748 + }, + { + "epoch": 75.86, + "learning_rate": 8.871046717089735e-05, + "loss": 0.0001, + "step": 17752 + }, + { + "epoch": 75.88, + "learning_rate": 8.870308976485344e-05, + "loss": 0.0, + "step": 17756 + }, + { + "epoch": 75.9, + "learning_rate": 8.869571025610442e-05, + "loss": 0.0002, + "step": 17760 + }, + { + "epoch": 75.91, + "learning_rate": 8.868832864505125e-05, + "loss": 0.0, + "step": 17764 + }, + { + "epoch": 75.93, + "learning_rate": 8.868094493209493e-05, + "loss": 0.0001, + "step": 17768 + }, + { + "epoch": 75.95, + "learning_rate": 8.867355911763667e-05, + "loss": 0.0005, + "step": 17772 + }, + { + "epoch": 75.97, + "learning_rate": 8.866617120207767e-05, + "loss": 0.0002, + "step": 17776 + }, + { + "epoch": 75.98, + "learning_rate": 8.865878118581932e-05, + "loss": 0.0005, + "step": 17780 + }, + { + "epoch": 76.0, + "learning_rate": 8.865138906926316e-05, + "loss": 0.0015, + "step": 17784 + }, + { + "epoch": 76.02, + "learning_rate": 8.864399485281074e-05, + "loss": 0.0001, + "step": 17788 + }, + { + "epoch": 76.03, + "learning_rate": 8.863659853686384e-05, + "loss": 0.0001, + "step": 17792 + }, + { + "epoch": 76.05, + "learning_rate": 8.862920012182423e-05, + "loss": 0.0004, + "step": 17796 + }, + { + "epoch": 76.07, + "learning_rate": 8.862179960809391e-05, + "loss": 0.0004, + "step": 17800 + }, + { + "epoch": 76.09, + "learning_rate": 8.861439699607492e-05, + "loss": 0.0001, + "step": 17804 + }, + { + "epoch": 76.1, + "learning_rate": 8.860699228616945e-05, + "loss": 0.0001, + "step": 17808 + }, + { + "epoch": 76.12, + "learning_rate": 8.859958547877978e-05, + "loss": 0.0006, + "step": 17812 + }, + { + "epoch": 76.14, + "learning_rate": 8.859217657430831e-05, + "loss": 0.0012, + "step": 17816 + }, + { + "epoch": 76.15, + "learning_rate": 8.858476557315758e-05, + "loss": 0.0001, + "step": 17820 + }, + { + "epoch": 76.17, + "learning_rate": 8.857735247573022e-05, + "loss": 0.0005, + "step": 17824 + }, + { + "epoch": 76.19, + "learning_rate": 8.856993728242896e-05, + "loss": 0.0001, + "step": 17828 + }, + { + "epoch": 76.21, + "learning_rate": 8.856251999365666e-05, + "loss": 0.0002, + "step": 17832 + }, + { + "epoch": 76.22, + "learning_rate": 8.855510060981633e-05, + "loss": 0.0001, + "step": 17836 + }, + { + "epoch": 76.24, + "learning_rate": 8.8547679131311e-05, + "loss": 0.0004, + "step": 17840 + }, + { + "epoch": 76.26, + "learning_rate": 8.854025555854395e-05, + "loss": 0.0001, + "step": 17844 + }, + { + "epoch": 76.27, + "learning_rate": 8.853282989191842e-05, + "loss": 0.0001, + "step": 17848 + }, + { + "epoch": 76.29, + "learning_rate": 8.852540213183789e-05, + "loss": 0.0001, + "step": 17852 + }, + { + "epoch": 76.31, + "learning_rate": 8.851797227870589e-05, + "loss": 0.001, + "step": 17856 + }, + { + "epoch": 76.32, + "learning_rate": 8.851054033292604e-05, + "loss": 0.0002, + "step": 17860 + }, + { + "epoch": 76.34, + "learning_rate": 8.850310629490218e-05, + "loss": 0.0001, + "step": 17864 + }, + { + "epoch": 76.36, + "learning_rate": 8.849567016503814e-05, + "loss": 0.0005, + "step": 17868 + }, + { + "epoch": 76.38, + "learning_rate": 8.848823194373796e-05, + "loss": 0.0002, + "step": 17872 + }, + { + "epoch": 76.39, + "learning_rate": 8.848079163140573e-05, + "loss": 0.0002, + "step": 17876 + }, + { + "epoch": 76.41, + "learning_rate": 8.847334922844566e-05, + "loss": 0.0001, + "step": 17880 + }, + { + "epoch": 76.43, + "learning_rate": 8.846590473526209e-05, + "loss": 0.0004, + "step": 17884 + }, + { + "epoch": 76.44, + "learning_rate": 8.845845815225951e-05, + "loss": 0.0012, + "step": 17888 + }, + { + "epoch": 76.46, + "learning_rate": 8.845100947984246e-05, + "loss": 0.0001, + "step": 17892 + }, + { + "epoch": 76.48, + "learning_rate": 8.844355871841563e-05, + "loss": 0.0002, + "step": 17896 + }, + { + "epoch": 76.5, + "learning_rate": 8.84361058683838e-05, + "loss": 0.0005, + "step": 17900 + }, + { + "epoch": 76.51, + "learning_rate": 8.842865093015189e-05, + "loss": 0.0001, + "step": 17904 + }, + { + "epoch": 76.53, + "learning_rate": 8.84211939041249e-05, + "loss": 0.0005, + "step": 17908 + }, + { + "epoch": 76.55, + "learning_rate": 8.8413734790708e-05, + "loss": 0.0001, + "step": 17912 + }, + { + "epoch": 76.56, + "learning_rate": 8.840627359030642e-05, + "loss": 0.0012, + "step": 17916 + }, + { + "epoch": 76.58, + "learning_rate": 8.83988103033255e-05, + "loss": 0.0001, + "step": 17920 + }, + { + "epoch": 76.6, + "learning_rate": 8.839134493017074e-05, + "loss": 0.0003, + "step": 17924 + }, + { + "epoch": 76.62, + "learning_rate": 8.83838774712477e-05, + "loss": 0.0002, + "step": 17928 + }, + { + "epoch": 76.63, + "learning_rate": 8.837640792696212e-05, + "loss": 0.0002, + "step": 17932 + }, + { + "epoch": 76.65, + "learning_rate": 8.836893629771977e-05, + "loss": 0.0006, + "step": 17936 + }, + { + "epoch": 76.67, + "learning_rate": 8.836146258392661e-05, + "loss": 0.0004, + "step": 17940 + }, + { + "epoch": 76.68, + "learning_rate": 8.835398678598867e-05, + "loss": 0.0008, + "step": 17944 + }, + { + "epoch": 76.7, + "learning_rate": 8.83465089043121e-05, + "loss": 0.0001, + "step": 17948 + }, + { + "epoch": 76.72, + "learning_rate": 8.833902893930317e-05, + "loss": 0.0006, + "step": 17952 + }, + { + "epoch": 76.74, + "learning_rate": 8.833154689136826e-05, + "loss": 0.0016, + "step": 17956 + }, + { + "epoch": 76.75, + "learning_rate": 8.832406276091386e-05, + "loss": 0.0003, + "step": 17960 + }, + { + "epoch": 76.77, + "learning_rate": 8.831657654834658e-05, + "loss": 0.0006, + "step": 17964 + }, + { + "epoch": 76.79, + "learning_rate": 8.830908825407314e-05, + "loss": 0.0005, + "step": 17968 + }, + { + "epoch": 76.8, + "learning_rate": 8.830159787850036e-05, + "loss": 0.0008, + "step": 17972 + }, + { + "epoch": 76.82, + "learning_rate": 8.829410542203522e-05, + "loss": 0.0011, + "step": 17976 + }, + { + "epoch": 76.84, + "learning_rate": 8.828661088508473e-05, + "loss": 0.0003, + "step": 17980 + }, + { + "epoch": 76.85, + "learning_rate": 8.82791142680561e-05, + "loss": 0.0014, + "step": 17984 + }, + { + "epoch": 76.87, + "learning_rate": 8.827161557135659e-05, + "loss": 0.0001, + "step": 17988 + }, + { + "epoch": 76.89, + "learning_rate": 8.826411479539363e-05, + "loss": 0.0001, + "step": 17992 + }, + { + "epoch": 76.91, + "learning_rate": 8.825661194057469e-05, + "loss": 0.001, + "step": 17996 + }, + { + "epoch": 76.92, + "learning_rate": 8.824910700730742e-05, + "loss": 0.0001, + "step": 18000 + }, + { + "epoch": 76.92, + "eval_exact_match": 0.5135135135135135, + "eval_loss": 0.9052860736846924, + "eval_runtime": 171.5376, + "eval_samples_per_second": 5.608, + "step": 18000 + }, + { + "epoch": 76.94, + "learning_rate": 8.824159999599955e-05, + "loss": 0.0002, + "step": 18004 + }, + { + "epoch": 76.96, + "learning_rate": 8.823409090705892e-05, + "loss": 0.0014, + "step": 18008 + }, + { + "epoch": 76.97, + "learning_rate": 8.822657974089352e-05, + "loss": 0.0001, + "step": 18012 + }, + { + "epoch": 76.99, + "learning_rate": 8.82190664979114e-05, + "loss": 0.0014, + "step": 18016 + }, + { + "epoch": 77.01, + "learning_rate": 8.821155117852074e-05, + "loss": 0.0003, + "step": 18020 + }, + { + "epoch": 77.03, + "learning_rate": 8.820403378312987e-05, + "loss": 0.0002, + "step": 18024 + }, + { + "epoch": 77.04, + "learning_rate": 8.819651431214717e-05, + "loss": 0.0, + "step": 18028 + }, + { + "epoch": 77.06, + "learning_rate": 8.81889927659812e-05, + "loss": 0.0001, + "step": 18032 + }, + { + "epoch": 77.08, + "learning_rate": 8.818146914504058e-05, + "loss": 0.0001, + "step": 18036 + }, + { + "epoch": 77.09, + "learning_rate": 8.817394344973406e-05, + "loss": 0.0001, + "step": 18040 + }, + { + "epoch": 77.11, + "learning_rate": 8.816641568047052e-05, + "loss": 0.0004, + "step": 18044 + }, + { + "epoch": 77.13, + "learning_rate": 8.815888583765893e-05, + "loss": 0.0001, + "step": 18048 + }, + { + "epoch": 77.15, + "learning_rate": 8.815135392170836e-05, + "loss": 0.0001, + "step": 18052 + }, + { + "epoch": 77.16, + "learning_rate": 8.814381993302805e-05, + "loss": 0.0001, + "step": 18056 + }, + { + "epoch": 77.18, + "learning_rate": 8.813628387202728e-05, + "loss": 0.0002, + "step": 18060 + }, + { + "epoch": 77.2, + "learning_rate": 8.81287457391155e-05, + "loss": 0.0003, + "step": 18064 + }, + { + "epoch": 77.21, + "learning_rate": 8.812120553470223e-05, + "loss": 0.0001, + "step": 18068 + }, + { + "epoch": 77.23, + "learning_rate": 8.811366325919715e-05, + "loss": 0.0011, + "step": 18072 + }, + { + "epoch": 77.25, + "learning_rate": 8.810611891301e-05, + "loss": 0.0001, + "step": 18076 + }, + { + "epoch": 77.26, + "learning_rate": 8.809857249655066e-05, + "loss": 0.0003, + "step": 18080 + }, + { + "epoch": 77.28, + "learning_rate": 8.809102401022916e-05, + "loss": 0.0001, + "step": 18084 + }, + { + "epoch": 77.3, + "learning_rate": 8.808347345445555e-05, + "loss": 0.0001, + "step": 18088 + }, + { + "epoch": 77.32, + "learning_rate": 8.807592082964006e-05, + "loss": 0.0001, + "step": 18092 + }, + { + "epoch": 77.33, + "learning_rate": 8.806836613619303e-05, + "loss": 0.0001, + "step": 18096 + }, + { + "epoch": 77.35, + "learning_rate": 8.806080937452489e-05, + "loss": 0.0, + "step": 18100 + }, + { + "epoch": 77.37, + "learning_rate": 8.80532505450462e-05, + "loss": 0.0005, + "step": 18104 + }, + { + "epoch": 77.38, + "learning_rate": 8.804568964816762e-05, + "loss": 0.0009, + "step": 18108 + }, + { + "epoch": 77.4, + "learning_rate": 8.803812668429992e-05, + "loss": 0.0001, + "step": 18112 + }, + { + "epoch": 77.42, + "learning_rate": 8.8030561653854e-05, + "loss": 0.0025, + "step": 18116 + }, + { + "epoch": 77.44, + "learning_rate": 8.802299455724086e-05, + "loss": 0.0012, + "step": 18120 + }, + { + "epoch": 77.45, + "learning_rate": 8.80154253948716e-05, + "loss": 0.0001, + "step": 18124 + }, + { + "epoch": 77.47, + "learning_rate": 8.800785416715747e-05, + "loss": 0.0003, + "step": 18128 + }, + { + "epoch": 77.49, + "learning_rate": 8.80002808745098e-05, + "loss": 0.0005, + "step": 18132 + }, + { + "epoch": 77.5, + "learning_rate": 8.799270551734002e-05, + "loss": 0.0017, + "step": 18136 + }, + { + "epoch": 77.52, + "learning_rate": 8.798512809605973e-05, + "loss": 0.0009, + "step": 18140 + }, + { + "epoch": 77.54, + "learning_rate": 8.797754861108056e-05, + "loss": 0.0006, + "step": 18144 + }, + { + "epoch": 77.56, + "learning_rate": 8.796996706281433e-05, + "loss": 0.0003, + "step": 18148 + }, + { + "epoch": 77.57, + "learning_rate": 8.796238345167293e-05, + "loss": 0.0001, + "step": 18152 + }, + { + "epoch": 77.59, + "learning_rate": 8.795479777806838e-05, + "loss": 0.0001, + "step": 18156 + }, + { + "epoch": 77.61, + "learning_rate": 8.794721004241277e-05, + "loss": 0.0001, + "step": 18160 + }, + { + "epoch": 77.62, + "learning_rate": 8.79396202451184e-05, + "loss": 0.0007, + "step": 18164 + }, + { + "epoch": 77.64, + "learning_rate": 8.793202838659753e-05, + "loss": 0.0001, + "step": 18168 + }, + { + "epoch": 77.66, + "learning_rate": 8.792443446726268e-05, + "loss": 0.0017, + "step": 18172 + }, + { + "epoch": 77.68, + "learning_rate": 8.79168384875264e-05, + "loss": 0.0001, + "step": 18176 + }, + { + "epoch": 77.69, + "learning_rate": 8.790924044780139e-05, + "loss": 0.0001, + "step": 18180 + }, + { + "epoch": 77.71, + "learning_rate": 8.790164034850043e-05, + "loss": 0.0002, + "step": 18184 + }, + { + "epoch": 77.73, + "learning_rate": 8.789403819003642e-05, + "loss": 0.0003, + "step": 18188 + }, + { + "epoch": 77.74, + "learning_rate": 8.78864339728224e-05, + "loss": 0.0001, + "step": 18192 + }, + { + "epoch": 77.76, + "learning_rate": 8.78788276972715e-05, + "loss": 0.0002, + "step": 18196 + }, + { + "epoch": 77.78, + "learning_rate": 8.787121936379692e-05, + "loss": 0.0001, + "step": 18200 + }, + { + "epoch": 77.79, + "learning_rate": 8.786360897281206e-05, + "loss": 0.0013, + "step": 18204 + }, + { + "epoch": 77.81, + "learning_rate": 8.785599652473037e-05, + "loss": 0.0012, + "step": 18208 + }, + { + "epoch": 77.83, + "learning_rate": 8.784838201996544e-05, + "loss": 0.0003, + "step": 18212 + }, + { + "epoch": 77.85, + "learning_rate": 8.784076545893094e-05, + "loss": 0.0001, + "step": 18216 + }, + { + "epoch": 77.86, + "learning_rate": 8.783314684204067e-05, + "loss": 0.0001, + "step": 18220 + }, + { + "epoch": 77.88, + "learning_rate": 8.782552616970856e-05, + "loss": 0.0007, + "step": 18224 + }, + { + "epoch": 77.9, + "learning_rate": 8.781790344234863e-05, + "loss": 0.0005, + "step": 18228 + }, + { + "epoch": 77.91, + "learning_rate": 8.781027866037501e-05, + "loss": 0.0012, + "step": 18232 + }, + { + "epoch": 77.93, + "learning_rate": 8.780265182420195e-05, + "loss": 0.0, + "step": 18236 + }, + { + "epoch": 77.95, + "learning_rate": 8.779502293424381e-05, + "loss": 0.0002, + "step": 18240 + }, + { + "epoch": 77.97, + "learning_rate": 8.778739199091506e-05, + "loss": 0.0003, + "step": 18244 + }, + { + "epoch": 77.98, + "learning_rate": 8.777975899463029e-05, + "loss": 0.0002, + "step": 18248 + }, + { + "epoch": 78.0, + "learning_rate": 8.777212394580418e-05, + "loss": 0.0012, + "step": 18252 + }, + { + "epoch": 78.02, + "learning_rate": 8.776448684485155e-05, + "loss": 0.0001, + "step": 18256 + }, + { + "epoch": 78.03, + "learning_rate": 8.775684769218731e-05, + "loss": 0.0, + "step": 18260 + }, + { + "epoch": 78.05, + "learning_rate": 8.774920648822646e-05, + "loss": 0.0002, + "step": 18264 + }, + { + "epoch": 78.07, + "learning_rate": 8.77415632333842e-05, + "loss": 0.0002, + "step": 18268 + }, + { + "epoch": 78.09, + "learning_rate": 8.773391792807575e-05, + "loss": 0.0001, + "step": 18272 + }, + { + "epoch": 78.1, + "learning_rate": 8.772627057271646e-05, + "loss": 0.0002, + "step": 18276 + }, + { + "epoch": 78.12, + "learning_rate": 8.771862116772182e-05, + "loss": 0.0001, + "step": 18280 + }, + { + "epoch": 78.14, + "learning_rate": 8.771096971350741e-05, + "loss": 0.0, + "step": 18284 + }, + { + "epoch": 78.15, + "learning_rate": 8.770331621048893e-05, + "loss": 0.0001, + "step": 18288 + }, + { + "epoch": 78.17, + "learning_rate": 8.769566065908219e-05, + "loss": 0.0001, + "step": 18292 + }, + { + "epoch": 78.19, + "learning_rate": 8.76880030597031e-05, + "loss": 0.0001, + "step": 18296 + }, + { + "epoch": 78.21, + "learning_rate": 8.768034341276772e-05, + "loss": 0.0009, + "step": 18300 + }, + { + "epoch": 78.22, + "learning_rate": 8.767268171869214e-05, + "loss": 0.0002, + "step": 18304 + }, + { + "epoch": 78.24, + "learning_rate": 8.766501797789266e-05, + "loss": 0.0001, + "step": 18308 + }, + { + "epoch": 78.26, + "learning_rate": 8.765735219078561e-05, + "loss": 0.0, + "step": 18312 + }, + { + "epoch": 78.27, + "learning_rate": 8.764968435778751e-05, + "loss": 0.0001, + "step": 18316 + }, + { + "epoch": 78.29, + "learning_rate": 8.76420144793149e-05, + "loss": 0.0002, + "step": 18320 + }, + { + "epoch": 78.31, + "learning_rate": 8.763434255578449e-05, + "loss": 0.0004, + "step": 18324 + }, + { + "epoch": 78.32, + "learning_rate": 8.76266685876131e-05, + "loss": 0.0006, + "step": 18328 + }, + { + "epoch": 78.34, + "learning_rate": 8.761899257521766e-05, + "loss": 0.0001, + "step": 18332 + }, + { + "epoch": 78.36, + "learning_rate": 8.761131451901517e-05, + "loss": 0.0001, + "step": 18336 + }, + { + "epoch": 78.38, + "learning_rate": 8.760363441942279e-05, + "loss": 0.0, + "step": 18340 + }, + { + "epoch": 78.39, + "learning_rate": 8.759595227685778e-05, + "loss": 0.0001, + "step": 18344 + }, + { + "epoch": 78.41, + "learning_rate": 8.75882680917375e-05, + "loss": 0.0001, + "step": 18348 + }, + { + "epoch": 78.43, + "learning_rate": 8.758058186447942e-05, + "loss": 0.0014, + "step": 18352 + }, + { + "epoch": 78.44, + "learning_rate": 8.757289359550111e-05, + "loss": 0.0011, + "step": 18356 + }, + { + "epoch": 78.46, + "learning_rate": 8.75652032852203e-05, + "loss": 0.0004, + "step": 18360 + }, + { + "epoch": 78.48, + "learning_rate": 8.755751093405478e-05, + "loss": 0.0001, + "step": 18364 + }, + { + "epoch": 78.5, + "learning_rate": 8.754981654242246e-05, + "loss": 0.0001, + "step": 18368 + }, + { + "epoch": 78.51, + "learning_rate": 8.754212011074139e-05, + "loss": 0.0, + "step": 18372 + }, + { + "epoch": 78.53, + "learning_rate": 8.753442163942969e-05, + "loss": 0.0017, + "step": 18376 + }, + { + "epoch": 78.55, + "learning_rate": 8.752672112890563e-05, + "loss": 0.0001, + "step": 18380 + }, + { + "epoch": 78.56, + "learning_rate": 8.751901857958756e-05, + "loss": 0.0002, + "step": 18384 + }, + { + "epoch": 78.58, + "learning_rate": 8.751131399189396e-05, + "loss": 0.0001, + "step": 18388 + }, + { + "epoch": 78.6, + "learning_rate": 8.750360736624342e-05, + "loss": 0.0008, + "step": 18392 + }, + { + "epoch": 78.62, + "learning_rate": 8.749589870305462e-05, + "loss": 0.0021, + "step": 18396 + }, + { + "epoch": 78.63, + "learning_rate": 8.748818800274635e-05, + "loss": 0.0002, + "step": 18400 + }, + { + "epoch": 78.65, + "learning_rate": 8.748047526573755e-05, + "loss": 0.0001, + "step": 18404 + }, + { + "epoch": 78.67, + "learning_rate": 8.747276049244726e-05, + "loss": 0.0001, + "step": 18408 + }, + { + "epoch": 78.68, + "learning_rate": 8.746504368329457e-05, + "loss": 0.0001, + "step": 18412 + }, + { + "epoch": 78.7, + "learning_rate": 8.74573248386988e-05, + "loss": 0.0001, + "step": 18416 + }, + { + "epoch": 78.72, + "learning_rate": 8.744960395907922e-05, + "loss": 0.0003, + "step": 18420 + }, + { + "epoch": 78.74, + "learning_rate": 8.744188104485535e-05, + "loss": 0.0001, + "step": 18424 + }, + { + "epoch": 78.75, + "learning_rate": 8.743415609644678e-05, + "loss": 0.0001, + "step": 18428 + }, + { + "epoch": 78.77, + "learning_rate": 8.742642911427317e-05, + "loss": 0.0001, + "step": 18432 + }, + { + "epoch": 78.79, + "learning_rate": 8.741870009875434e-05, + "loss": 0.0, + "step": 18436 + }, + { + "epoch": 78.8, + "learning_rate": 8.741096905031017e-05, + "loss": 0.0, + "step": 18440 + }, + { + "epoch": 78.82, + "learning_rate": 8.740323596936073e-05, + "loss": 0.0001, + "step": 18444 + }, + { + "epoch": 78.84, + "learning_rate": 8.739550085632611e-05, + "loss": 0.0002, + "step": 18448 + }, + { + "epoch": 78.85, + "learning_rate": 8.738776371162657e-05, + "loss": 0.0004, + "step": 18452 + }, + { + "epoch": 78.87, + "learning_rate": 8.738002453568245e-05, + "loss": 0.0001, + "step": 18456 + }, + { + "epoch": 78.89, + "learning_rate": 8.737228332891423e-05, + "loss": 0.0007, + "step": 18460 + }, + { + "epoch": 78.91, + "learning_rate": 8.736454009174249e-05, + "loss": 0.0018, + "step": 18464 + }, + { + "epoch": 78.92, + "learning_rate": 8.735679482458787e-05, + "loss": 0.0001, + "step": 18468 + }, + { + "epoch": 78.94, + "learning_rate": 8.734904752787121e-05, + "loss": 0.0001, + "step": 18472 + }, + { + "epoch": 78.96, + "learning_rate": 8.734129820201339e-05, + "loss": 0.0005, + "step": 18476 + }, + { + "epoch": 78.97, + "learning_rate": 8.733354684743542e-05, + "loss": 0.0001, + "step": 18480 + }, + { + "epoch": 78.99, + "learning_rate": 8.732579346455844e-05, + "loss": 0.0006, + "step": 18484 + }, + { + "epoch": 79.01, + "learning_rate": 8.731803805380369e-05, + "loss": 0.0001, + "step": 18488 + }, + { + "epoch": 79.03, + "learning_rate": 8.731028061559249e-05, + "loss": 0.0001, + "step": 18492 + }, + { + "epoch": 79.04, + "learning_rate": 8.730252115034632e-05, + "loss": 0.0001, + "step": 18496 + }, + { + "epoch": 79.06, + "learning_rate": 8.729475965848673e-05, + "loss": 0.0001, + "step": 18500 + }, + { + "epoch": 79.08, + "learning_rate": 8.728699614043539e-05, + "loss": 0.0001, + "step": 18504 + }, + { + "epoch": 79.09, + "learning_rate": 8.72792305966141e-05, + "loss": 0.0002, + "step": 18508 + }, + { + "epoch": 79.11, + "learning_rate": 8.727146302744473e-05, + "loss": 0.0001, + "step": 18512 + }, + { + "epoch": 79.13, + "learning_rate": 8.726369343334932e-05, + "loss": 0.0001, + "step": 18516 + }, + { + "epoch": 79.15, + "learning_rate": 8.725592181474997e-05, + "loss": 0.0002, + "step": 18520 + }, + { + "epoch": 79.16, + "learning_rate": 8.72481481720689e-05, + "loss": 0.0004, + "step": 18524 + }, + { + "epoch": 79.18, + "learning_rate": 8.724037250572845e-05, + "loss": 0.0001, + "step": 18528 + }, + { + "epoch": 79.2, + "learning_rate": 8.723259481615107e-05, + "loss": 0.0007, + "step": 18532 + }, + { + "epoch": 79.21, + "learning_rate": 8.72248151037593e-05, + "loss": 0.0008, + "step": 18536 + }, + { + "epoch": 79.23, + "learning_rate": 8.721703336897582e-05, + "loss": 0.0001, + "step": 18540 + }, + { + "epoch": 79.25, + "learning_rate": 8.72092496122234e-05, + "loss": 0.0, + "step": 18544 + }, + { + "epoch": 79.26, + "learning_rate": 8.720146383392492e-05, + "loss": 0.0001, + "step": 18548 + }, + { + "epoch": 79.28, + "learning_rate": 8.719367603450338e-05, + "loss": 0.0001, + "step": 18552 + }, + { + "epoch": 79.3, + "learning_rate": 8.718588621438188e-05, + "loss": 0.0001, + "step": 18556 + }, + { + "epoch": 79.32, + "learning_rate": 8.717809437398366e-05, + "loss": 0.0003, + "step": 18560 + }, + { + "epoch": 79.33, + "learning_rate": 8.717030051373199e-05, + "loss": 0.0001, + "step": 18564 + }, + { + "epoch": 79.35, + "learning_rate": 8.716250463405034e-05, + "loss": 0.0002, + "step": 18568 + }, + { + "epoch": 79.37, + "learning_rate": 8.715470673536226e-05, + "loss": 0.0002, + "step": 18572 + }, + { + "epoch": 79.38, + "learning_rate": 8.714690681809138e-05, + "loss": 0.0001, + "step": 18576 + }, + { + "epoch": 79.4, + "learning_rate": 8.713910488266148e-05, + "loss": 0.0, + "step": 18580 + }, + { + "epoch": 79.42, + "learning_rate": 8.713130092949644e-05, + "loss": 0.0001, + "step": 18584 + }, + { + "epoch": 79.44, + "learning_rate": 8.712349495902021e-05, + "loss": 0.0002, + "step": 18588 + }, + { + "epoch": 79.45, + "learning_rate": 8.711568697165691e-05, + "loss": 0.0001, + "step": 18592 + }, + { + "epoch": 79.47, + "learning_rate": 8.710787696783072e-05, + "loss": 0.0001, + "step": 18596 + }, + { + "epoch": 79.49, + "learning_rate": 8.710006494796597e-05, + "loss": 0.0006, + "step": 18600 + }, + { + "epoch": 79.5, + "learning_rate": 8.709225091248708e-05, + "loss": 0.0005, + "step": 18604 + }, + { + "epoch": 79.52, + "learning_rate": 8.708443486181855e-05, + "loss": 0.0004, + "step": 18608 + }, + { + "epoch": 79.54, + "learning_rate": 8.707661679638505e-05, + "loss": 0.0002, + "step": 18612 + }, + { + "epoch": 79.56, + "learning_rate": 8.706879671661132e-05, + "loss": 0.0001, + "step": 18616 + }, + { + "epoch": 79.57, + "learning_rate": 8.706097462292223e-05, + "loss": 0.0001, + "step": 18620 + }, + { + "epoch": 79.59, + "learning_rate": 8.70531505157427e-05, + "loss": 0.0001, + "step": 18624 + }, + { + "epoch": 79.61, + "learning_rate": 8.704532439549787e-05, + "loss": 0.0003, + "step": 18628 + }, + { + "epoch": 79.62, + "learning_rate": 8.703749626261289e-05, + "loss": 0.0002, + "step": 18632 + }, + { + "epoch": 79.64, + "learning_rate": 8.702966611751306e-05, + "loss": 0.0001, + "step": 18636 + }, + { + "epoch": 79.66, + "learning_rate": 8.70218339606238e-05, + "loss": 0.0004, + "step": 18640 + }, + { + "epoch": 79.68, + "learning_rate": 8.70139997923706e-05, + "loss": 0.0001, + "step": 18644 + }, + { + "epoch": 79.69, + "learning_rate": 8.70061636131791e-05, + "loss": 0.0006, + "step": 18648 + }, + { + "epoch": 79.71, + "learning_rate": 8.699832542347504e-05, + "loss": 0.0001, + "step": 18652 + }, + { + "epoch": 79.73, + "learning_rate": 8.699048522368425e-05, + "loss": 0.0007, + "step": 18656 + }, + { + "epoch": 79.74, + "learning_rate": 8.698264301423267e-05, + "loss": 0.0013, + "step": 18660 + }, + { + "epoch": 79.76, + "learning_rate": 8.697479879554638e-05, + "loss": 0.0, + "step": 18664 + }, + { + "epoch": 79.78, + "learning_rate": 8.696695256805154e-05, + "loss": 0.0002, + "step": 18668 + }, + { + "epoch": 79.79, + "learning_rate": 8.695910433217443e-05, + "loss": 0.0004, + "step": 18672 + }, + { + "epoch": 79.81, + "learning_rate": 8.695125408834145e-05, + "loss": 0.0, + "step": 18676 + }, + { + "epoch": 79.83, + "learning_rate": 8.694340183697908e-05, + "loss": 0.0001, + "step": 18680 + }, + { + "epoch": 79.85, + "learning_rate": 8.693554757851392e-05, + "loss": 0.0005, + "step": 18684 + }, + { + "epoch": 79.86, + "learning_rate": 8.692769131337271e-05, + "loss": 0.0001, + "step": 18688 + }, + { + "epoch": 79.88, + "learning_rate": 8.691983304198225e-05, + "loss": 0.0003, + "step": 18692 + }, + { + "epoch": 79.9, + "learning_rate": 8.69119727647695e-05, + "loss": 0.0001, + "step": 18696 + }, + { + "epoch": 79.91, + "learning_rate": 8.690411048216147e-05, + "loss": 0.0001, + "step": 18700 + }, + { + "epoch": 79.93, + "learning_rate": 8.689624619458534e-05, + "loss": 0.0003, + "step": 18704 + }, + { + "epoch": 79.95, + "learning_rate": 8.688837990246834e-05, + "loss": 0.0001, + "step": 18708 + }, + { + "epoch": 79.97, + "learning_rate": 8.688051160623786e-05, + "loss": 0.0003, + "step": 18712 + }, + { + "epoch": 79.98, + "learning_rate": 8.68726413063214e-05, + "loss": 0.0008, + "step": 18716 + }, + { + "epoch": 80.0, + "learning_rate": 8.686476900314648e-05, + "loss": 0.0009, + "step": 18720 + }, + { + "epoch": 80.02, + "learning_rate": 8.685689469714086e-05, + "loss": 0.0, + "step": 18724 + }, + { + "epoch": 80.03, + "learning_rate": 8.684901838873232e-05, + "loss": 0.0003, + "step": 18728 + }, + { + "epoch": 80.05, + "learning_rate": 8.684114007834876e-05, + "loss": 0.0001, + "step": 18732 + }, + { + "epoch": 80.07, + "learning_rate": 8.683325976641823e-05, + "loss": 0.0, + "step": 18736 + }, + { + "epoch": 80.09, + "learning_rate": 8.682537745336882e-05, + "loss": 0.0007, + "step": 18740 + }, + { + "epoch": 80.1, + "learning_rate": 8.681749313962882e-05, + "loss": 0.0, + "step": 18744 + }, + { + "epoch": 80.12, + "learning_rate": 8.680960682562653e-05, + "loss": 0.0001, + "step": 18748 + }, + { + "epoch": 80.14, + "learning_rate": 8.680171851179044e-05, + "loss": 0.0001, + "step": 18752 + }, + { + "epoch": 80.15, + "learning_rate": 8.679382819854908e-05, + "loss": 0.0001, + "step": 18756 + }, + { + "epoch": 80.17, + "learning_rate": 8.678593588633118e-05, + "loss": 0.0002, + "step": 18760 + }, + { + "epoch": 80.19, + "learning_rate": 8.677804157556549e-05, + "loss": 0.0003, + "step": 18764 + }, + { + "epoch": 80.21, + "learning_rate": 8.677014526668087e-05, + "loss": 0.0006, + "step": 18768 + }, + { + "epoch": 80.22, + "learning_rate": 8.676224696010637e-05, + "loss": 0.0015, + "step": 18772 + }, + { + "epoch": 80.24, + "learning_rate": 8.675434665627107e-05, + "loss": 0.0001, + "step": 18776 + }, + { + "epoch": 80.26, + "learning_rate": 8.674644435560419e-05, + "loss": 0.0001, + "step": 18780 + }, + { + "epoch": 80.27, + "learning_rate": 8.673854005853508e-05, + "loss": 0.0001, + "step": 18784 + }, + { + "epoch": 80.29, + "learning_rate": 8.673063376549315e-05, + "loss": 0.0, + "step": 18788 + }, + { + "epoch": 80.31, + "learning_rate": 8.672272547690793e-05, + "loss": 0.0001, + "step": 18792 + }, + { + "epoch": 80.32, + "learning_rate": 8.67148151932091e-05, + "loss": 0.0001, + "step": 18796 + }, + { + "epoch": 80.34, + "learning_rate": 8.67069029148264e-05, + "loss": 0.0, + "step": 18800 + }, + { + "epoch": 80.36, + "learning_rate": 8.669898864218968e-05, + "loss": 0.0, + "step": 18804 + }, + { + "epoch": 80.38, + "learning_rate": 8.669107237572896e-05, + "loss": 0.0, + "step": 18808 + }, + { + "epoch": 80.39, + "learning_rate": 8.668315411587432e-05, + "loss": 0.0001, + "step": 18812 + }, + { + "epoch": 80.41, + "learning_rate": 8.66752338630559e-05, + "loss": 0.0007, + "step": 18816 + }, + { + "epoch": 80.43, + "learning_rate": 8.666731161770404e-05, + "loss": 0.0001, + "step": 18820 + }, + { + "epoch": 80.44, + "learning_rate": 8.665938738024915e-05, + "loss": 0.0003, + "step": 18824 + }, + { + "epoch": 80.46, + "learning_rate": 8.665146115112173e-05, + "loss": 0.0, + "step": 18828 + }, + { + "epoch": 80.48, + "learning_rate": 8.664353293075243e-05, + "loss": 0.0001, + "step": 18832 + }, + { + "epoch": 80.5, + "learning_rate": 8.663560271957196e-05, + "loss": 0.0001, + "step": 18836 + }, + { + "epoch": 80.51, + "learning_rate": 8.662767051801116e-05, + "loss": 0.0001, + "step": 18840 + }, + { + "epoch": 80.53, + "learning_rate": 8.6619736326501e-05, + "loss": 0.0001, + "step": 18844 + }, + { + "epoch": 80.55, + "learning_rate": 8.661180014547252e-05, + "loss": 0.0001, + "step": 18848 + }, + { + "epoch": 80.56, + "learning_rate": 8.66038619753569e-05, + "loss": 0.0, + "step": 18852 + }, + { + "epoch": 80.58, + "learning_rate": 8.65959218165854e-05, + "loss": 0.0007, + "step": 18856 + }, + { + "epoch": 80.6, + "learning_rate": 8.65879796695894e-05, + "loss": 0.0002, + "step": 18860 + }, + { + "epoch": 80.62, + "learning_rate": 8.658003553480042e-05, + "loss": 0.0007, + "step": 18864 + }, + { + "epoch": 80.63, + "learning_rate": 8.657208941265002e-05, + "loss": 0.0002, + "step": 18868 + }, + { + "epoch": 80.65, + "learning_rate": 8.656414130356994e-05, + "loss": 0.0, + "step": 18872 + }, + { + "epoch": 80.67, + "learning_rate": 8.655619120799195e-05, + "loss": 0.0001, + "step": 18876 + }, + { + "epoch": 80.68, + "learning_rate": 8.654823912634801e-05, + "loss": 0.0001, + "step": 18880 + }, + { + "epoch": 80.7, + "learning_rate": 8.654028505907012e-05, + "loss": 0.0002, + "step": 18884 + }, + { + "epoch": 80.72, + "learning_rate": 8.653232900659045e-05, + "loss": 0.0001, + "step": 18888 + }, + { + "epoch": 80.74, + "learning_rate": 8.652437096934124e-05, + "loss": 0.0002, + "step": 18892 + }, + { + "epoch": 80.75, + "learning_rate": 8.651641094775483e-05, + "loss": 0.0, + "step": 18896 + }, + { + "epoch": 80.77, + "learning_rate": 8.650844894226366e-05, + "loss": 0.0003, + "step": 18900 + }, + { + "epoch": 80.79, + "learning_rate": 8.650048495330034e-05, + "loss": 0.0002, + "step": 18904 + }, + { + "epoch": 80.8, + "learning_rate": 8.649251898129754e-05, + "loss": 0.0001, + "step": 18908 + }, + { + "epoch": 80.82, + "learning_rate": 8.648455102668802e-05, + "loss": 0.0001, + "step": 18912 + }, + { + "epoch": 80.84, + "learning_rate": 8.647658108990469e-05, + "loss": 0.0001, + "step": 18916 + }, + { + "epoch": 80.85, + "learning_rate": 8.646860917138055e-05, + "loss": 0.0021, + "step": 18920 + }, + { + "epoch": 80.87, + "learning_rate": 8.646063527154869e-05, + "loss": 0.0003, + "step": 18924 + }, + { + "epoch": 80.89, + "learning_rate": 8.645265939084235e-05, + "loss": 0.0, + "step": 18928 + }, + { + "epoch": 80.91, + "learning_rate": 8.644468152969482e-05, + "loss": 0.0003, + "step": 18932 + }, + { + "epoch": 80.92, + "learning_rate": 8.643670168853957e-05, + "loss": 0.0002, + "step": 18936 + }, + { + "epoch": 80.94, + "learning_rate": 8.642871986781012e-05, + "loss": 0.0001, + "step": 18940 + }, + { + "epoch": 80.96, + "learning_rate": 8.642073606794011e-05, + "loss": 0.0001, + "step": 18944 + }, + { + "epoch": 80.97, + "learning_rate": 8.64127502893633e-05, + "loss": 0.0001, + "step": 18948 + }, + { + "epoch": 80.99, + "learning_rate": 8.640476253251354e-05, + "loss": 0.0009, + "step": 18952 + }, + { + "epoch": 81.01, + "learning_rate": 8.639677279782481e-05, + "loss": 0.0001, + "step": 18956 + }, + { + "epoch": 81.03, + "learning_rate": 8.638878108573117e-05, + "loss": 0.0001, + "step": 18960 + }, + { + "epoch": 81.04, + "learning_rate": 8.638078739666683e-05, + "loss": 0.0, + "step": 18964 + }, + { + "epoch": 81.06, + "learning_rate": 8.637279173106605e-05, + "loss": 0.0001, + "step": 18968 + }, + { + "epoch": 81.08, + "learning_rate": 8.636479408936324e-05, + "loss": 0.0002, + "step": 18972 + }, + { + "epoch": 81.09, + "learning_rate": 8.635679447199292e-05, + "loss": 0.0001, + "step": 18976 + }, + { + "epoch": 81.11, + "learning_rate": 8.634879287938969e-05, + "loss": 0.0004, + "step": 18980 + }, + { + "epoch": 81.13, + "learning_rate": 8.634078931198825e-05, + "loss": 0.0001, + "step": 18984 + }, + { + "epoch": 81.15, + "learning_rate": 8.633278377022345e-05, + "loss": 0.0002, + "step": 18988 + }, + { + "epoch": 81.16, + "learning_rate": 8.632477625453021e-05, + "loss": 0.0002, + "step": 18992 + }, + { + "epoch": 81.18, + "learning_rate": 8.631676676534359e-05, + "loss": 0.0001, + "step": 18996 + }, + { + "epoch": 81.2, + "learning_rate": 8.630875530309873e-05, + "loss": 0.0001, + "step": 19000 + }, + { + "epoch": 81.2, + "eval_exact_match": 0.5166320166320166, + "eval_loss": 0.9038861393928528, + "eval_runtime": 138.7634, + "eval_samples_per_second": 6.933, + "step": 19000 + }, + { + "epoch": 81.21, + "learning_rate": 8.630074186823088e-05, + "loss": 0.0009, + "step": 19004 + }, + { + "epoch": 81.23, + "learning_rate": 8.629272646117542e-05, + "loss": 0.0001, + "step": 19008 + }, + { + "epoch": 81.25, + "learning_rate": 8.628470908236779e-05, + "loss": 0.0015, + "step": 19012 + }, + { + "epoch": 81.26, + "learning_rate": 8.627668973224358e-05, + "loss": 0.0001, + "step": 19016 + }, + { + "epoch": 81.28, + "learning_rate": 8.626866841123849e-05, + "loss": 0.0, + "step": 19020 + }, + { + "epoch": 81.3, + "learning_rate": 8.626064511978831e-05, + "loss": 0.0025, + "step": 19024 + }, + { + "epoch": 81.32, + "learning_rate": 8.62526198583289e-05, + "loss": 0.0014, + "step": 19028 + }, + { + "epoch": 81.33, + "learning_rate": 8.624459262729631e-05, + "loss": 0.0003, + "step": 19032 + }, + { + "epoch": 81.35, + "learning_rate": 8.623656342712664e-05, + "loss": 0.0005, + "step": 19036 + }, + { + "epoch": 81.37, + "learning_rate": 8.622853225825611e-05, + "loss": 0.0001, + "step": 19040 + }, + { + "epoch": 81.38, + "learning_rate": 8.622049912112103e-05, + "loss": 0.0004, + "step": 19044 + }, + { + "epoch": 81.4, + "learning_rate": 8.621246401615786e-05, + "loss": 0.0003, + "step": 19048 + }, + { + "epoch": 81.42, + "learning_rate": 8.620442694380311e-05, + "loss": 0.0, + "step": 19052 + }, + { + "epoch": 81.44, + "learning_rate": 8.619638790449345e-05, + "loss": 0.0001, + "step": 19056 + }, + { + "epoch": 81.45, + "learning_rate": 8.618834689866562e-05, + "loss": 0.0001, + "step": 19060 + }, + { + "epoch": 81.47, + "learning_rate": 8.618030392675649e-05, + "loss": 0.0002, + "step": 19064 + }, + { + "epoch": 81.49, + "learning_rate": 8.617225898920302e-05, + "loss": 0.0003, + "step": 19068 + }, + { + "epoch": 81.5, + "learning_rate": 8.61642120864423e-05, + "loss": 0.0001, + "step": 19072 + }, + { + "epoch": 81.52, + "learning_rate": 8.615616321891151e-05, + "loss": 0.0001, + "step": 19076 + }, + { + "epoch": 81.54, + "learning_rate": 8.614811238704791e-05, + "loss": 0.0005, + "step": 19080 + }, + { + "epoch": 81.56, + "learning_rate": 8.614005959128892e-05, + "loss": 0.0006, + "step": 19084 + }, + { + "epoch": 81.57, + "learning_rate": 8.613200483207205e-05, + "loss": 0.0001, + "step": 19088 + }, + { + "epoch": 81.59, + "learning_rate": 8.612394810983487e-05, + "loss": 0.0001, + "step": 19092 + }, + { + "epoch": 81.61, + "learning_rate": 8.611588942501512e-05, + "loss": 0.0006, + "step": 19096 + }, + { + "epoch": 81.62, + "learning_rate": 8.610782877805063e-05, + "loss": 0.0001, + "step": 19100 + }, + { + "epoch": 81.64, + "learning_rate": 8.609976616937931e-05, + "loss": 0.0001, + "step": 19104 + }, + { + "epoch": 81.66, + "learning_rate": 8.60917015994392e-05, + "loss": 0.0005, + "step": 19108 + }, + { + "epoch": 81.68, + "learning_rate": 8.608363506866844e-05, + "loss": 0.0002, + "step": 19112 + }, + { + "epoch": 81.69, + "learning_rate": 8.607556657750528e-05, + "loss": 0.0002, + "step": 19116 + }, + { + "epoch": 81.71, + "learning_rate": 8.606749612638809e-05, + "loss": 0.0001, + "step": 19120 + }, + { + "epoch": 81.73, + "learning_rate": 8.605942371575531e-05, + "loss": 0.0001, + "step": 19124 + }, + { + "epoch": 81.74, + "learning_rate": 8.605134934604548e-05, + "loss": 0.0002, + "step": 19128 + }, + { + "epoch": 81.76, + "learning_rate": 8.604327301769735e-05, + "loss": 0.0003, + "step": 19132 + }, + { + "epoch": 81.78, + "learning_rate": 8.603519473114962e-05, + "loss": 0.0001, + "step": 19136 + }, + { + "epoch": 81.79, + "learning_rate": 8.602711448684123e-05, + "loss": 0.0002, + "step": 19140 + }, + { + "epoch": 81.81, + "learning_rate": 8.601903228521116e-05, + "loss": 0.0002, + "step": 19144 + }, + { + "epoch": 81.83, + "learning_rate": 8.601094812669849e-05, + "loss": 0.0006, + "step": 19148 + }, + { + "epoch": 81.85, + "learning_rate": 8.600286201174243e-05, + "loss": 0.0001, + "step": 19152 + }, + { + "epoch": 81.86, + "learning_rate": 8.599477394078233e-05, + "loss": 0.0001, + "step": 19156 + }, + { + "epoch": 81.88, + "learning_rate": 8.598668391425754e-05, + "loss": 0.0002, + "step": 19160 + }, + { + "epoch": 81.9, + "learning_rate": 8.597859193260765e-05, + "loss": 0.0004, + "step": 19164 + }, + { + "epoch": 81.91, + "learning_rate": 8.597049799627225e-05, + "loss": 0.0001, + "step": 19168 + }, + { + "epoch": 81.93, + "learning_rate": 8.596240210569108e-05, + "loss": 0.0003, + "step": 19172 + }, + { + "epoch": 81.95, + "learning_rate": 8.5954304261304e-05, + "loss": 0.0001, + "step": 19176 + }, + { + "epoch": 81.97, + "learning_rate": 8.594620446355096e-05, + "loss": 0.0001, + "step": 19180 + }, + { + "epoch": 81.98, + "learning_rate": 8.5938102712872e-05, + "loss": 0.0001, + "step": 19184 + }, + { + "epoch": 82.0, + "learning_rate": 8.592999900970726e-05, + "loss": 0.0004, + "step": 19188 + }, + { + "epoch": 82.02, + "learning_rate": 8.592189335449706e-05, + "loss": 0.0001, + "step": 19192 + }, + { + "epoch": 82.03, + "learning_rate": 8.591378574768174e-05, + "loss": 0.0, + "step": 19196 + }, + { + "epoch": 82.05, + "learning_rate": 8.590567618970178e-05, + "loss": 0.0001, + "step": 19200 + }, + { + "epoch": 82.07, + "learning_rate": 8.589756468099778e-05, + "loss": 0.0, + "step": 19204 + }, + { + "epoch": 82.09, + "learning_rate": 8.588945122201042e-05, + "loss": 0.0004, + "step": 19208 + }, + { + "epoch": 82.1, + "learning_rate": 8.58813358131805e-05, + "loss": 0.0001, + "step": 19212 + }, + { + "epoch": 82.12, + "learning_rate": 8.587321845494891e-05, + "loss": 0.0002, + "step": 19216 + }, + { + "epoch": 82.14, + "learning_rate": 8.586509914775667e-05, + "loss": 0.0, + "step": 19220 + }, + { + "epoch": 82.15, + "learning_rate": 8.585697789204493e-05, + "loss": 0.0009, + "step": 19224 + }, + { + "epoch": 82.17, + "learning_rate": 8.584885468825483e-05, + "loss": 0.0002, + "step": 19228 + }, + { + "epoch": 82.19, + "learning_rate": 8.584072953682776e-05, + "loss": 0.0015, + "step": 19232 + }, + { + "epoch": 82.21, + "learning_rate": 8.583260243820514e-05, + "loss": 0.0001, + "step": 19236 + }, + { + "epoch": 82.22, + "learning_rate": 8.582447339282851e-05, + "loss": 0.0001, + "step": 19240 + }, + { + "epoch": 82.24, + "learning_rate": 8.58163424011395e-05, + "loss": 0.0001, + "step": 19244 + }, + { + "epoch": 82.26, + "learning_rate": 8.580820946357985e-05, + "loss": 0.0002, + "step": 19248 + }, + { + "epoch": 82.27, + "learning_rate": 8.580007458059147e-05, + "loss": 0.0001, + "step": 19252 + }, + { + "epoch": 82.29, + "learning_rate": 8.579193775261627e-05, + "loss": 0.0006, + "step": 19256 + }, + { + "epoch": 82.31, + "learning_rate": 8.578379898009632e-05, + "loss": 0.0, + "step": 19260 + }, + { + "epoch": 82.32, + "learning_rate": 8.57756582634738e-05, + "loss": 0.0005, + "step": 19264 + }, + { + "epoch": 82.34, + "learning_rate": 8.576751560319101e-05, + "loss": 0.0, + "step": 19268 + }, + { + "epoch": 82.36, + "learning_rate": 8.57593709996903e-05, + "loss": 0.0014, + "step": 19272 + }, + { + "epoch": 82.38, + "learning_rate": 8.575122445341417e-05, + "loss": 0.0001, + "step": 19276 + }, + { + "epoch": 82.39, + "learning_rate": 8.574307596480522e-05, + "loss": 0.0006, + "step": 19280 + }, + { + "epoch": 82.41, + "learning_rate": 8.573492553430615e-05, + "loss": 0.0006, + "step": 19284 + }, + { + "epoch": 82.43, + "learning_rate": 8.572677316235978e-05, + "loss": 0.0, + "step": 19288 + }, + { + "epoch": 82.44, + "learning_rate": 8.5718618849409e-05, + "loss": 0.0003, + "step": 19292 + }, + { + "epoch": 82.46, + "learning_rate": 8.571046259589681e-05, + "loss": 0.0001, + "step": 19296 + }, + { + "epoch": 82.48, + "learning_rate": 8.570230440226639e-05, + "loss": 0.0001, + "step": 19300 + }, + { + "epoch": 82.5, + "learning_rate": 8.56941442689609e-05, + "loss": 0.0003, + "step": 19304 + }, + { + "epoch": 82.51, + "learning_rate": 8.568598219642373e-05, + "loss": 0.0, + "step": 19308 + }, + { + "epoch": 82.53, + "learning_rate": 8.567781818509829e-05, + "loss": 0.0001, + "step": 19312 + }, + { + "epoch": 82.55, + "learning_rate": 8.566965223542811e-05, + "loss": 0.0002, + "step": 19316 + }, + { + "epoch": 82.56, + "learning_rate": 8.566148434785686e-05, + "loss": 0.0001, + "step": 19320 + }, + { + "epoch": 82.58, + "learning_rate": 8.56533145228283e-05, + "loss": 0.0, + "step": 19324 + }, + { + "epoch": 82.6, + "learning_rate": 8.564514276078626e-05, + "loss": 0.0008, + "step": 19328 + }, + { + "epoch": 82.62, + "learning_rate": 8.563696906217475e-05, + "loss": 0.0001, + "step": 19332 + }, + { + "epoch": 82.63, + "learning_rate": 8.562879342743779e-05, + "loss": 0.0001, + "step": 19336 + }, + { + "epoch": 82.65, + "learning_rate": 8.562061585701961e-05, + "loss": 0.0001, + "step": 19340 + }, + { + "epoch": 82.67, + "learning_rate": 8.561243635136443e-05, + "loss": 0.0001, + "step": 19344 + }, + { + "epoch": 82.68, + "learning_rate": 8.560425491091668e-05, + "loss": 0.0005, + "step": 19348 + }, + { + "epoch": 82.7, + "learning_rate": 8.559607153612084e-05, + "loss": 0.0001, + "step": 19352 + }, + { + "epoch": 82.72, + "learning_rate": 8.558788622742149e-05, + "loss": 0.0001, + "step": 19356 + }, + { + "epoch": 82.74, + "learning_rate": 8.557969898526335e-05, + "loss": 0.0, + "step": 19360 + }, + { + "epoch": 82.75, + "learning_rate": 8.557150981009121e-05, + "loss": 0.0006, + "step": 19364 + }, + { + "epoch": 82.77, + "learning_rate": 8.556331870234999e-05, + "loss": 0.0001, + "step": 19368 + }, + { + "epoch": 82.79, + "learning_rate": 8.555512566248471e-05, + "loss": 0.0003, + "step": 19372 + }, + { + "epoch": 82.8, + "learning_rate": 8.554693069094048e-05, + "loss": 0.0002, + "step": 19376 + }, + { + "epoch": 82.82, + "learning_rate": 8.553873378816252e-05, + "loss": 0.0001, + "step": 19380 + }, + { + "epoch": 82.84, + "learning_rate": 8.553053495459618e-05, + "loss": 0.0001, + "step": 19384 + }, + { + "epoch": 82.85, + "learning_rate": 8.55223341906869e-05, + "loss": 0.0001, + "step": 19388 + }, + { + "epoch": 82.87, + "learning_rate": 8.551413149688019e-05, + "loss": 0.0007, + "step": 19392 + }, + { + "epoch": 82.89, + "learning_rate": 8.55059268736217e-05, + "loss": 0.0001, + "step": 19396 + }, + { + "epoch": 82.91, + "learning_rate": 8.54977203213572e-05, + "loss": 0.0001, + "step": 19400 + }, + { + "epoch": 82.92, + "learning_rate": 8.548951184053257e-05, + "loss": 0.0001, + "step": 19404 + }, + { + "epoch": 82.94, + "learning_rate": 8.548130143159369e-05, + "loss": 0.0004, + "step": 19408 + }, + { + "epoch": 82.96, + "learning_rate": 8.54730890949867e-05, + "loss": 0.0002, + "step": 19412 + }, + { + "epoch": 82.97, + "learning_rate": 8.546487483115773e-05, + "loss": 0.0, + "step": 19416 + }, + { + "epoch": 82.99, + "learning_rate": 8.545665864055308e-05, + "loss": 0.0001, + "step": 19420 + }, + { + "epoch": 83.01, + "learning_rate": 8.544844052361909e-05, + "loss": 0.0001, + "step": 19424 + }, + { + "epoch": 83.03, + "learning_rate": 8.544022048080227e-05, + "loss": 0.003, + "step": 19428 + }, + { + "epoch": 83.04, + "learning_rate": 8.54319985125492e-05, + "loss": 0.0001, + "step": 19432 + }, + { + "epoch": 83.06, + "learning_rate": 8.542377461930659e-05, + "loss": 0.0001, + "step": 19436 + }, + { + "epoch": 83.08, + "learning_rate": 8.54155488015212e-05, + "loss": 0.0002, + "step": 19440 + }, + { + "epoch": 83.09, + "learning_rate": 8.540732105963998e-05, + "loss": 0.0001, + "step": 19444 + }, + { + "epoch": 83.11, + "learning_rate": 8.539909139410989e-05, + "loss": 0.0002, + "step": 19448 + }, + { + "epoch": 83.13, + "learning_rate": 8.539085980537807e-05, + "loss": 0.0001, + "step": 19452 + }, + { + "epoch": 83.15, + "learning_rate": 8.538262629389171e-05, + "loss": 0.0001, + "step": 19456 + }, + { + "epoch": 83.16, + "learning_rate": 8.537439086009817e-05, + "loss": 0.0, + "step": 19460 + }, + { + "epoch": 83.18, + "learning_rate": 8.536615350444483e-05, + "loss": 0.0001, + "step": 19464 + }, + { + "epoch": 83.2, + "learning_rate": 8.535791422737924e-05, + "loss": 0.0005, + "step": 19468 + }, + { + "epoch": 83.21, + "learning_rate": 8.534967302934905e-05, + "loss": 0.0001, + "step": 19472 + }, + { + "epoch": 83.23, + "learning_rate": 8.534142991080194e-05, + "loss": 0.0001, + "step": 19476 + }, + { + "epoch": 83.25, + "learning_rate": 8.53331848721858e-05, + "loss": 0.0016, + "step": 19480 + }, + { + "epoch": 83.26, + "learning_rate": 8.532493791394857e-05, + "loss": 0.0009, + "step": 19484 + }, + { + "epoch": 83.28, + "learning_rate": 8.531668903653831e-05, + "loss": 0.0, + "step": 19488 + }, + { + "epoch": 83.3, + "learning_rate": 8.530843824040314e-05, + "loss": 0.0005, + "step": 19492 + }, + { + "epoch": 83.32, + "learning_rate": 8.530018552599134e-05, + "loss": 0.0, + "step": 19496 + }, + { + "epoch": 83.33, + "learning_rate": 8.529193089375126e-05, + "loss": 0.0001, + "step": 19500 + }, + { + "epoch": 83.35, + "learning_rate": 8.52836743441314e-05, + "loss": 0.0005, + "step": 19504 + }, + { + "epoch": 83.37, + "learning_rate": 8.52754158775803e-05, + "loss": 0.0, + "step": 19508 + }, + { + "epoch": 83.38, + "learning_rate": 8.526715549454664e-05, + "loss": 0.0004, + "step": 19512 + }, + { + "epoch": 83.4, + "learning_rate": 8.52588931954792e-05, + "loss": 0.0, + "step": 19516 + }, + { + "epoch": 83.42, + "learning_rate": 8.525062898082685e-05, + "loss": 0.0001, + "step": 19520 + }, + { + "epoch": 83.44, + "learning_rate": 8.524236285103861e-05, + "loss": 0.0001, + "step": 19524 + }, + { + "epoch": 83.45, + "learning_rate": 8.523409480656356e-05, + "loss": 0.0001, + "step": 19528 + }, + { + "epoch": 83.47, + "learning_rate": 8.522582484785088e-05, + "loss": 0.0011, + "step": 19532 + }, + { + "epoch": 83.49, + "learning_rate": 8.52175529753499e-05, + "loss": 0.0001, + "step": 19536 + }, + { + "epoch": 83.5, + "learning_rate": 8.520927918950999e-05, + "loss": 0.0, + "step": 19540 + }, + { + "epoch": 83.52, + "learning_rate": 8.520100349078069e-05, + "loss": 0.0016, + "step": 19544 + }, + { + "epoch": 83.54, + "learning_rate": 8.519272587961155e-05, + "loss": 0.0006, + "step": 19548 + }, + { + "epoch": 83.56, + "learning_rate": 8.518444635645237e-05, + "loss": 0.0019, + "step": 19552 + }, + { + "epoch": 83.57, + "learning_rate": 8.517616492175292e-05, + "loss": 0.0001, + "step": 19556 + }, + { + "epoch": 83.59, + "learning_rate": 8.516788157596312e-05, + "loss": 0.0001, + "step": 19560 + }, + { + "epoch": 83.61, + "learning_rate": 8.515959631953301e-05, + "loss": 0.0002, + "step": 19564 + }, + { + "epoch": 83.62, + "learning_rate": 8.515130915291271e-05, + "loss": 0.0008, + "step": 19568 + }, + { + "epoch": 83.64, + "learning_rate": 8.514302007655248e-05, + "loss": 0.0001, + "step": 19572 + }, + { + "epoch": 83.66, + "learning_rate": 8.513472909090263e-05, + "loss": 0.0, + "step": 19576 + }, + { + "epoch": 83.68, + "learning_rate": 8.512643619641362e-05, + "loss": 0.0, + "step": 19580 + }, + { + "epoch": 83.69, + "learning_rate": 8.511814139353599e-05, + "loss": 0.0001, + "step": 19584 + }, + { + "epoch": 83.71, + "learning_rate": 8.510984468272039e-05, + "loss": 0.0014, + "step": 19588 + }, + { + "epoch": 83.73, + "learning_rate": 8.510154606441756e-05, + "loss": 0.0014, + "step": 19592 + }, + { + "epoch": 83.74, + "learning_rate": 8.509324553907837e-05, + "loss": 0.0001, + "step": 19596 + }, + { + "epoch": 83.76, + "learning_rate": 8.508494310715379e-05, + "loss": 0.0001, + "step": 19600 + }, + { + "epoch": 83.78, + "learning_rate": 8.507663876909487e-05, + "loss": 0.0001, + "step": 19604 + }, + { + "epoch": 83.79, + "learning_rate": 8.506833252535277e-05, + "loss": 0.0, + "step": 19608 + }, + { + "epoch": 83.81, + "learning_rate": 8.506002437637879e-05, + "loss": 0.0001, + "step": 19612 + }, + { + "epoch": 83.83, + "learning_rate": 8.505171432262427e-05, + "loss": 0.0, + "step": 19616 + }, + { + "epoch": 83.85, + "learning_rate": 8.504340236454071e-05, + "loss": 0.0013, + "step": 19620 + }, + { + "epoch": 83.86, + "learning_rate": 8.503508850257968e-05, + "loss": 0.0002, + "step": 19624 + }, + { + "epoch": 83.88, + "learning_rate": 8.502677273719287e-05, + "loss": 0.0, + "step": 19628 + }, + { + "epoch": 83.9, + "learning_rate": 8.501845506883208e-05, + "loss": 0.0002, + "step": 19632 + }, + { + "epoch": 83.91, + "learning_rate": 8.501013549794917e-05, + "loss": 0.0012, + "step": 19636 + }, + { + "epoch": 83.93, + "learning_rate": 8.500181402499617e-05, + "loss": 0.0005, + "step": 19640 + }, + { + "epoch": 83.95, + "learning_rate": 8.499349065042516e-05, + "loss": 0.001, + "step": 19644 + }, + { + "epoch": 83.97, + "learning_rate": 8.498516537468834e-05, + "loss": 0.0001, + "step": 19648 + }, + { + "epoch": 83.98, + "learning_rate": 8.497683819823801e-05, + "loss": 0.0009, + "step": 19652 + }, + { + "epoch": 84.0, + "learning_rate": 8.496850912152661e-05, + "loss": 0.0001, + "step": 19656 + }, + { + "epoch": 84.02, + "learning_rate": 8.496017814500661e-05, + "loss": 0.0001, + "step": 19660 + }, + { + "epoch": 84.03, + "learning_rate": 8.495184526913066e-05, + "loss": 0.0002, + "step": 19664 + }, + { + "epoch": 84.05, + "learning_rate": 8.494351049435145e-05, + "loss": 0.0001, + "step": 19668 + }, + { + "epoch": 84.07, + "learning_rate": 8.493517382112182e-05, + "loss": 0.0001, + "step": 19672 + }, + { + "epoch": 84.09, + "learning_rate": 8.492683524989467e-05, + "loss": 0.0001, + "step": 19676 + }, + { + "epoch": 84.1, + "learning_rate": 8.491849478112307e-05, + "loss": 0.0, + "step": 19680 + }, + { + "epoch": 84.12, + "learning_rate": 8.491015241526011e-05, + "loss": 0.0, + "step": 19684 + }, + { + "epoch": 84.14, + "learning_rate": 8.490180815275906e-05, + "loss": 0.0002, + "step": 19688 + }, + { + "epoch": 84.15, + "learning_rate": 8.489346199407321e-05, + "loss": 0.0017, + "step": 19692 + }, + { + "epoch": 84.17, + "learning_rate": 8.488511393965601e-05, + "loss": 0.0002, + "step": 19696 + }, + { + "epoch": 84.19, + "learning_rate": 8.487676398996105e-05, + "loss": 0.0004, + "step": 19700 + }, + { + "epoch": 84.21, + "learning_rate": 8.486841214544194e-05, + "loss": 0.0002, + "step": 19704 + }, + { + "epoch": 84.22, + "learning_rate": 8.48600584065524e-05, + "loss": 0.0001, + "step": 19708 + }, + { + "epoch": 84.24, + "learning_rate": 8.485170277374635e-05, + "loss": 0.0, + "step": 19712 + }, + { + "epoch": 84.26, + "learning_rate": 8.484334524747767e-05, + "loss": 0.0001, + "step": 19716 + }, + { + "epoch": 84.27, + "learning_rate": 8.483498582820048e-05, + "loss": 0.0, + "step": 19720 + }, + { + "epoch": 84.29, + "learning_rate": 8.482662451636891e-05, + "loss": 0.0001, + "step": 19724 + }, + { + "epoch": 84.31, + "learning_rate": 8.481826131243722e-05, + "loss": 0.0, + "step": 19728 + }, + { + "epoch": 84.32, + "learning_rate": 8.480989621685979e-05, + "loss": 0.0002, + "step": 19732 + }, + { + "epoch": 84.34, + "learning_rate": 8.480152923009107e-05, + "loss": 0.0001, + "step": 19736 + }, + { + "epoch": 84.36, + "learning_rate": 8.479316035258565e-05, + "loss": 0.0016, + "step": 19740 + }, + { + "epoch": 84.38, + "learning_rate": 8.478478958479819e-05, + "loss": 0.0001, + "step": 19744 + }, + { + "epoch": 84.39, + "learning_rate": 8.477641692718348e-05, + "loss": 0.0, + "step": 19748 + }, + { + "epoch": 84.41, + "learning_rate": 8.476804238019638e-05, + "loss": 0.0004, + "step": 19752 + }, + { + "epoch": 84.43, + "learning_rate": 8.475966594429188e-05, + "loss": 0.0001, + "step": 19756 + }, + { + "epoch": 84.44, + "learning_rate": 8.475128761992506e-05, + "loss": 0.0001, + "step": 19760 + }, + { + "epoch": 84.46, + "learning_rate": 8.474290740755113e-05, + "loss": 0.0001, + "step": 19764 + }, + { + "epoch": 84.48, + "learning_rate": 8.473452530762535e-05, + "loss": 0.0001, + "step": 19768 + }, + { + "epoch": 84.5, + "learning_rate": 8.472614132060314e-05, + "loss": 0.0016, + "step": 19772 + }, + { + "epoch": 84.51, + "learning_rate": 8.471775544693998e-05, + "loss": 0.0002, + "step": 19776 + }, + { + "epoch": 84.53, + "learning_rate": 8.470936768709146e-05, + "loss": 0.0, + "step": 19780 + }, + { + "epoch": 84.55, + "learning_rate": 8.47009780415133e-05, + "loss": 0.0004, + "step": 19784 + }, + { + "epoch": 84.56, + "learning_rate": 8.469258651066128e-05, + "loss": 0.0001, + "step": 19788 + }, + { + "epoch": 84.58, + "learning_rate": 8.468419309499131e-05, + "loss": 0.0002, + "step": 19792 + }, + { + "epoch": 84.6, + "learning_rate": 8.467579779495939e-05, + "loss": 0.0015, + "step": 19796 + }, + { + "epoch": 84.62, + "learning_rate": 8.466740061102166e-05, + "loss": 0.0, + "step": 19800 + }, + { + "epoch": 84.63, + "learning_rate": 8.465900154363431e-05, + "loss": 0.0003, + "step": 19804 + }, + { + "epoch": 84.65, + "learning_rate": 8.465060059325365e-05, + "loss": 0.0002, + "step": 19808 + }, + { + "epoch": 84.67, + "learning_rate": 8.464219776033611e-05, + "loss": 0.0001, + "step": 19812 + }, + { + "epoch": 84.68, + "learning_rate": 8.463379304533818e-05, + "loss": 0.0001, + "step": 19816 + }, + { + "epoch": 84.7, + "learning_rate": 8.462538644871653e-05, + "loss": 0.0008, + "step": 19820 + }, + { + "epoch": 84.72, + "learning_rate": 8.461697797092784e-05, + "loss": 0.0007, + "step": 19824 + }, + { + "epoch": 84.74, + "learning_rate": 8.460856761242894e-05, + "loss": 0.0, + "step": 19828 + }, + { + "epoch": 84.75, + "learning_rate": 8.46001553736768e-05, + "loss": 0.0, + "step": 19832 + }, + { + "epoch": 84.77, + "learning_rate": 8.459174125512838e-05, + "loss": 0.0003, + "step": 19836 + }, + { + "epoch": 84.79, + "learning_rate": 8.458332525724086e-05, + "loss": 0.001, + "step": 19840 + }, + { + "epoch": 84.8, + "learning_rate": 8.457490738047147e-05, + "loss": 0.0003, + "step": 19844 + }, + { + "epoch": 84.82, + "learning_rate": 8.456648762527755e-05, + "loss": 0.0003, + "step": 19848 + }, + { + "epoch": 84.84, + "learning_rate": 8.455806599211651e-05, + "loss": 0.0001, + "step": 19852 + }, + { + "epoch": 84.85, + "learning_rate": 8.45496424814459e-05, + "loss": 0.0001, + "step": 19856 + }, + { + "epoch": 84.87, + "learning_rate": 8.454121709372339e-05, + "loss": 0.0001, + "step": 19860 + }, + { + "epoch": 84.89, + "learning_rate": 8.453278982940667e-05, + "loss": 0.0013, + "step": 19864 + }, + { + "epoch": 84.91, + "learning_rate": 8.452436068895365e-05, + "loss": 0.0004, + "step": 19868 + }, + { + "epoch": 84.92, + "learning_rate": 8.451592967282222e-05, + "loss": 0.0017, + "step": 19872 + }, + { + "epoch": 84.94, + "learning_rate": 8.450749678147048e-05, + "loss": 0.0001, + "step": 19876 + }, + { + "epoch": 84.96, + "learning_rate": 8.449906201535653e-05, + "loss": 0.0004, + "step": 19880 + }, + { + "epoch": 84.97, + "learning_rate": 8.449062537493868e-05, + "loss": 0.0001, + "step": 19884 + }, + { + "epoch": 84.99, + "learning_rate": 8.448218686067524e-05, + "loss": 0.0003, + "step": 19888 + }, + { + "epoch": 85.01, + "learning_rate": 8.447374647302469e-05, + "loss": 0.0001, + "step": 19892 + }, + { + "epoch": 85.03, + "learning_rate": 8.446530421244557e-05, + "loss": 0.0001, + "step": 19896 + }, + { + "epoch": 85.04, + "learning_rate": 8.445686007939657e-05, + "loss": 0.0002, + "step": 19900 + }, + { + "epoch": 85.06, + "learning_rate": 8.444841407433644e-05, + "loss": 0.0008, + "step": 19904 + }, + { + "epoch": 85.08, + "learning_rate": 8.443996619772401e-05, + "loss": 0.0006, + "step": 19908 + }, + { + "epoch": 85.09, + "learning_rate": 8.44315164500183e-05, + "loss": 0.0001, + "step": 19912 + }, + { + "epoch": 85.11, + "learning_rate": 8.442306483167833e-05, + "loss": 0.0001, + "step": 19916 + }, + { + "epoch": 85.13, + "learning_rate": 8.44146113431633e-05, + "loss": 0.0001, + "step": 19920 + }, + { + "epoch": 85.15, + "learning_rate": 8.44061559849325e-05, + "loss": 0.0002, + "step": 19924 + }, + { + "epoch": 85.16, + "learning_rate": 8.439769875744524e-05, + "loss": 0.0003, + "step": 19928 + }, + { + "epoch": 85.18, + "learning_rate": 8.438923966116104e-05, + "loss": 0.0004, + "step": 19932 + }, + { + "epoch": 85.2, + "learning_rate": 8.438077869653946e-05, + "loss": 0.0001, + "step": 19936 + }, + { + "epoch": 85.21, + "learning_rate": 8.437231586404019e-05, + "loss": 0.0008, + "step": 19940 + }, + { + "epoch": 85.23, + "learning_rate": 8.4363851164123e-05, + "loss": 0.0004, + "step": 19944 + }, + { + "epoch": 85.25, + "learning_rate": 8.435538459724775e-05, + "loss": 0.0002, + "step": 19948 + }, + { + "epoch": 85.26, + "learning_rate": 8.434691616387446e-05, + "loss": 0.0022, + "step": 19952 + }, + { + "epoch": 85.28, + "learning_rate": 8.433844586446318e-05, + "loss": 0.0001, + "step": 19956 + }, + { + "epoch": 85.3, + "learning_rate": 8.43299736994741e-05, + "loss": 0.0002, + "step": 19960 + }, + { + "epoch": 85.32, + "learning_rate": 8.432149966936754e-05, + "loss": 0.0001, + "step": 19964 + }, + { + "epoch": 85.33, + "learning_rate": 8.431302377460383e-05, + "loss": 0.0001, + "step": 19968 + }, + { + "epoch": 85.35, + "learning_rate": 8.43045460156435e-05, + "loss": 0.0001, + "step": 19972 + }, + { + "epoch": 85.37, + "learning_rate": 8.429606639294711e-05, + "loss": 0.0, + "step": 19976 + }, + { + "epoch": 85.38, + "learning_rate": 8.428758490697538e-05, + "loss": 0.0007, + "step": 19980 + }, + { + "epoch": 85.4, + "learning_rate": 8.427910155818909e-05, + "loss": 0.0015, + "step": 19984 + }, + { + "epoch": 85.42, + "learning_rate": 8.427061634704911e-05, + "loss": 0.0001, + "step": 19988 + }, + { + "epoch": 85.44, + "learning_rate": 8.426212927401649e-05, + "loss": 0.0, + "step": 19992 + }, + { + "epoch": 85.45, + "learning_rate": 8.425364033955225e-05, + "loss": 0.0001, + "step": 19996 + }, + { + "epoch": 85.47, + "learning_rate": 8.424514954411767e-05, + "loss": 0.0024, + "step": 20000 + }, + { + "epoch": 85.47, + "eval_exact_match": 0.5031185031185031, + "eval_loss": 0.9105172753334045, + "eval_runtime": 140.8685, + "eval_samples_per_second": 6.829, + "step": 20000 + }, + { + "epoch": 85.49, + "learning_rate": 8.423665688817397e-05, + "loss": 0.0006, + "step": 20004 + }, + { + "epoch": 85.5, + "learning_rate": 8.422816237218259e-05, + "loss": 0.0013, + "step": 20008 + }, + { + "epoch": 85.52, + "learning_rate": 8.421966599660502e-05, + "loss": 0.0004, + "step": 20012 + }, + { + "epoch": 85.54, + "learning_rate": 8.421116776190288e-05, + "loss": 0.0003, + "step": 20016 + }, + { + "epoch": 85.56, + "learning_rate": 8.420266766853784e-05, + "loss": 0.0001, + "step": 20020 + }, + { + "epoch": 85.57, + "learning_rate": 8.419416571697171e-05, + "loss": 0.0001, + "step": 20024 + }, + { + "epoch": 85.59, + "learning_rate": 8.418566190766641e-05, + "loss": 0.0, + "step": 20028 + }, + { + "epoch": 85.61, + "learning_rate": 8.417715624108393e-05, + "loss": 0.0001, + "step": 20032 + }, + { + "epoch": 85.62, + "learning_rate": 8.416864871768639e-05, + "loss": 0.0001, + "step": 20036 + }, + { + "epoch": 85.64, + "learning_rate": 8.416013933793599e-05, + "loss": 0.0003, + "step": 20040 + }, + { + "epoch": 85.66, + "learning_rate": 8.415162810229502e-05, + "loss": 0.0001, + "step": 20044 + }, + { + "epoch": 85.68, + "learning_rate": 8.414311501122591e-05, + "loss": 0.0005, + "step": 20048 + }, + { + "epoch": 85.69, + "learning_rate": 8.413460006519116e-05, + "loss": 0.0002, + "step": 20052 + }, + { + "epoch": 85.71, + "learning_rate": 8.412608326465337e-05, + "loss": 0.0001, + "step": 20056 + }, + { + "epoch": 85.73, + "learning_rate": 8.411756461007527e-05, + "loss": 0.0001, + "step": 20060 + }, + { + "epoch": 85.74, + "learning_rate": 8.410904410191967e-05, + "loss": 0.0004, + "step": 20064 + }, + { + "epoch": 85.76, + "learning_rate": 8.410052174064946e-05, + "loss": 0.0011, + "step": 20068 + }, + { + "epoch": 85.78, + "learning_rate": 8.409199752672767e-05, + "loss": 0.0002, + "step": 20072 + }, + { + "epoch": 85.79, + "learning_rate": 8.408347146061741e-05, + "loss": 0.0, + "step": 20076 + }, + { + "epoch": 85.81, + "learning_rate": 8.40749435427819e-05, + "loss": 0.0001, + "step": 20080 + }, + { + "epoch": 85.83, + "learning_rate": 8.406641377368446e-05, + "loss": 0.0, + "step": 20084 + }, + { + "epoch": 85.85, + "learning_rate": 8.405788215378847e-05, + "loss": 0.0007, + "step": 20088 + }, + { + "epoch": 85.86, + "learning_rate": 8.404934868355747e-05, + "loss": 0.0002, + "step": 20092 + }, + { + "epoch": 85.88, + "learning_rate": 8.404081336345507e-05, + "loss": 0.0003, + "step": 20096 + }, + { + "epoch": 85.9, + "learning_rate": 8.4032276193945e-05, + "loss": 0.0005, + "step": 20100 + }, + { + "epoch": 85.91, + "learning_rate": 8.402373717549105e-05, + "loss": 0.0008, + "step": 20104 + }, + { + "epoch": 85.93, + "learning_rate": 8.401519630855718e-05, + "loss": 0.0008, + "step": 20108 + }, + { + "epoch": 85.95, + "learning_rate": 8.400665359360737e-05, + "loss": 0.0006, + "step": 20112 + }, + { + "epoch": 85.97, + "learning_rate": 8.399810903110575e-05, + "loss": 0.0001, + "step": 20116 + }, + { + "epoch": 85.98, + "learning_rate": 8.398956262151654e-05, + "loss": 0.0001, + "step": 20120 + }, + { + "epoch": 86.0, + "learning_rate": 8.398101436530405e-05, + "loss": 0.0001, + "step": 20124 + }, + { + "epoch": 86.02, + "learning_rate": 8.397246426293273e-05, + "loss": 0.0001, + "step": 20128 + }, + { + "epoch": 86.03, + "learning_rate": 8.396391231486707e-05, + "loss": 0.0001, + "step": 20132 + }, + { + "epoch": 86.05, + "learning_rate": 8.395535852157168e-05, + "loss": 0.0001, + "step": 20136 + }, + { + "epoch": 86.07, + "learning_rate": 8.394680288351132e-05, + "loss": 0.0, + "step": 20140 + }, + { + "epoch": 86.09, + "learning_rate": 8.393824540115076e-05, + "loss": 0.0002, + "step": 20144 + }, + { + "epoch": 86.1, + "learning_rate": 8.392968607495497e-05, + "loss": 0.0002, + "step": 20148 + }, + { + "epoch": 86.12, + "learning_rate": 8.392112490538894e-05, + "loss": 0.0001, + "step": 20152 + }, + { + "epoch": 86.14, + "learning_rate": 8.391256189291779e-05, + "loss": 0.0008, + "step": 20156 + }, + { + "epoch": 86.15, + "learning_rate": 8.390399703800678e-05, + "loss": 0.0002, + "step": 20160 + }, + { + "epoch": 86.17, + "learning_rate": 8.389543034112117e-05, + "loss": 0.0001, + "step": 20164 + }, + { + "epoch": 86.19, + "learning_rate": 8.388686180272643e-05, + "loss": 0.0003, + "step": 20168 + }, + { + "epoch": 86.21, + "learning_rate": 8.387829142328807e-05, + "loss": 0.0001, + "step": 20172 + }, + { + "epoch": 86.22, + "learning_rate": 8.386971920327169e-05, + "loss": 0.0002, + "step": 20176 + }, + { + "epoch": 86.24, + "learning_rate": 8.386114514314303e-05, + "loss": 0.0001, + "step": 20180 + }, + { + "epoch": 86.26, + "learning_rate": 8.385256924336792e-05, + "loss": 0.0002, + "step": 20184 + }, + { + "epoch": 86.27, + "learning_rate": 8.384399150441225e-05, + "loss": 0.0003, + "step": 20188 + }, + { + "epoch": 86.29, + "learning_rate": 8.383541192674207e-05, + "loss": 0.0001, + "step": 20192 + }, + { + "epoch": 86.31, + "learning_rate": 8.382683051082349e-05, + "loss": 0.0001, + "step": 20196 + }, + { + "epoch": 86.32, + "learning_rate": 8.381824725712272e-05, + "loss": 0.0, + "step": 20200 + }, + { + "epoch": 86.34, + "learning_rate": 8.380966216610612e-05, + "loss": 0.0003, + "step": 20204 + }, + { + "epoch": 86.36, + "learning_rate": 8.380107523824008e-05, + "loss": 0.0001, + "step": 20208 + }, + { + "epoch": 86.38, + "learning_rate": 8.379248647399111e-05, + "loss": 0.0, + "step": 20212 + }, + { + "epoch": 86.39, + "learning_rate": 8.378389587382584e-05, + "loss": 0.0, + "step": 20216 + }, + { + "epoch": 86.41, + "learning_rate": 8.3775303438211e-05, + "loss": 0.0006, + "step": 20220 + }, + { + "epoch": 86.43, + "learning_rate": 8.37667091676134e-05, + "loss": 0.0001, + "step": 20224 + }, + { + "epoch": 86.44, + "learning_rate": 8.375811306249996e-05, + "loss": 0.0001, + "step": 20228 + }, + { + "epoch": 86.46, + "learning_rate": 8.37495151233377e-05, + "loss": 0.0004, + "step": 20232 + }, + { + "epoch": 86.48, + "learning_rate": 8.374091535059374e-05, + "loss": 0.0001, + "step": 20236 + }, + { + "epoch": 86.5, + "learning_rate": 8.373231374473531e-05, + "loss": 0.0001, + "step": 20240 + }, + { + "epoch": 86.51, + "learning_rate": 8.37237103062297e-05, + "loss": 0.0006, + "step": 20244 + }, + { + "epoch": 86.53, + "learning_rate": 8.371510503554436e-05, + "loss": 0.0001, + "step": 20248 + }, + { + "epoch": 86.55, + "learning_rate": 8.370649793314678e-05, + "loss": 0.0001, + "step": 20252 + }, + { + "epoch": 86.56, + "learning_rate": 8.369788899950457e-05, + "loss": 0.0019, + "step": 20256 + }, + { + "epoch": 86.58, + "learning_rate": 8.368927823508548e-05, + "loss": 0.0001, + "step": 20260 + }, + { + "epoch": 86.6, + "learning_rate": 8.36806656403573e-05, + "loss": 0.0001, + "step": 20264 + }, + { + "epoch": 86.62, + "learning_rate": 8.367205121578797e-05, + "loss": 0.0, + "step": 20268 + }, + { + "epoch": 86.63, + "learning_rate": 8.366343496184546e-05, + "loss": 0.0001, + "step": 20272 + }, + { + "epoch": 86.65, + "learning_rate": 8.365481687899793e-05, + "loss": 0.0016, + "step": 20276 + }, + { + "epoch": 86.67, + "learning_rate": 8.364619696771355e-05, + "loss": 0.0, + "step": 20280 + }, + { + "epoch": 86.68, + "learning_rate": 8.363757522846066e-05, + "loss": 0.0002, + "step": 20284 + }, + { + "epoch": 86.7, + "learning_rate": 8.362895166170768e-05, + "loss": 0.0003, + "step": 20288 + }, + { + "epoch": 86.72, + "learning_rate": 8.362032626792308e-05, + "loss": 0.0005, + "step": 20292 + }, + { + "epoch": 86.74, + "learning_rate": 8.361169904757553e-05, + "loss": 0.0002, + "step": 20296 + }, + { + "epoch": 86.75, + "learning_rate": 8.360307000113369e-05, + "loss": 0.0002, + "step": 20300 + }, + { + "epoch": 86.77, + "learning_rate": 8.359443912906639e-05, + "loss": 0.0001, + "step": 20304 + }, + { + "epoch": 86.79, + "learning_rate": 8.358580643184253e-05, + "loss": 0.0005, + "step": 20308 + }, + { + "epoch": 86.8, + "learning_rate": 8.357717190993113e-05, + "loss": 0.0003, + "step": 20312 + }, + { + "epoch": 86.82, + "learning_rate": 8.356853556380127e-05, + "loss": 0.0, + "step": 20316 + }, + { + "epoch": 86.84, + "learning_rate": 8.355989739392217e-05, + "loss": 0.0012, + "step": 20320 + }, + { + "epoch": 86.85, + "learning_rate": 8.355125740076315e-05, + "loss": 0.0008, + "step": 20324 + }, + { + "epoch": 86.87, + "learning_rate": 8.35426155847936e-05, + "loss": 0.0008, + "step": 20328 + }, + { + "epoch": 86.89, + "learning_rate": 8.3533971946483e-05, + "loss": 0.0018, + "step": 20332 + }, + { + "epoch": 86.91, + "learning_rate": 8.352532648630098e-05, + "loss": 0.0009, + "step": 20336 + }, + { + "epoch": 86.92, + "learning_rate": 8.351667920471723e-05, + "loss": 0.0002, + "step": 20340 + }, + { + "epoch": 86.94, + "learning_rate": 8.350803010220156e-05, + "loss": 0.0003, + "step": 20344 + }, + { + "epoch": 86.96, + "learning_rate": 8.349937917922386e-05, + "loss": 0.0001, + "step": 20348 + }, + { + "epoch": 86.97, + "learning_rate": 8.349072643625412e-05, + "loss": 0.0001, + "step": 20352 + }, + { + "epoch": 86.99, + "learning_rate": 8.348207187376245e-05, + "loss": 0.0001, + "step": 20356 + }, + { + "epoch": 87.01, + "learning_rate": 8.347341549221903e-05, + "loss": 0.0001, + "step": 20360 + }, + { + "epoch": 87.03, + "learning_rate": 8.346475729209416e-05, + "loss": 0.0001, + "step": 20364 + }, + { + "epoch": 87.04, + "learning_rate": 8.345609727385825e-05, + "loss": 0.0002, + "step": 20368 + }, + { + "epoch": 87.06, + "learning_rate": 8.344743543798176e-05, + "loss": 0.0001, + "step": 20372 + }, + { + "epoch": 87.08, + "learning_rate": 8.343877178493529e-05, + "loss": 0.0001, + "step": 20376 + }, + { + "epoch": 87.09, + "learning_rate": 8.343010631518955e-05, + "loss": 0.0003, + "step": 20380 + }, + { + "epoch": 87.11, + "learning_rate": 8.342143902921531e-05, + "loss": 0.0, + "step": 20384 + }, + { + "epoch": 87.13, + "learning_rate": 8.341276992748344e-05, + "loss": 0.0001, + "step": 20388 + }, + { + "epoch": 87.15, + "learning_rate": 8.340409901046496e-05, + "loss": 0.0001, + "step": 20392 + }, + { + "epoch": 87.16, + "learning_rate": 8.339542627863093e-05, + "loss": 0.0009, + "step": 20396 + }, + { + "epoch": 87.18, + "learning_rate": 8.338675173245254e-05, + "loss": 0.0001, + "step": 20400 + }, + { + "epoch": 87.2, + "learning_rate": 8.337807537240106e-05, + "loss": 0.0002, + "step": 20404 + }, + { + "epoch": 87.21, + "learning_rate": 8.336939719894788e-05, + "loss": 0.0002, + "step": 20408 + }, + { + "epoch": 87.23, + "learning_rate": 8.336071721256447e-05, + "loss": 0.0003, + "step": 20412 + }, + { + "epoch": 87.25, + "learning_rate": 8.335203541372242e-05, + "loss": 0.0, + "step": 20416 + }, + { + "epoch": 87.26, + "learning_rate": 8.33433518028934e-05, + "loss": 0.0005, + "step": 20420 + }, + { + "epoch": 87.28, + "learning_rate": 8.333466638054916e-05, + "loss": 0.0003, + "step": 20424 + }, + { + "epoch": 87.3, + "learning_rate": 8.33259791471616e-05, + "loss": 0.0003, + "step": 20428 + }, + { + "epoch": 87.32, + "learning_rate": 8.331729010320267e-05, + "loss": 0.0004, + "step": 20432 + }, + { + "epoch": 87.33, + "learning_rate": 8.330859924914444e-05, + "loss": 0.0005, + "step": 20436 + }, + { + "epoch": 87.35, + "learning_rate": 8.329990658545912e-05, + "loss": 0.0011, + "step": 20440 + }, + { + "epoch": 87.37, + "learning_rate": 8.329121211261892e-05, + "loss": 0.0005, + "step": 20444 + }, + { + "epoch": 87.38, + "learning_rate": 8.328251583109621e-05, + "loss": 0.0001, + "step": 20448 + }, + { + "epoch": 87.4, + "learning_rate": 8.327381774136347e-05, + "loss": 0.0, + "step": 20452 + }, + { + "epoch": 87.42, + "learning_rate": 8.326511784389326e-05, + "loss": 0.0001, + "step": 20456 + }, + { + "epoch": 87.44, + "learning_rate": 8.325641613915822e-05, + "loss": 0.0002, + "step": 20460 + }, + { + "epoch": 87.45, + "learning_rate": 8.324771262763114e-05, + "loss": 0.0003, + "step": 20464 + }, + { + "epoch": 87.47, + "learning_rate": 8.323900730978482e-05, + "loss": 0.0016, + "step": 20468 + }, + { + "epoch": 87.49, + "learning_rate": 8.323030018609225e-05, + "loss": 0.0001, + "step": 20472 + }, + { + "epoch": 87.5, + "learning_rate": 8.322159125702649e-05, + "loss": 0.0, + "step": 20476 + }, + { + "epoch": 87.52, + "learning_rate": 8.321288052306066e-05, + "loss": 0.0002, + "step": 20480 + }, + { + "epoch": 87.54, + "learning_rate": 8.320416798466803e-05, + "loss": 0.0, + "step": 20484 + }, + { + "epoch": 87.56, + "learning_rate": 8.319545364232193e-05, + "loss": 0.0001, + "step": 20488 + }, + { + "epoch": 87.57, + "learning_rate": 8.31867374964958e-05, + "loss": 0.0001, + "step": 20492 + }, + { + "epoch": 87.59, + "learning_rate": 8.317801954766318e-05, + "loss": 0.0007, + "step": 20496 + }, + { + "epoch": 87.61, + "learning_rate": 8.316929979629773e-05, + "loss": 0.0001, + "step": 20500 + }, + { + "epoch": 87.62, + "learning_rate": 8.316057824287315e-05, + "loss": 0.0004, + "step": 20504 + }, + { + "epoch": 87.64, + "learning_rate": 8.315185488786332e-05, + "loss": 0.0, + "step": 20508 + }, + { + "epoch": 87.66, + "learning_rate": 8.314312973174214e-05, + "loss": 0.0013, + "step": 20512 + }, + { + "epoch": 87.68, + "learning_rate": 8.313440277498366e-05, + "loss": 0.0004, + "step": 20516 + }, + { + "epoch": 87.69, + "learning_rate": 8.312567401806197e-05, + "loss": 0.0005, + "step": 20520 + }, + { + "epoch": 87.71, + "learning_rate": 8.311694346145133e-05, + "loss": 0.0, + "step": 20524 + }, + { + "epoch": 87.73, + "learning_rate": 8.310821110562608e-05, + "loss": 0.0004, + "step": 20528 + }, + { + "epoch": 87.74, + "learning_rate": 8.30994769510606e-05, + "loss": 0.0, + "step": 20532 + }, + { + "epoch": 87.76, + "learning_rate": 8.309074099822942e-05, + "loss": 0.0002, + "step": 20536 + }, + { + "epoch": 87.78, + "learning_rate": 8.308200324760717e-05, + "loss": 0.0003, + "step": 20540 + }, + { + "epoch": 87.79, + "learning_rate": 8.307326369966854e-05, + "loss": 0.0001, + "step": 20544 + }, + { + "epoch": 87.81, + "learning_rate": 8.306452235488839e-05, + "loss": 0.0004, + "step": 20548 + }, + { + "epoch": 87.83, + "learning_rate": 8.305577921374158e-05, + "loss": 0.0001, + "step": 20552 + }, + { + "epoch": 87.85, + "learning_rate": 8.304703427670313e-05, + "loss": 0.0001, + "step": 20556 + }, + { + "epoch": 87.86, + "learning_rate": 8.303828754424816e-05, + "loss": 0.0001, + "step": 20560 + }, + { + "epoch": 87.88, + "learning_rate": 8.302953901685187e-05, + "loss": 0.0, + "step": 20564 + }, + { + "epoch": 87.9, + "learning_rate": 8.302078869498955e-05, + "loss": 0.0015, + "step": 20568 + }, + { + "epoch": 87.91, + "learning_rate": 8.301203657913658e-05, + "loss": 0.0001, + "step": 20572 + }, + { + "epoch": 87.93, + "learning_rate": 8.300328266976848e-05, + "loss": 0.0, + "step": 20576 + }, + { + "epoch": 87.95, + "learning_rate": 8.299452696736086e-05, + "loss": 0.0002, + "step": 20580 + }, + { + "epoch": 87.97, + "learning_rate": 8.298576947238938e-05, + "loss": 0.0001, + "step": 20584 + }, + { + "epoch": 87.98, + "learning_rate": 8.297701018532982e-05, + "loss": 0.0008, + "step": 20588 + }, + { + "epoch": 88.0, + "learning_rate": 8.29682491066581e-05, + "loss": 0.0002, + "step": 20592 + }, + { + "epoch": 88.02, + "learning_rate": 8.295948623685014e-05, + "loss": 0.0004, + "step": 20596 + }, + { + "epoch": 88.03, + "learning_rate": 8.29507215763821e-05, + "loss": 0.0005, + "step": 20600 + }, + { + "epoch": 88.05, + "learning_rate": 8.294195512573011e-05, + "loss": 0.0001, + "step": 20604 + }, + { + "epoch": 88.07, + "learning_rate": 8.293318688537043e-05, + "loss": 0.0001, + "step": 20608 + }, + { + "epoch": 88.09, + "learning_rate": 8.292441685577946e-05, + "loss": 0.0, + "step": 20612 + }, + { + "epoch": 88.1, + "learning_rate": 8.291564503743365e-05, + "loss": 0.0001, + "step": 20616 + }, + { + "epoch": 88.12, + "learning_rate": 8.29068714308096e-05, + "loss": 0.0001, + "step": 20620 + }, + { + "epoch": 88.14, + "learning_rate": 8.289809603638391e-05, + "loss": 0.0, + "step": 20624 + }, + { + "epoch": 88.15, + "learning_rate": 8.28893188546334e-05, + "loss": 0.0001, + "step": 20628 + }, + { + "epoch": 88.17, + "learning_rate": 8.28805398860349e-05, + "loss": 0.0, + "step": 20632 + }, + { + "epoch": 88.19, + "learning_rate": 8.287175913106535e-05, + "loss": 0.0, + "step": 20636 + }, + { + "epoch": 88.21, + "learning_rate": 8.286297659020183e-05, + "loss": 0.0001, + "step": 20640 + }, + { + "epoch": 88.22, + "learning_rate": 8.285419226392148e-05, + "loss": 0.0, + "step": 20644 + }, + { + "epoch": 88.24, + "learning_rate": 8.284540615270152e-05, + "loss": 0.0001, + "step": 20648 + }, + { + "epoch": 88.26, + "learning_rate": 8.283661825701933e-05, + "loss": 0.0001, + "step": 20652 + }, + { + "epoch": 88.27, + "learning_rate": 8.28278285773523e-05, + "loss": 0.0001, + "step": 20656 + }, + { + "epoch": 88.29, + "learning_rate": 8.281903711417802e-05, + "loss": 0.0001, + "step": 20660 + }, + { + "epoch": 88.31, + "learning_rate": 8.28102438679741e-05, + "loss": 0.0003, + "step": 20664 + }, + { + "epoch": 88.32, + "learning_rate": 8.280144883921827e-05, + "loss": 0.0001, + "step": 20668 + }, + { + "epoch": 88.34, + "learning_rate": 8.279265202838833e-05, + "loss": 0.0001, + "step": 20672 + }, + { + "epoch": 88.36, + "learning_rate": 8.278385343596225e-05, + "loss": 0.0001, + "step": 20676 + }, + { + "epoch": 88.38, + "learning_rate": 8.277505306241799e-05, + "loss": 0.0002, + "step": 20680 + }, + { + "epoch": 88.39, + "learning_rate": 8.276625090823374e-05, + "loss": 0.0, + "step": 20684 + }, + { + "epoch": 88.41, + "learning_rate": 8.275744697388766e-05, + "loss": 0.0, + "step": 20688 + }, + { + "epoch": 88.43, + "learning_rate": 8.274864125985808e-05, + "loss": 0.0, + "step": 20692 + }, + { + "epoch": 88.44, + "learning_rate": 8.273983376662339e-05, + "loss": 0.0005, + "step": 20696 + }, + { + "epoch": 88.46, + "learning_rate": 8.273102449466212e-05, + "loss": 0.0, + "step": 20700 + }, + { + "epoch": 88.48, + "learning_rate": 8.272221344445285e-05, + "loss": 0.0, + "step": 20704 + }, + { + "epoch": 88.5, + "learning_rate": 8.27134006164743e-05, + "loss": 0.0, + "step": 20708 + }, + { + "epoch": 88.51, + "learning_rate": 8.270458601120523e-05, + "loss": 0.0001, + "step": 20712 + }, + { + "epoch": 88.53, + "learning_rate": 8.269576962912456e-05, + "loss": 0.0, + "step": 20716 + }, + { + "epoch": 88.55, + "learning_rate": 8.268695147071123e-05, + "loss": 0.0001, + "step": 20720 + }, + { + "epoch": 88.56, + "learning_rate": 8.267813153644439e-05, + "loss": 0.0003, + "step": 20724 + }, + { + "epoch": 88.58, + "learning_rate": 8.266930982680317e-05, + "loss": 0.0017, + "step": 20728 + }, + { + "epoch": 88.6, + "learning_rate": 8.266048634226686e-05, + "loss": 0.0, + "step": 20732 + }, + { + "epoch": 88.62, + "learning_rate": 8.265166108331483e-05, + "loss": 0.0001, + "step": 20736 + }, + { + "epoch": 88.63, + "learning_rate": 8.264283405042657e-05, + "loss": 0.0, + "step": 20740 + }, + { + "epoch": 88.65, + "learning_rate": 8.26340052440816e-05, + "loss": 0.0, + "step": 20744 + }, + { + "epoch": 88.67, + "learning_rate": 8.262517466475963e-05, + "loss": 0.0, + "step": 20748 + }, + { + "epoch": 88.68, + "learning_rate": 8.261634231294037e-05, + "loss": 0.0, + "step": 20752 + }, + { + "epoch": 88.7, + "learning_rate": 8.260750818910372e-05, + "loss": 0.0, + "step": 20756 + }, + { + "epoch": 88.72, + "learning_rate": 8.259867229372961e-05, + "loss": 0.0001, + "step": 20760 + }, + { + "epoch": 88.74, + "learning_rate": 8.258983462729808e-05, + "loss": 0.0003, + "step": 20764 + }, + { + "epoch": 88.75, + "learning_rate": 8.258099519028928e-05, + "loss": 0.0, + "step": 20768 + }, + { + "epoch": 88.77, + "learning_rate": 8.257215398318344e-05, + "loss": 0.0002, + "step": 20772 + }, + { + "epoch": 88.79, + "learning_rate": 8.25633110064609e-05, + "loss": 0.0, + "step": 20776 + }, + { + "epoch": 88.8, + "learning_rate": 8.25544662606021e-05, + "loss": 0.0001, + "step": 20780 + }, + { + "epoch": 88.82, + "learning_rate": 8.254561974608757e-05, + "loss": 0.0002, + "step": 20784 + }, + { + "epoch": 88.84, + "learning_rate": 8.253677146339791e-05, + "loss": 0.0001, + "step": 20788 + }, + { + "epoch": 88.85, + "learning_rate": 8.252792141301386e-05, + "loss": 0.0012, + "step": 20792 + }, + { + "epoch": 88.87, + "learning_rate": 8.251906959541625e-05, + "loss": 0.0, + "step": 20796 + }, + { + "epoch": 88.89, + "learning_rate": 8.251021601108593e-05, + "loss": 0.0, + "step": 20800 + }, + { + "epoch": 88.91, + "learning_rate": 8.250136066050397e-05, + "loss": 0.0, + "step": 20804 + }, + { + "epoch": 88.92, + "learning_rate": 8.249250354415145e-05, + "loss": 0.001, + "step": 20808 + }, + { + "epoch": 88.94, + "learning_rate": 8.248364466250959e-05, + "loss": 0.0001, + "step": 20812 + }, + { + "epoch": 88.96, + "learning_rate": 8.247478401605963e-05, + "loss": 0.0001, + "step": 20816 + }, + { + "epoch": 88.97, + "learning_rate": 8.246592160528302e-05, + "loss": 0.0001, + "step": 20820 + }, + { + "epoch": 88.99, + "learning_rate": 8.245705743066123e-05, + "loss": 0.0002, + "step": 20824 + }, + { + "epoch": 89.01, + "learning_rate": 8.244819149267584e-05, + "loss": 0.0013, + "step": 20828 + }, + { + "epoch": 89.03, + "learning_rate": 8.24393237918085e-05, + "loss": 0.0003, + "step": 20832 + }, + { + "epoch": 89.04, + "learning_rate": 8.243045432854102e-05, + "loss": 0.0001, + "step": 20836 + }, + { + "epoch": 89.06, + "learning_rate": 8.242158310335528e-05, + "loss": 0.0, + "step": 20840 + }, + { + "epoch": 89.08, + "learning_rate": 8.241271011673322e-05, + "loss": 0.0001, + "step": 20844 + }, + { + "epoch": 89.09, + "learning_rate": 8.24038353691569e-05, + "loss": 0.0001, + "step": 20848 + }, + { + "epoch": 89.11, + "learning_rate": 8.239495886110848e-05, + "loss": 0.0002, + "step": 20852 + }, + { + "epoch": 89.13, + "learning_rate": 8.238608059307023e-05, + "loss": 0.0001, + "step": 20856 + }, + { + "epoch": 89.15, + "learning_rate": 8.237720056552449e-05, + "loss": 0.0001, + "step": 20860 + }, + { + "epoch": 89.16, + "learning_rate": 8.23683187789537e-05, + "loss": 0.0, + "step": 20864 + }, + { + "epoch": 89.18, + "learning_rate": 8.23594352338404e-05, + "loss": 0.0, + "step": 20868 + }, + { + "epoch": 89.2, + "learning_rate": 8.235054993066722e-05, + "loss": 0.0, + "step": 20872 + }, + { + "epoch": 89.21, + "learning_rate": 8.23416628699169e-05, + "loss": 0.0, + "step": 20876 + }, + { + "epoch": 89.23, + "learning_rate": 8.233277405207227e-05, + "loss": 0.0001, + "step": 20880 + }, + { + "epoch": 89.25, + "learning_rate": 8.232388347761624e-05, + "loss": 0.0021, + "step": 20884 + }, + { + "epoch": 89.26, + "learning_rate": 8.231499114703184e-05, + "loss": 0.0004, + "step": 20888 + }, + { + "epoch": 89.28, + "learning_rate": 8.230609706080217e-05, + "loss": 0.0004, + "step": 20892 + }, + { + "epoch": 89.3, + "learning_rate": 8.229720121941043e-05, + "loss": 0.0001, + "step": 20896 + }, + { + "epoch": 89.32, + "learning_rate": 8.228830362333995e-05, + "loss": 0.0, + "step": 20900 + }, + { + "epoch": 89.33, + "learning_rate": 8.227940427307413e-05, + "loss": 0.0001, + "step": 20904 + }, + { + "epoch": 89.35, + "learning_rate": 8.227050316909642e-05, + "loss": 0.0001, + "step": 20908 + }, + { + "epoch": 89.37, + "learning_rate": 8.226160031189045e-05, + "loss": 0.0, + "step": 20912 + }, + { + "epoch": 89.38, + "learning_rate": 8.225269570193989e-05, + "loss": 0.0001, + "step": 20916 + }, + { + "epoch": 89.4, + "learning_rate": 8.224378933972851e-05, + "loss": 0.0, + "step": 20920 + }, + { + "epoch": 89.42, + "learning_rate": 8.223488122574022e-05, + "loss": 0.0, + "step": 20924 + }, + { + "epoch": 89.44, + "learning_rate": 8.222597136045895e-05, + "loss": 0.0, + "step": 20928 + }, + { + "epoch": 89.45, + "learning_rate": 8.221705974436881e-05, + "loss": 0.0001, + "step": 20932 + }, + { + "epoch": 89.47, + "learning_rate": 8.22081463779539e-05, + "loss": 0.0, + "step": 20936 + }, + { + "epoch": 89.49, + "learning_rate": 8.219923126169853e-05, + "loss": 0.0002, + "step": 20940 + }, + { + "epoch": 89.5, + "learning_rate": 8.219031439608702e-05, + "loss": 0.0003, + "step": 20944 + }, + { + "epoch": 89.52, + "learning_rate": 8.218139578160382e-05, + "loss": 0.0009, + "step": 20948 + }, + { + "epoch": 89.54, + "learning_rate": 8.217247541873347e-05, + "loss": 0.0001, + "step": 20952 + }, + { + "epoch": 89.56, + "learning_rate": 8.216355330796061e-05, + "loss": 0.0006, + "step": 20956 + }, + { + "epoch": 89.57, + "learning_rate": 8.215462944976999e-05, + "loss": 0.0, + "step": 20960 + }, + { + "epoch": 89.59, + "learning_rate": 8.21457038446464e-05, + "loss": 0.0002, + "step": 20964 + }, + { + "epoch": 89.61, + "learning_rate": 8.213677649307478e-05, + "loss": 0.0002, + "step": 20968 + }, + { + "epoch": 89.62, + "learning_rate": 8.212784739554015e-05, + "loss": 0.0, + "step": 20972 + }, + { + "epoch": 89.64, + "learning_rate": 8.21189165525276e-05, + "loss": 0.0006, + "step": 20976 + }, + { + "epoch": 89.66, + "learning_rate": 8.210998396452235e-05, + "loss": 0.0, + "step": 20980 + }, + { + "epoch": 89.68, + "learning_rate": 8.21010496320097e-05, + "loss": 0.0, + "step": 20984 + }, + { + "epoch": 89.69, + "learning_rate": 8.209211355547504e-05, + "loss": 0.0, + "step": 20988 + }, + { + "epoch": 89.71, + "learning_rate": 8.208317573540386e-05, + "loss": 0.0, + "step": 20992 + }, + { + "epoch": 89.73, + "learning_rate": 8.207423617228174e-05, + "loss": 0.0, + "step": 20996 + }, + { + "epoch": 89.74, + "learning_rate": 8.206529486659435e-05, + "loss": 0.0001, + "step": 21000 + }, + { + "epoch": 89.74, + "eval_exact_match": 0.5083160083160083, + "eval_loss": 0.9255384206771851, + "eval_runtime": 135.5229, + "eval_samples_per_second": 7.098, + "step": 21000 + }, + { + "epoch": 89.76, + "learning_rate": 8.20563518188275e-05, + "loss": 0.0, + "step": 21004 + }, + { + "epoch": 89.78, + "learning_rate": 8.204740702946702e-05, + "loss": 0.0001, + "step": 21008 + }, + { + "epoch": 89.79, + "learning_rate": 8.203846049899891e-05, + "loss": 0.0005, + "step": 21012 + }, + { + "epoch": 89.81, + "learning_rate": 8.202951222790916e-05, + "loss": 0.0001, + "step": 21016 + }, + { + "epoch": 89.83, + "learning_rate": 8.2020562216684e-05, + "loss": 0.0, + "step": 21020 + }, + { + "epoch": 89.85, + "learning_rate": 8.201161046580963e-05, + "loss": 0.0, + "step": 21024 + }, + { + "epoch": 89.86, + "learning_rate": 8.200265697577241e-05, + "loss": 0.0004, + "step": 21028 + }, + { + "epoch": 89.88, + "learning_rate": 8.199370174705876e-05, + "loss": 0.001, + "step": 21032 + }, + { + "epoch": 89.9, + "learning_rate": 8.198474478015521e-05, + "loss": 0.0001, + "step": 21036 + }, + { + "epoch": 89.91, + "learning_rate": 8.197578607554842e-05, + "loss": 0.0002, + "step": 21040 + }, + { + "epoch": 89.93, + "learning_rate": 8.196682563372505e-05, + "loss": 0.0002, + "step": 21044 + }, + { + "epoch": 89.95, + "learning_rate": 8.195786345517196e-05, + "loss": 0.0011, + "step": 21048 + }, + { + "epoch": 89.97, + "learning_rate": 8.194889954037603e-05, + "loss": 0.0001, + "step": 21052 + }, + { + "epoch": 89.98, + "learning_rate": 8.193993388982428e-05, + "loss": 0.0001, + "step": 21056 + }, + { + "epoch": 90.0, + "learning_rate": 8.19309665040038e-05, + "loss": 0.0, + "step": 21060 + }, + { + "epoch": 90.02, + "learning_rate": 8.192199738340177e-05, + "loss": 0.0001, + "step": 21064 + }, + { + "epoch": 90.03, + "learning_rate": 8.19130265285055e-05, + "loss": 0.0001, + "step": 21068 + }, + { + "epoch": 90.05, + "learning_rate": 8.190405393980234e-05, + "loss": 0.0001, + "step": 21072 + }, + { + "epoch": 90.07, + "learning_rate": 8.189507961777976e-05, + "loss": 0.0011, + "step": 21076 + }, + { + "epoch": 90.09, + "learning_rate": 8.188610356292536e-05, + "loss": 0.0, + "step": 21080 + }, + { + "epoch": 90.1, + "learning_rate": 8.187712577572679e-05, + "loss": 0.0001, + "step": 21084 + }, + { + "epoch": 90.12, + "learning_rate": 8.186814625667178e-05, + "loss": 0.0001, + "step": 21088 + }, + { + "epoch": 90.14, + "learning_rate": 8.185916500624821e-05, + "loss": 0.0001, + "step": 21092 + }, + { + "epoch": 90.15, + "learning_rate": 8.185018202494401e-05, + "loss": 0.0005, + "step": 21096 + }, + { + "epoch": 90.17, + "learning_rate": 8.184119731324722e-05, + "loss": 0.0, + "step": 21100 + }, + { + "epoch": 90.19, + "learning_rate": 8.183221087164596e-05, + "loss": 0.0001, + "step": 21104 + }, + { + "epoch": 90.21, + "learning_rate": 8.182322270062848e-05, + "loss": 0.0003, + "step": 21108 + }, + { + "epoch": 90.22, + "learning_rate": 8.181423280068308e-05, + "loss": 0.0, + "step": 21112 + }, + { + "epoch": 90.24, + "learning_rate": 8.180524117229818e-05, + "loss": 0.0, + "step": 21116 + }, + { + "epoch": 90.26, + "learning_rate": 8.179624781596226e-05, + "loss": 0.0001, + "step": 21120 + }, + { + "epoch": 90.27, + "learning_rate": 8.1787252732164e-05, + "loss": 0.0002, + "step": 21124 + }, + { + "epoch": 90.29, + "learning_rate": 8.1778255921392e-05, + "loss": 0.0001, + "step": 21128 + }, + { + "epoch": 90.31, + "learning_rate": 8.17692573841351e-05, + "loss": 0.0, + "step": 21132 + }, + { + "epoch": 90.32, + "learning_rate": 8.176025712088218e-05, + "loss": 0.0001, + "step": 21136 + }, + { + "epoch": 90.34, + "learning_rate": 8.175125513212221e-05, + "loss": 0.0001, + "step": 21140 + }, + { + "epoch": 90.36, + "learning_rate": 8.174225141834426e-05, + "loss": 0.0002, + "step": 21144 + }, + { + "epoch": 90.38, + "learning_rate": 8.17332459800375e-05, + "loss": 0.0001, + "step": 21148 + }, + { + "epoch": 90.39, + "learning_rate": 8.172423881769117e-05, + "loss": 0.0006, + "step": 21152 + }, + { + "epoch": 90.41, + "learning_rate": 8.171522993179463e-05, + "loss": 0.0001, + "step": 21156 + }, + { + "epoch": 90.43, + "learning_rate": 8.170621932283735e-05, + "loss": 0.0003, + "step": 21160 + }, + { + "epoch": 90.44, + "learning_rate": 8.169720699130883e-05, + "loss": 0.0, + "step": 21164 + }, + { + "epoch": 90.46, + "learning_rate": 8.168819293769874e-05, + "loss": 0.0001, + "step": 21168 + }, + { + "epoch": 90.48, + "learning_rate": 8.167917716249677e-05, + "loss": 0.0001, + "step": 21172 + }, + { + "epoch": 90.5, + "learning_rate": 8.167015966619274e-05, + "loss": 0.0, + "step": 21176 + }, + { + "epoch": 90.51, + "learning_rate": 8.16611404492766e-05, + "loss": 0.0, + "step": 21180 + }, + { + "epoch": 90.53, + "learning_rate": 8.165211951223832e-05, + "loss": 0.0001, + "step": 21184 + }, + { + "epoch": 90.55, + "learning_rate": 8.164309685556802e-05, + "loss": 0.0, + "step": 21188 + }, + { + "epoch": 90.56, + "learning_rate": 8.163407247975588e-05, + "loss": 0.0001, + "step": 21192 + }, + { + "epoch": 90.58, + "learning_rate": 8.162504638529218e-05, + "loss": 0.0002, + "step": 21196 + }, + { + "epoch": 90.6, + "learning_rate": 8.161601857266732e-05, + "loss": 0.0, + "step": 21200 + }, + { + "epoch": 90.62, + "learning_rate": 8.160698904237176e-05, + "loss": 0.0003, + "step": 21204 + }, + { + "epoch": 90.63, + "learning_rate": 8.15979577948961e-05, + "loss": 0.0001, + "step": 21208 + }, + { + "epoch": 90.65, + "learning_rate": 8.158892483073094e-05, + "loss": 0.0001, + "step": 21212 + }, + { + "epoch": 90.67, + "learning_rate": 8.157989015036707e-05, + "loss": 0.0002, + "step": 21216 + }, + { + "epoch": 90.68, + "learning_rate": 8.157085375429533e-05, + "loss": 0.0, + "step": 21220 + }, + { + "epoch": 90.7, + "learning_rate": 8.156181564300667e-05, + "loss": 0.0002, + "step": 21224 + }, + { + "epoch": 90.72, + "learning_rate": 8.155277581699212e-05, + "loss": 0.0001, + "step": 21228 + }, + { + "epoch": 90.74, + "learning_rate": 8.154373427674278e-05, + "loss": 0.0, + "step": 21232 + }, + { + "epoch": 90.75, + "learning_rate": 8.153469102274988e-05, + "loss": 0.0002, + "step": 21236 + }, + { + "epoch": 90.77, + "learning_rate": 8.152564605550476e-05, + "loss": 0.0, + "step": 21240 + }, + { + "epoch": 90.79, + "learning_rate": 8.15165993754988e-05, + "loss": 0.0003, + "step": 21244 + }, + { + "epoch": 90.8, + "learning_rate": 8.150755098322351e-05, + "loss": 0.0002, + "step": 21248 + }, + { + "epoch": 90.82, + "learning_rate": 8.149850087917045e-05, + "loss": 0.0002, + "step": 21252 + }, + { + "epoch": 90.84, + "learning_rate": 8.148944906383136e-05, + "loss": 0.0002, + "step": 21256 + }, + { + "epoch": 90.85, + "learning_rate": 8.148039553769796e-05, + "loss": 0.0001, + "step": 21260 + }, + { + "epoch": 90.87, + "learning_rate": 8.147134030126217e-05, + "loss": 0.0001, + "step": 21264 + }, + { + "epoch": 90.89, + "learning_rate": 8.146228335501591e-05, + "loss": 0.0001, + "step": 21268 + }, + { + "epoch": 90.91, + "learning_rate": 8.145322469945126e-05, + "loss": 0.0001, + "step": 21272 + }, + { + "epoch": 90.92, + "learning_rate": 8.144416433506036e-05, + "loss": 0.0002, + "step": 21276 + }, + { + "epoch": 90.94, + "learning_rate": 8.143510226233546e-05, + "loss": 0.0001, + "step": 21280 + }, + { + "epoch": 90.96, + "learning_rate": 8.14260384817689e-05, + "loss": 0.0002, + "step": 21284 + }, + { + "epoch": 90.97, + "learning_rate": 8.141697299385308e-05, + "loss": 0.0001, + "step": 21288 + }, + { + "epoch": 90.99, + "learning_rate": 8.140790579908056e-05, + "loss": 0.0, + "step": 21292 + }, + { + "epoch": 91.01, + "learning_rate": 8.13988368979439e-05, + "loss": 0.0001, + "step": 21296 + }, + { + "epoch": 91.03, + "learning_rate": 8.138976629093586e-05, + "loss": 0.0, + "step": 21300 + }, + { + "epoch": 91.04, + "learning_rate": 8.13806939785492e-05, + "loss": 0.0021, + "step": 21304 + }, + { + "epoch": 91.06, + "learning_rate": 8.137161996127682e-05, + "loss": 0.0011, + "step": 21308 + }, + { + "epoch": 91.08, + "learning_rate": 8.136254423961172e-05, + "loss": 0.0, + "step": 21312 + }, + { + "epoch": 91.09, + "learning_rate": 8.135346681404696e-05, + "loss": 0.0, + "step": 21316 + }, + { + "epoch": 91.11, + "learning_rate": 8.134438768507572e-05, + "loss": 0.0, + "step": 21320 + }, + { + "epoch": 91.13, + "learning_rate": 8.133530685319125e-05, + "loss": 0.0003, + "step": 21324 + }, + { + "epoch": 91.15, + "learning_rate": 8.13262243188869e-05, + "loss": 0.0001, + "step": 21328 + }, + { + "epoch": 91.16, + "learning_rate": 8.131714008265614e-05, + "loss": 0.0001, + "step": 21332 + }, + { + "epoch": 91.18, + "learning_rate": 8.130805414499247e-05, + "loss": 0.0, + "step": 21336 + }, + { + "epoch": 91.2, + "learning_rate": 8.129896650638956e-05, + "loss": 0.0001, + "step": 21340 + }, + { + "epoch": 91.21, + "learning_rate": 8.128987716734111e-05, + "loss": 0.0, + "step": 21344 + }, + { + "epoch": 91.23, + "learning_rate": 8.128078612834095e-05, + "loss": 0.0001, + "step": 21348 + }, + { + "epoch": 91.25, + "learning_rate": 8.127169338988299e-05, + "loss": 0.0, + "step": 21352 + }, + { + "epoch": 91.26, + "learning_rate": 8.12625989524612e-05, + "loss": 0.0, + "step": 21356 + }, + { + "epoch": 91.28, + "learning_rate": 8.125350281656969e-05, + "loss": 0.0001, + "step": 21360 + }, + { + "epoch": 91.3, + "learning_rate": 8.124440498270267e-05, + "loss": 0.0, + "step": 21364 + }, + { + "epoch": 91.32, + "learning_rate": 8.123530545135439e-05, + "loss": 0.0001, + "step": 21368 + }, + { + "epoch": 91.33, + "learning_rate": 8.122620422301923e-05, + "loss": 0.0003, + "step": 21372 + }, + { + "epoch": 91.35, + "learning_rate": 8.121710129819163e-05, + "loss": 0.0001, + "step": 21376 + }, + { + "epoch": 91.37, + "learning_rate": 8.120799667736618e-05, + "loss": 0.0006, + "step": 21380 + }, + { + "epoch": 91.38, + "learning_rate": 8.11988903610375e-05, + "loss": 0.0001, + "step": 21384 + }, + { + "epoch": 91.4, + "learning_rate": 8.118978234970035e-05, + "loss": 0.0, + "step": 21388 + }, + { + "epoch": 91.42, + "learning_rate": 8.118067264384955e-05, + "loss": 0.0003, + "step": 21392 + }, + { + "epoch": 91.44, + "learning_rate": 8.117156124397999e-05, + "loss": 0.0001, + "step": 21396 + }, + { + "epoch": 91.45, + "learning_rate": 8.116244815058673e-05, + "loss": 0.0, + "step": 21400 + }, + { + "epoch": 91.47, + "learning_rate": 8.115333336416486e-05, + "loss": 0.0001, + "step": 21404 + }, + { + "epoch": 91.49, + "learning_rate": 8.114421688520957e-05, + "loss": 0.0001, + "step": 21408 + }, + { + "epoch": 91.5, + "learning_rate": 8.113509871421618e-05, + "loss": 0.0001, + "step": 21412 + }, + { + "epoch": 91.52, + "learning_rate": 8.112597885168003e-05, + "loss": 0.0, + "step": 21416 + }, + { + "epoch": 91.54, + "learning_rate": 8.111685729809662e-05, + "loss": 0.0002, + "step": 21420 + }, + { + "epoch": 91.56, + "learning_rate": 8.110773405396153e-05, + "loss": 0.0001, + "step": 21424 + }, + { + "epoch": 91.57, + "learning_rate": 8.109860911977039e-05, + "loss": 0.0002, + "step": 21428 + }, + { + "epoch": 91.59, + "learning_rate": 8.108948249601894e-05, + "loss": 0.0002, + "step": 21432 + }, + { + "epoch": 91.61, + "learning_rate": 8.108035418320306e-05, + "loss": 0.0001, + "step": 21436 + }, + { + "epoch": 91.62, + "learning_rate": 8.107122418181865e-05, + "loss": 0.0028, + "step": 21440 + }, + { + "epoch": 91.64, + "learning_rate": 8.106209249236177e-05, + "loss": 0.0, + "step": 21444 + }, + { + "epoch": 91.66, + "learning_rate": 8.105295911532848e-05, + "loss": 0.0004, + "step": 21448 + }, + { + "epoch": 91.68, + "learning_rate": 8.104382405121506e-05, + "loss": 0.0002, + "step": 21452 + }, + { + "epoch": 91.69, + "learning_rate": 8.103468730051774e-05, + "loss": 0.0, + "step": 21456 + }, + { + "epoch": 91.71, + "learning_rate": 8.102554886373296e-05, + "loss": 0.0, + "step": 21460 + }, + { + "epoch": 91.73, + "learning_rate": 8.10164087413572e-05, + "loss": 0.0, + "step": 21464 + }, + { + "epoch": 91.74, + "learning_rate": 8.100726693388703e-05, + "loss": 0.0001, + "step": 21468 + }, + { + "epoch": 91.76, + "learning_rate": 8.099812344181909e-05, + "loss": 0.0001, + "step": 21472 + }, + { + "epoch": 91.78, + "learning_rate": 8.098897826565016e-05, + "loss": 0.0001, + "step": 21476 + }, + { + "epoch": 91.79, + "learning_rate": 8.09798314058771e-05, + "loss": 0.0001, + "step": 21480 + }, + { + "epoch": 91.81, + "learning_rate": 8.097068286299683e-05, + "loss": 0.0021, + "step": 21484 + }, + { + "epoch": 91.83, + "learning_rate": 8.096153263750639e-05, + "loss": 0.0003, + "step": 21488 + }, + { + "epoch": 91.85, + "learning_rate": 8.095238072990289e-05, + "loss": 0.0001, + "step": 21492 + }, + { + "epoch": 91.86, + "learning_rate": 8.094322714068358e-05, + "loss": 0.0, + "step": 21496 + }, + { + "epoch": 91.88, + "learning_rate": 8.093407187034576e-05, + "loss": 0.0018, + "step": 21500 + }, + { + "epoch": 91.9, + "learning_rate": 8.092491491938678e-05, + "loss": 0.0007, + "step": 21504 + }, + { + "epoch": 91.91, + "learning_rate": 8.091575628830419e-05, + "loss": 0.0001, + "step": 21508 + }, + { + "epoch": 91.93, + "learning_rate": 8.090659597759554e-05, + "loss": 0.0001, + "step": 21512 + }, + { + "epoch": 91.95, + "learning_rate": 8.089743398775849e-05, + "loss": 0.0, + "step": 21516 + }, + { + "epoch": 91.97, + "learning_rate": 8.088827031929081e-05, + "loss": 0.0, + "step": 21520 + }, + { + "epoch": 91.98, + "learning_rate": 8.08791049726904e-05, + "loss": 0.0001, + "step": 21524 + }, + { + "epoch": 92.0, + "learning_rate": 8.086993794845514e-05, + "loss": 0.0001, + "step": 21528 + }, + { + "epoch": 92.02, + "learning_rate": 8.08607692470831e-05, + "loss": 0.0001, + "step": 21532 + }, + { + "epoch": 92.03, + "learning_rate": 8.085159886907239e-05, + "loss": 0.0, + "step": 21536 + }, + { + "epoch": 92.05, + "learning_rate": 8.084242681492125e-05, + "loss": 0.0001, + "step": 21540 + }, + { + "epoch": 92.07, + "learning_rate": 8.083325308512799e-05, + "loss": 0.0, + "step": 21544 + }, + { + "epoch": 92.09, + "learning_rate": 8.082407768019099e-05, + "loss": 0.0, + "step": 21548 + }, + { + "epoch": 92.1, + "learning_rate": 8.081490060060875e-05, + "loss": 0.0005, + "step": 21552 + }, + { + "epoch": 92.12, + "learning_rate": 8.080572184687987e-05, + "loss": 0.0, + "step": 21556 + }, + { + "epoch": 92.14, + "learning_rate": 8.0796541419503e-05, + "loss": 0.0001, + "step": 21560 + }, + { + "epoch": 92.15, + "learning_rate": 8.078735931897691e-05, + "loss": 0.0004, + "step": 21564 + }, + { + "epoch": 92.17, + "learning_rate": 8.077817554580045e-05, + "loss": 0.0003, + "step": 21568 + }, + { + "epoch": 92.19, + "learning_rate": 8.076899010047259e-05, + "loss": 0.0, + "step": 21572 + }, + { + "epoch": 92.21, + "learning_rate": 8.075980298349235e-05, + "loss": 0.0002, + "step": 21576 + }, + { + "epoch": 92.22, + "learning_rate": 8.075061419535885e-05, + "loss": 0.0001, + "step": 21580 + }, + { + "epoch": 92.24, + "learning_rate": 8.074142373657135e-05, + "loss": 0.0, + "step": 21584 + }, + { + "epoch": 92.26, + "learning_rate": 8.07322316076291e-05, + "loss": 0.0002, + "step": 21588 + }, + { + "epoch": 92.27, + "learning_rate": 8.072303780903153e-05, + "loss": 0.0004, + "step": 21592 + }, + { + "epoch": 92.29, + "learning_rate": 8.071384234127813e-05, + "loss": 0.0, + "step": 21596 + }, + { + "epoch": 92.31, + "learning_rate": 8.070464520486849e-05, + "loss": 0.0018, + "step": 21600 + }, + { + "epoch": 92.32, + "learning_rate": 8.069544640030227e-05, + "loss": 0.0, + "step": 21604 + }, + { + "epoch": 92.34, + "learning_rate": 8.068624592807924e-05, + "loss": 0.0001, + "step": 21608 + }, + { + "epoch": 92.36, + "learning_rate": 8.067704378869927e-05, + "loss": 0.0, + "step": 21612 + }, + { + "epoch": 92.38, + "learning_rate": 8.066783998266225e-05, + "loss": 0.0, + "step": 21616 + }, + { + "epoch": 92.39, + "learning_rate": 8.065863451046828e-05, + "loss": 0.0003, + "step": 21620 + }, + { + "epoch": 92.41, + "learning_rate": 8.064942737261745e-05, + "loss": 0.0004, + "step": 21624 + }, + { + "epoch": 92.43, + "learning_rate": 8.064021856960998e-05, + "loss": 0.0001, + "step": 21628 + }, + { + "epoch": 92.44, + "learning_rate": 8.063100810194616e-05, + "loss": 0.0, + "step": 21632 + }, + { + "epoch": 92.46, + "learning_rate": 8.062179597012641e-05, + "loss": 0.0001, + "step": 21636 + }, + { + "epoch": 92.48, + "learning_rate": 8.061258217465121e-05, + "loss": 0.0003, + "step": 21640 + }, + { + "epoch": 92.5, + "learning_rate": 8.060336671602116e-05, + "loss": 0.0001, + "step": 21644 + }, + { + "epoch": 92.51, + "learning_rate": 8.059414959473689e-05, + "loss": 0.0001, + "step": 21648 + }, + { + "epoch": 92.53, + "learning_rate": 8.058493081129917e-05, + "loss": 0.0012, + "step": 21652 + }, + { + "epoch": 92.55, + "learning_rate": 8.057571036620886e-05, + "loss": 0.0, + "step": 21656 + }, + { + "epoch": 92.56, + "learning_rate": 8.056648825996688e-05, + "loss": 0.0001, + "step": 21660 + }, + { + "epoch": 92.58, + "learning_rate": 8.05572644930743e-05, + "loss": 0.0, + "step": 21664 + }, + { + "epoch": 92.6, + "learning_rate": 8.054803906603219e-05, + "loss": 0.0, + "step": 21668 + }, + { + "epoch": 92.62, + "learning_rate": 8.053881197934178e-05, + "loss": 0.0, + "step": 21672 + }, + { + "epoch": 92.63, + "learning_rate": 8.052958323350437e-05, + "loss": 0.0, + "step": 21676 + }, + { + "epoch": 92.65, + "learning_rate": 8.052035282902135e-05, + "loss": 0.0001, + "step": 21680 + }, + { + "epoch": 92.67, + "learning_rate": 8.05111207663942e-05, + "loss": 0.0, + "step": 21684 + }, + { + "epoch": 92.68, + "learning_rate": 8.050188704612448e-05, + "loss": 0.0002, + "step": 21688 + }, + { + "epoch": 92.7, + "learning_rate": 8.049265166871387e-05, + "loss": 0.0, + "step": 21692 + }, + { + "epoch": 92.72, + "learning_rate": 8.04834146346641e-05, + "loss": 0.0001, + "step": 21696 + }, + { + "epoch": 92.74, + "learning_rate": 8.047417594447702e-05, + "loss": 0.0001, + "step": 21700 + }, + { + "epoch": 92.75, + "learning_rate": 8.046493559865456e-05, + "loss": 0.0, + "step": 21704 + }, + { + "epoch": 92.77, + "learning_rate": 8.045569359769874e-05, + "loss": 0.0001, + "step": 21708 + }, + { + "epoch": 92.79, + "learning_rate": 8.044644994211165e-05, + "loss": 0.0006, + "step": 21712 + }, + { + "epoch": 92.8, + "learning_rate": 8.043720463239553e-05, + "loss": 0.0001, + "step": 21716 + }, + { + "epoch": 92.82, + "learning_rate": 8.042795766905264e-05, + "loss": 0.0004, + "step": 21720 + }, + { + "epoch": 92.84, + "learning_rate": 8.041870905258538e-05, + "loss": 0.0001, + "step": 21724 + }, + { + "epoch": 92.85, + "learning_rate": 8.040945878349617e-05, + "loss": 0.0001, + "step": 21728 + }, + { + "epoch": 92.87, + "learning_rate": 8.040020686228764e-05, + "loss": 0.0001, + "step": 21732 + }, + { + "epoch": 92.89, + "learning_rate": 8.039095328946239e-05, + "loss": 0.0018, + "step": 21736 + }, + { + "epoch": 92.91, + "learning_rate": 8.038169806552318e-05, + "loss": 0.0, + "step": 21740 + }, + { + "epoch": 92.92, + "learning_rate": 8.037244119097283e-05, + "loss": 0.0, + "step": 21744 + }, + { + "epoch": 92.94, + "learning_rate": 8.036318266631425e-05, + "loss": 0.0, + "step": 21748 + }, + { + "epoch": 92.96, + "learning_rate": 8.035392249205046e-05, + "loss": 0.0004, + "step": 21752 + }, + { + "epoch": 92.97, + "learning_rate": 8.034466066868454e-05, + "loss": 0.0026, + "step": 21756 + }, + { + "epoch": 92.99, + "learning_rate": 8.033539719671972e-05, + "loss": 0.0001, + "step": 21760 + }, + { + "epoch": 93.01, + "learning_rate": 8.032613207665922e-05, + "loss": 0.0001, + "step": 21764 + }, + { + "epoch": 93.03, + "learning_rate": 8.031686530900645e-05, + "loss": 0.0001, + "step": 21768 + }, + { + "epoch": 93.04, + "learning_rate": 8.030759689426484e-05, + "loss": 0.0, + "step": 21772 + }, + { + "epoch": 93.06, + "learning_rate": 8.029832683293795e-05, + "loss": 0.0001, + "step": 21776 + }, + { + "epoch": 93.08, + "learning_rate": 8.028905512552939e-05, + "loss": 0.0001, + "step": 21780 + }, + { + "epoch": 93.09, + "learning_rate": 8.027978177254292e-05, + "loss": 0.0001, + "step": 21784 + }, + { + "epoch": 93.11, + "learning_rate": 8.027050677448231e-05, + "loss": 0.0, + "step": 21788 + }, + { + "epoch": 93.13, + "learning_rate": 8.026123013185152e-05, + "loss": 0.0009, + "step": 21792 + }, + { + "epoch": 93.15, + "learning_rate": 8.025195184515448e-05, + "loss": 0.0008, + "step": 21796 + }, + { + "epoch": 93.16, + "learning_rate": 8.02426719148953e-05, + "loss": 0.0003, + "step": 21800 + }, + { + "epoch": 93.18, + "learning_rate": 8.023339034157817e-05, + "loss": 0.0, + "step": 21804 + }, + { + "epoch": 93.2, + "learning_rate": 8.022410712570732e-05, + "loss": 0.0016, + "step": 21808 + }, + { + "epoch": 93.21, + "learning_rate": 8.021482226778712e-05, + "loss": 0.0001, + "step": 21812 + }, + { + "epoch": 93.23, + "learning_rate": 8.020553576832197e-05, + "loss": 0.0001, + "step": 21816 + }, + { + "epoch": 93.25, + "learning_rate": 8.019624762781643e-05, + "loss": 0.0, + "step": 21820 + }, + { + "epoch": 93.26, + "learning_rate": 8.018695784677513e-05, + "loss": 0.002, + "step": 21824 + }, + { + "epoch": 93.28, + "learning_rate": 8.017766642570276e-05, + "loss": 0.0, + "step": 21828 + }, + { + "epoch": 93.3, + "learning_rate": 8.01683733651041e-05, + "loss": 0.0002, + "step": 21832 + }, + { + "epoch": 93.32, + "learning_rate": 8.015907866548404e-05, + "loss": 0.0001, + "step": 21836 + }, + { + "epoch": 93.33, + "learning_rate": 8.014978232734757e-05, + "loss": 0.0, + "step": 21840 + }, + { + "epoch": 93.35, + "learning_rate": 8.014048435119973e-05, + "loss": 0.0002, + "step": 21844 + }, + { + "epoch": 93.37, + "learning_rate": 8.013118473754568e-05, + "loss": 0.0, + "step": 21848 + }, + { + "epoch": 93.38, + "learning_rate": 8.012188348689068e-05, + "loss": 0.0, + "step": 21852 + }, + { + "epoch": 93.4, + "learning_rate": 8.011258059974002e-05, + "loss": 0.0003, + "step": 21856 + }, + { + "epoch": 93.42, + "learning_rate": 8.010327607659914e-05, + "loss": 0.0, + "step": 21860 + }, + { + "epoch": 93.44, + "learning_rate": 8.009396991797355e-05, + "loss": 0.0, + "step": 21864 + }, + { + "epoch": 93.45, + "learning_rate": 8.008466212436884e-05, + "loss": 0.0001, + "step": 21868 + }, + { + "epoch": 93.47, + "learning_rate": 8.007535269629069e-05, + "loss": 0.0002, + "step": 21872 + }, + { + "epoch": 93.49, + "learning_rate": 8.00660416342449e-05, + "loss": 0.0001, + "step": 21876 + }, + { + "epoch": 93.5, + "learning_rate": 8.005672893873729e-05, + "loss": 0.0, + "step": 21880 + }, + { + "epoch": 93.52, + "learning_rate": 8.004741461027381e-05, + "loss": 0.0001, + "step": 21884 + }, + { + "epoch": 93.54, + "learning_rate": 8.003809864936055e-05, + "loss": 0.0001, + "step": 21888 + }, + { + "epoch": 93.56, + "learning_rate": 8.00287810565036e-05, + "loss": 0.0, + "step": 21892 + }, + { + "epoch": 93.57, + "learning_rate": 8.001946183220916e-05, + "loss": 0.0, + "step": 21896 + }, + { + "epoch": 93.59, + "learning_rate": 8.001014097698359e-05, + "loss": 0.0003, + "step": 21900 + }, + { + "epoch": 93.61, + "learning_rate": 8.000081849133324e-05, + "loss": 0.0001, + "step": 21904 + }, + { + "epoch": 93.62, + "learning_rate": 7.999149437576461e-05, + "loss": 0.0, + "step": 21908 + }, + { + "epoch": 93.64, + "learning_rate": 7.998216863078427e-05, + "loss": 0.0, + "step": 21912 + }, + { + "epoch": 93.66, + "learning_rate": 7.997284125689886e-05, + "loss": 0.0, + "step": 21916 + }, + { + "epoch": 93.68, + "learning_rate": 7.996351225461516e-05, + "loss": 0.0001, + "step": 21920 + }, + { + "epoch": 93.69, + "learning_rate": 7.995418162443999e-05, + "loss": 0.0002, + "step": 21924 + }, + { + "epoch": 93.71, + "learning_rate": 7.994484936688027e-05, + "loss": 0.0004, + "step": 21928 + }, + { + "epoch": 93.73, + "learning_rate": 7.993551548244301e-05, + "loss": 0.0002, + "step": 21932 + }, + { + "epoch": 93.74, + "learning_rate": 7.992617997163533e-05, + "loss": 0.0, + "step": 21936 + }, + { + "epoch": 93.76, + "learning_rate": 7.991684283496442e-05, + "loss": 0.0001, + "step": 21940 + }, + { + "epoch": 93.78, + "learning_rate": 7.990750407293754e-05, + "loss": 0.0005, + "step": 21944 + }, + { + "epoch": 93.79, + "learning_rate": 7.989816368606207e-05, + "loss": 0.0001, + "step": 21948 + }, + { + "epoch": 93.81, + "learning_rate": 7.988882167484546e-05, + "loss": 0.0001, + "step": 21952 + }, + { + "epoch": 93.83, + "learning_rate": 7.987947803979525e-05, + "loss": 0.0001, + "step": 21956 + }, + { + "epoch": 93.85, + "learning_rate": 7.987013278141909e-05, + "loss": 0.0006, + "step": 21960 + }, + { + "epoch": 93.86, + "learning_rate": 7.986078590022468e-05, + "loss": 0.0, + "step": 21964 + }, + { + "epoch": 93.88, + "learning_rate": 7.985143739671983e-05, + "loss": 0.0, + "step": 21968 + }, + { + "epoch": 93.9, + "learning_rate": 7.984208727141245e-05, + "loss": 0.0, + "step": 21972 + }, + { + "epoch": 93.91, + "learning_rate": 7.983273552481048e-05, + "loss": 0.0, + "step": 21976 + }, + { + "epoch": 93.93, + "learning_rate": 7.982338215742207e-05, + "loss": 0.0001, + "step": 21980 + }, + { + "epoch": 93.95, + "learning_rate": 7.981402716975532e-05, + "loss": 0.0, + "step": 21984 + }, + { + "epoch": 93.97, + "learning_rate": 7.980467056231848e-05, + "loss": 0.0002, + "step": 21988 + }, + { + "epoch": 93.98, + "learning_rate": 7.979531233561992e-05, + "loss": 0.0002, + "step": 21992 + }, + { + "epoch": 94.0, + "learning_rate": 7.978595249016803e-05, + "loss": 0.0003, + "step": 21996 + }, + { + "epoch": 94.02, + "learning_rate": 7.977659102647137e-05, + "loss": 0.0001, + "step": 22000 + }, + { + "epoch": 94.02, + "eval_exact_match": 0.5135135135135135, + "eval_loss": 0.9283977746963501, + "eval_runtime": 174.6852, + "eval_samples_per_second": 5.507, + "step": 22000 + }, + { + "epoch": 94.03, + "learning_rate": 7.976722794503848e-05, + "loss": 0.0, + "step": 22004 + }, + { + "epoch": 94.05, + "learning_rate": 7.975786324637808e-05, + "loss": 0.0, + "step": 22008 + }, + { + "epoch": 94.07, + "learning_rate": 7.974849693099895e-05, + "loss": 0.0001, + "step": 22012 + }, + { + "epoch": 94.09, + "learning_rate": 7.973912899940994e-05, + "loss": 0.0008, + "step": 22016 + }, + { + "epoch": 94.1, + "learning_rate": 7.972975945212e-05, + "loss": 0.0001, + "step": 22020 + }, + { + "epoch": 94.12, + "learning_rate": 7.97203882896382e-05, + "loss": 0.0018, + "step": 22024 + }, + { + "epoch": 94.14, + "learning_rate": 7.971101551247361e-05, + "loss": 0.0001, + "step": 22028 + }, + { + "epoch": 94.15, + "learning_rate": 7.970164112113551e-05, + "loss": 0.0, + "step": 22032 + }, + { + "epoch": 94.17, + "learning_rate": 7.969226511613314e-05, + "loss": 0.0, + "step": 22036 + }, + { + "epoch": 94.19, + "learning_rate": 7.968288749797596e-05, + "loss": 0.0001, + "step": 22040 + }, + { + "epoch": 94.21, + "learning_rate": 7.967350826717338e-05, + "loss": 0.0004, + "step": 22044 + }, + { + "epoch": 94.22, + "learning_rate": 7.966412742423501e-05, + "loss": 0.0, + "step": 22048 + }, + { + "epoch": 94.24, + "learning_rate": 7.965474496967047e-05, + "loss": 0.0001, + "step": 22052 + }, + { + "epoch": 94.26, + "learning_rate": 7.964536090398953e-05, + "loss": 0.0001, + "step": 22056 + }, + { + "epoch": 94.27, + "learning_rate": 7.963597522770201e-05, + "loss": 0.0001, + "step": 22060 + }, + { + "epoch": 94.29, + "learning_rate": 7.962658794131781e-05, + "loss": 0.0001, + "step": 22064 + }, + { + "epoch": 94.31, + "learning_rate": 7.961719904534694e-05, + "loss": 0.0001, + "step": 22068 + }, + { + "epoch": 94.32, + "learning_rate": 7.960780854029952e-05, + "loss": 0.0001, + "step": 22072 + }, + { + "epoch": 94.34, + "learning_rate": 7.959841642668569e-05, + "loss": 0.0, + "step": 22076 + }, + { + "epoch": 94.36, + "learning_rate": 7.958902270501571e-05, + "loss": 0.0002, + "step": 22080 + }, + { + "epoch": 94.38, + "learning_rate": 7.957962737579998e-05, + "loss": 0.0, + "step": 22084 + }, + { + "epoch": 94.39, + "learning_rate": 7.95702304395489e-05, + "loss": 0.0001, + "step": 22088 + }, + { + "epoch": 94.41, + "learning_rate": 7.956083189677302e-05, + "loss": 0.0002, + "step": 22092 + }, + { + "epoch": 94.43, + "learning_rate": 7.955143174798292e-05, + "loss": 0.0, + "step": 22096 + }, + { + "epoch": 94.44, + "learning_rate": 7.954202999368934e-05, + "loss": 0.0006, + "step": 22100 + }, + { + "epoch": 94.46, + "learning_rate": 7.953262663440306e-05, + "loss": 0.0, + "step": 22104 + }, + { + "epoch": 94.48, + "learning_rate": 7.952322167063492e-05, + "loss": 0.0, + "step": 22108 + }, + { + "epoch": 94.5, + "learning_rate": 7.951381510289596e-05, + "loss": 0.0, + "step": 22112 + }, + { + "epoch": 94.51, + "learning_rate": 7.950440693169714e-05, + "loss": 0.0015, + "step": 22116 + }, + { + "epoch": 94.53, + "learning_rate": 7.949499715754967e-05, + "loss": 0.0001, + "step": 22120 + }, + { + "epoch": 94.55, + "learning_rate": 7.948558578096474e-05, + "loss": 0.0, + "step": 22124 + }, + { + "epoch": 94.56, + "learning_rate": 7.947617280245366e-05, + "loss": 0.0008, + "step": 22128 + }, + { + "epoch": 94.58, + "learning_rate": 7.946675822252786e-05, + "loss": 0.0003, + "step": 22132 + }, + { + "epoch": 94.6, + "learning_rate": 7.945734204169879e-05, + "loss": 0.0, + "step": 22136 + }, + { + "epoch": 94.62, + "learning_rate": 7.944792426047802e-05, + "loss": 0.0001, + "step": 22140 + }, + { + "epoch": 94.63, + "learning_rate": 7.943850487937725e-05, + "loss": 0.0, + "step": 22144 + }, + { + "epoch": 94.65, + "learning_rate": 7.94290838989082e-05, + "loss": 0.0, + "step": 22148 + }, + { + "epoch": 94.67, + "learning_rate": 7.941966131958272e-05, + "loss": 0.0012, + "step": 22152 + }, + { + "epoch": 94.68, + "learning_rate": 7.941023714191269e-05, + "loss": 0.0003, + "step": 22156 + }, + { + "epoch": 94.7, + "learning_rate": 7.940081136641015e-05, + "loss": 0.0, + "step": 22160 + }, + { + "epoch": 94.72, + "learning_rate": 7.93913839935872e-05, + "loss": 0.0002, + "step": 22164 + }, + { + "epoch": 94.74, + "learning_rate": 7.9381955023956e-05, + "loss": 0.0001, + "step": 22168 + }, + { + "epoch": 94.75, + "learning_rate": 7.937252445802884e-05, + "loss": 0.0007, + "step": 22172 + }, + { + "epoch": 94.77, + "learning_rate": 7.936309229631804e-05, + "loss": 0.0002, + "step": 22176 + }, + { + "epoch": 94.79, + "learning_rate": 7.93536585393361e-05, + "loss": 0.0006, + "step": 22180 + }, + { + "epoch": 94.8, + "learning_rate": 7.934422318759547e-05, + "loss": 0.0, + "step": 22184 + }, + { + "epoch": 94.82, + "learning_rate": 7.933478624160884e-05, + "loss": 0.0003, + "step": 22188 + }, + { + "epoch": 94.84, + "learning_rate": 7.932534770188886e-05, + "loss": 0.0001, + "step": 22192 + }, + { + "epoch": 94.85, + "learning_rate": 7.931590756894833e-05, + "loss": 0.0, + "step": 22196 + }, + { + "epoch": 94.87, + "learning_rate": 7.930646584330012e-05, + "loss": 0.0003, + "step": 22200 + }, + { + "epoch": 94.89, + "learning_rate": 7.929702252545723e-05, + "loss": 0.0013, + "step": 22204 + }, + { + "epoch": 94.91, + "learning_rate": 7.928757761593265e-05, + "loss": 0.0002, + "step": 22208 + }, + { + "epoch": 94.92, + "learning_rate": 7.927813111523955e-05, + "loss": 0.0008, + "step": 22212 + }, + { + "epoch": 94.94, + "learning_rate": 7.926868302389114e-05, + "loss": 0.0, + "step": 22216 + }, + { + "epoch": 94.96, + "learning_rate": 7.925923334240072e-05, + "loss": 0.0001, + "step": 22220 + }, + { + "epoch": 94.97, + "learning_rate": 7.92497820712817e-05, + "loss": 0.0, + "step": 22224 + }, + { + "epoch": 94.99, + "learning_rate": 7.924032921104754e-05, + "loss": 0.0002, + "step": 22228 + }, + { + "epoch": 95.01, + "learning_rate": 7.923087476221182e-05, + "loss": 0.0001, + "step": 22232 + }, + { + "epoch": 95.03, + "learning_rate": 7.922141872528817e-05, + "loss": 0.0, + "step": 22236 + }, + { + "epoch": 95.04, + "learning_rate": 7.921196110079037e-05, + "loss": 0.0001, + "step": 22240 + }, + { + "epoch": 95.06, + "learning_rate": 7.92025018892322e-05, + "loss": 0.0001, + "step": 22244 + }, + { + "epoch": 95.08, + "learning_rate": 7.91930410911276e-05, + "loss": 0.0, + "step": 22248 + }, + { + "epoch": 95.09, + "learning_rate": 7.918357870699056e-05, + "loss": 0.0028, + "step": 22252 + }, + { + "epoch": 95.11, + "learning_rate": 7.917411473733514e-05, + "loss": 0.0, + "step": 22256 + }, + { + "epoch": 95.13, + "learning_rate": 7.916464918267554e-05, + "loss": 0.0, + "step": 22260 + }, + { + "epoch": 95.15, + "learning_rate": 7.915518204352602e-05, + "loss": 0.0004, + "step": 22264 + }, + { + "epoch": 95.16, + "learning_rate": 7.914571332040088e-05, + "loss": 0.0006, + "step": 22268 + }, + { + "epoch": 95.18, + "learning_rate": 7.913624301381459e-05, + "loss": 0.0, + "step": 22272 + }, + { + "epoch": 95.2, + "learning_rate": 7.912677112428164e-05, + "loss": 0.0002, + "step": 22276 + }, + { + "epoch": 95.21, + "learning_rate": 7.911729765231665e-05, + "loss": 0.0, + "step": 22280 + }, + { + "epoch": 95.23, + "learning_rate": 7.910782259843427e-05, + "loss": 0.0016, + "step": 22284 + }, + { + "epoch": 95.25, + "learning_rate": 7.909834596314932e-05, + "loss": 0.0005, + "step": 22288 + }, + { + "epoch": 95.26, + "learning_rate": 7.908886774697661e-05, + "loss": 0.0003, + "step": 22292 + }, + { + "epoch": 95.28, + "learning_rate": 7.90793879504311e-05, + "loss": 0.0002, + "step": 22296 + }, + { + "epoch": 95.3, + "learning_rate": 7.906990657402783e-05, + "loss": 0.0, + "step": 22300 + }, + { + "epoch": 95.32, + "learning_rate": 7.90604236182819e-05, + "loss": 0.0001, + "step": 22304 + }, + { + "epoch": 95.33, + "learning_rate": 7.905093908370852e-05, + "loss": 0.0012, + "step": 22308 + }, + { + "epoch": 95.35, + "learning_rate": 7.904145297082299e-05, + "loss": 0.0, + "step": 22312 + }, + { + "epoch": 95.37, + "learning_rate": 7.903196528014065e-05, + "loss": 0.0, + "step": 22316 + }, + { + "epoch": 95.38, + "learning_rate": 7.902247601217699e-05, + "loss": 0.0, + "step": 22320 + }, + { + "epoch": 95.4, + "learning_rate": 7.901298516744753e-05, + "loss": 0.0001, + "step": 22324 + }, + { + "epoch": 95.42, + "learning_rate": 7.900349274646791e-05, + "loss": 0.0, + "step": 22328 + }, + { + "epoch": 95.44, + "learning_rate": 7.899399874975383e-05, + "loss": 0.0002, + "step": 22332 + }, + { + "epoch": 95.45, + "learning_rate": 7.898450317782112e-05, + "loss": 0.0, + "step": 22336 + }, + { + "epoch": 95.47, + "learning_rate": 7.897500603118565e-05, + "loss": 0.0, + "step": 22340 + }, + { + "epoch": 95.49, + "learning_rate": 7.896550731036338e-05, + "loss": 0.0, + "step": 22344 + }, + { + "epoch": 95.5, + "learning_rate": 7.89560070158704e-05, + "loss": 0.0, + "step": 22348 + }, + { + "epoch": 95.52, + "learning_rate": 7.894650514822281e-05, + "loss": 0.0, + "step": 22352 + }, + { + "epoch": 95.54, + "learning_rate": 7.893700170793686e-05, + "loss": 0.0, + "step": 22356 + }, + { + "epoch": 95.56, + "learning_rate": 7.892749669552888e-05, + "loss": 0.0, + "step": 22360 + }, + { + "epoch": 95.57, + "learning_rate": 7.891799011151523e-05, + "loss": 0.0011, + "step": 22364 + }, + { + "epoch": 95.59, + "learning_rate": 7.890848195641243e-05, + "loss": 0.0, + "step": 22368 + }, + { + "epoch": 95.61, + "learning_rate": 7.889897223073703e-05, + "loss": 0.0004, + "step": 22372 + }, + { + "epoch": 95.62, + "learning_rate": 7.888946093500568e-05, + "loss": 0.0002, + "step": 22376 + }, + { + "epoch": 95.64, + "learning_rate": 7.887994806973516e-05, + "loss": 0.0, + "step": 22380 + }, + { + "epoch": 95.66, + "learning_rate": 7.887043363544225e-05, + "loss": 0.0, + "step": 22384 + }, + { + "epoch": 95.68, + "learning_rate": 7.886091763264388e-05, + "loss": 0.0, + "step": 22388 + }, + { + "epoch": 95.69, + "learning_rate": 7.885140006185705e-05, + "loss": 0.0, + "step": 22392 + }, + { + "epoch": 95.71, + "learning_rate": 7.884188092359881e-05, + "loss": 0.0001, + "step": 22396 + }, + { + "epoch": 95.73, + "learning_rate": 7.883236021838638e-05, + "loss": 0.0, + "step": 22400 + }, + { + "epoch": 95.74, + "learning_rate": 7.882283794673698e-05, + "loss": 0.0004, + "step": 22404 + }, + { + "epoch": 95.76, + "learning_rate": 7.881331410916795e-05, + "loss": 0.0, + "step": 22408 + }, + { + "epoch": 95.78, + "learning_rate": 7.880378870619672e-05, + "loss": 0.0001, + "step": 22412 + }, + { + "epoch": 95.79, + "learning_rate": 7.879426173834076e-05, + "loss": 0.0006, + "step": 22416 + }, + { + "epoch": 95.81, + "learning_rate": 7.878473320611771e-05, + "loss": 0.0, + "step": 22420 + }, + { + "epoch": 95.83, + "learning_rate": 7.877520311004523e-05, + "loss": 0.0001, + "step": 22424 + }, + { + "epoch": 95.85, + "learning_rate": 7.876567145064108e-05, + "loss": 0.0005, + "step": 22428 + }, + { + "epoch": 95.86, + "learning_rate": 7.875613822842311e-05, + "loss": 0.0, + "step": 22432 + }, + { + "epoch": 95.88, + "learning_rate": 7.874660344390923e-05, + "loss": 0.0001, + "step": 22436 + }, + { + "epoch": 95.9, + "learning_rate": 7.873706709761748e-05, + "loss": 0.0, + "step": 22440 + }, + { + "epoch": 95.91, + "learning_rate": 7.872752919006596e-05, + "loss": 0.0001, + "step": 22444 + }, + { + "epoch": 95.93, + "learning_rate": 7.871798972177287e-05, + "loss": 0.0, + "step": 22448 + }, + { + "epoch": 95.95, + "learning_rate": 7.870844869325644e-05, + "loss": 0.0001, + "step": 22452 + }, + { + "epoch": 95.97, + "learning_rate": 7.869890610503504e-05, + "loss": 0.0, + "step": 22456 + }, + { + "epoch": 95.98, + "learning_rate": 7.868936195762714e-05, + "loss": 0.0007, + "step": 22460 + }, + { + "epoch": 96.0, + "learning_rate": 7.867981625155124e-05, + "loss": 0.0, + "step": 22464 + }, + { + "epoch": 96.02, + "learning_rate": 7.867026898732595e-05, + "loss": 0.001, + "step": 22468 + }, + { + "epoch": 96.03, + "learning_rate": 7.866072016546997e-05, + "loss": 0.0001, + "step": 22472 + }, + { + "epoch": 96.05, + "learning_rate": 7.865116978650208e-05, + "loss": 0.0, + "step": 22476 + }, + { + "epoch": 96.07, + "learning_rate": 7.864161785094114e-05, + "loss": 0.0001, + "step": 22480 + }, + { + "epoch": 96.09, + "learning_rate": 7.86320643593061e-05, + "loss": 0.0004, + "step": 22484 + }, + { + "epoch": 96.1, + "learning_rate": 7.8622509312116e-05, + "loss": 0.0006, + "step": 22488 + }, + { + "epoch": 96.12, + "learning_rate": 7.861295270988994e-05, + "loss": 0.0, + "step": 22492 + }, + { + "epoch": 96.14, + "learning_rate": 7.860339455314713e-05, + "loss": 0.0001, + "step": 22496 + }, + { + "epoch": 96.15, + "learning_rate": 7.859383484240687e-05, + "loss": 0.0007, + "step": 22500 + }, + { + "epoch": 96.17, + "learning_rate": 7.858427357818851e-05, + "loss": 0.0, + "step": 22504 + }, + { + "epoch": 96.19, + "learning_rate": 7.857471076101153e-05, + "loss": 0.0, + "step": 22508 + }, + { + "epoch": 96.21, + "learning_rate": 7.856514639139546e-05, + "loss": 0.0, + "step": 22512 + }, + { + "epoch": 96.22, + "learning_rate": 7.855558046985986e-05, + "loss": 0.0002, + "step": 22516 + }, + { + "epoch": 96.24, + "learning_rate": 7.854601299692454e-05, + "loss": 0.0008, + "step": 22520 + }, + { + "epoch": 96.26, + "learning_rate": 7.853644397310926e-05, + "loss": 0.0, + "step": 22524 + }, + { + "epoch": 96.27, + "learning_rate": 7.852687339893386e-05, + "loss": 0.0001, + "step": 22528 + }, + { + "epoch": 96.29, + "learning_rate": 7.851730127491834e-05, + "loss": 0.0, + "step": 22532 + }, + { + "epoch": 96.31, + "learning_rate": 7.850772760158273e-05, + "loss": 0.0, + "step": 22536 + }, + { + "epoch": 96.32, + "learning_rate": 7.849815237944716e-05, + "loss": 0.0001, + "step": 22540 + }, + { + "epoch": 96.34, + "learning_rate": 7.848857560903183e-05, + "loss": 0.0001, + "step": 22544 + }, + { + "epoch": 96.36, + "learning_rate": 7.847899729085706e-05, + "loss": 0.0, + "step": 22548 + }, + { + "epoch": 96.38, + "learning_rate": 7.846941742544323e-05, + "loss": 0.0002, + "step": 22552 + }, + { + "epoch": 96.39, + "learning_rate": 7.84598360133108e-05, + "loss": 0.0006, + "step": 22556 + }, + { + "epoch": 96.41, + "learning_rate": 7.84502530549803e-05, + "loss": 0.0006, + "step": 22560 + }, + { + "epoch": 96.43, + "learning_rate": 7.844066855097241e-05, + "loss": 0.0014, + "step": 22564 + }, + { + "epoch": 96.44, + "learning_rate": 7.84310825018078e-05, + "loss": 0.0001, + "step": 22568 + }, + { + "epoch": 96.46, + "learning_rate": 7.84214949080073e-05, + "loss": 0.0, + "step": 22572 + }, + { + "epoch": 96.48, + "learning_rate": 7.841190577009179e-05, + "loss": 0.0001, + "step": 22576 + }, + { + "epoch": 96.5, + "learning_rate": 7.840231508858222e-05, + "loss": 0.0001, + "step": 22580 + }, + { + "epoch": 96.51, + "learning_rate": 7.839272286399967e-05, + "loss": 0.0002, + "step": 22584 + }, + { + "epoch": 96.53, + "learning_rate": 7.838312909686528e-05, + "loss": 0.0, + "step": 22588 + }, + { + "epoch": 96.55, + "learning_rate": 7.837353378770024e-05, + "loss": 0.0001, + "step": 22592 + }, + { + "epoch": 96.56, + "learning_rate": 7.836393693702588e-05, + "loss": 0.0001, + "step": 22596 + }, + { + "epoch": 96.58, + "learning_rate": 7.835433854536359e-05, + "loss": 0.0001, + "step": 22600 + }, + { + "epoch": 96.6, + "learning_rate": 7.834473861323483e-05, + "loss": 0.0, + "step": 22604 + }, + { + "epoch": 96.62, + "learning_rate": 7.833513714116117e-05, + "loss": 0.0001, + "step": 22608 + }, + { + "epoch": 96.63, + "learning_rate": 7.832553412966421e-05, + "loss": 0.0005, + "step": 22612 + }, + { + "epoch": 96.65, + "learning_rate": 7.831592957926572e-05, + "loss": 0.0026, + "step": 22616 + }, + { + "epoch": 96.67, + "learning_rate": 7.830632349048748e-05, + "loss": 0.0, + "step": 22620 + }, + { + "epoch": 96.68, + "learning_rate": 7.829671586385142e-05, + "loss": 0.0, + "step": 22624 + }, + { + "epoch": 96.7, + "learning_rate": 7.828710669987946e-05, + "loss": 0.0002, + "step": 22628 + }, + { + "epoch": 96.72, + "learning_rate": 7.827749599909368e-05, + "loss": 0.0002, + "step": 22632 + }, + { + "epoch": 96.74, + "learning_rate": 7.826788376201622e-05, + "loss": 0.0007, + "step": 22636 + }, + { + "epoch": 96.75, + "learning_rate": 7.825826998916932e-05, + "loss": 0.0001, + "step": 22640 + }, + { + "epoch": 96.77, + "learning_rate": 7.824865468107528e-05, + "loss": 0.0002, + "step": 22644 + }, + { + "epoch": 96.79, + "learning_rate": 7.823903783825646e-05, + "loss": 0.0002, + "step": 22648 + }, + { + "epoch": 96.8, + "learning_rate": 7.822941946123538e-05, + "loss": 0.0001, + "step": 22652 + }, + { + "epoch": 96.82, + "learning_rate": 7.821979955053458e-05, + "loss": 0.0, + "step": 22656 + }, + { + "epoch": 96.84, + "learning_rate": 7.821017810667669e-05, + "loss": 0.0002, + "step": 22660 + }, + { + "epoch": 96.85, + "learning_rate": 7.820055513018445e-05, + "loss": 0.0, + "step": 22664 + }, + { + "epoch": 96.87, + "learning_rate": 7.819093062158067e-05, + "loss": 0.0, + "step": 22668 + }, + { + "epoch": 96.89, + "learning_rate": 7.818130458138823e-05, + "loss": 0.0001, + "step": 22672 + }, + { + "epoch": 96.91, + "learning_rate": 7.81716770101301e-05, + "loss": 0.0001, + "step": 22676 + }, + { + "epoch": 96.92, + "learning_rate": 7.816204790832935e-05, + "loss": 0.0001, + "step": 22680 + }, + { + "epoch": 96.94, + "learning_rate": 7.81524172765091e-05, + "loss": 0.0004, + "step": 22684 + }, + { + "epoch": 96.96, + "learning_rate": 7.814278511519262e-05, + "loss": 0.0001, + "step": 22688 + }, + { + "epoch": 96.97, + "learning_rate": 7.813315142490318e-05, + "loss": 0.0002, + "step": 22692 + }, + { + "epoch": 96.99, + "learning_rate": 7.812351620616416e-05, + "loss": 0.0, + "step": 22696 + }, + { + "epoch": 97.01, + "learning_rate": 7.811387945949906e-05, + "loss": 0.0002, + "step": 22700 + }, + { + "epoch": 97.03, + "learning_rate": 7.810424118543143e-05, + "loss": 0.0001, + "step": 22704 + }, + { + "epoch": 97.04, + "learning_rate": 7.809460138448491e-05, + "loss": 0.0, + "step": 22708 + }, + { + "epoch": 97.06, + "learning_rate": 7.808496005718323e-05, + "loss": 0.0, + "step": 22712 + }, + { + "epoch": 97.08, + "learning_rate": 7.807531720405015e-05, + "loss": 0.0002, + "step": 22716 + }, + { + "epoch": 97.09, + "learning_rate": 7.806567282560959e-05, + "loss": 0.0001, + "step": 22720 + }, + { + "epoch": 97.11, + "learning_rate": 7.805602692238554e-05, + "loss": 0.0001, + "step": 22724 + }, + { + "epoch": 97.13, + "learning_rate": 7.804637949490203e-05, + "loss": 0.0002, + "step": 22728 + }, + { + "epoch": 97.15, + "learning_rate": 7.80367305436832e-05, + "loss": 0.0001, + "step": 22732 + }, + { + "epoch": 97.16, + "learning_rate": 7.802708006925326e-05, + "loss": 0.0, + "step": 22736 + }, + { + "epoch": 97.18, + "learning_rate": 7.801742807213652e-05, + "loss": 0.0005, + "step": 22740 + }, + { + "epoch": 97.2, + "learning_rate": 7.80077745528574e-05, + "loss": 0.0, + "step": 22744 + }, + { + "epoch": 97.21, + "learning_rate": 7.79981195119403e-05, + "loss": 0.0, + "step": 22748 + }, + { + "epoch": 97.23, + "learning_rate": 7.79884629499098e-05, + "loss": 0.0002, + "step": 22752 + }, + { + "epoch": 97.25, + "learning_rate": 7.797880486729055e-05, + "loss": 0.0, + "step": 22756 + }, + { + "epoch": 97.26, + "learning_rate": 7.796914526460725e-05, + "loss": 0.0002, + "step": 22760 + }, + { + "epoch": 97.28, + "learning_rate": 7.79594841423847e-05, + "loss": 0.0, + "step": 22764 + }, + { + "epoch": 97.3, + "learning_rate": 7.794982150114778e-05, + "loss": 0.0006, + "step": 22768 + }, + { + "epoch": 97.32, + "learning_rate": 7.794015734142144e-05, + "loss": 0.0, + "step": 22772 + }, + { + "epoch": 97.33, + "learning_rate": 7.793049166373075e-05, + "loss": 0.0008, + "step": 22776 + }, + { + "epoch": 97.35, + "learning_rate": 7.792082446860083e-05, + "loss": 0.0005, + "step": 22780 + }, + { + "epoch": 97.37, + "learning_rate": 7.791115575655687e-05, + "loss": 0.0001, + "step": 22784 + }, + { + "epoch": 97.38, + "learning_rate": 7.790148552812417e-05, + "loss": 0.0, + "step": 22788 + }, + { + "epoch": 97.4, + "learning_rate": 7.789181378382816e-05, + "loss": 0.0001, + "step": 22792 + }, + { + "epoch": 97.42, + "learning_rate": 7.78821405241942e-05, + "loss": 0.0001, + "step": 22796 + }, + { + "epoch": 97.44, + "learning_rate": 7.78724657497479e-05, + "loss": 0.0007, + "step": 22800 + }, + { + "epoch": 97.45, + "learning_rate": 7.786278946101487e-05, + "loss": 0.0, + "step": 22804 + }, + { + "epoch": 97.47, + "learning_rate": 7.785311165852078e-05, + "loss": 0.0, + "step": 22808 + }, + { + "epoch": 97.49, + "learning_rate": 7.784343234279147e-05, + "loss": 0.0, + "step": 22812 + }, + { + "epoch": 97.5, + "learning_rate": 7.783375151435277e-05, + "loss": 0.0005, + "step": 22816 + }, + { + "epoch": 97.52, + "learning_rate": 7.782406917373065e-05, + "loss": 0.0, + "step": 22820 + }, + { + "epoch": 97.54, + "learning_rate": 7.781438532145114e-05, + "loss": 0.0001, + "step": 22824 + }, + { + "epoch": 97.56, + "learning_rate": 7.780469995804034e-05, + "loss": 0.0002, + "step": 22828 + }, + { + "epoch": 97.57, + "learning_rate": 7.779501308402446e-05, + "loss": 0.0015, + "step": 22832 + }, + { + "epoch": 97.59, + "learning_rate": 7.778532469992977e-05, + "loss": 0.0, + "step": 22836 + }, + { + "epoch": 97.61, + "learning_rate": 7.777563480628265e-05, + "loss": 0.0004, + "step": 22840 + }, + { + "epoch": 97.62, + "learning_rate": 7.776594340360954e-05, + "loss": 0.0001, + "step": 22844 + }, + { + "epoch": 97.64, + "learning_rate": 7.775625049243695e-05, + "loss": 0.0002, + "step": 22848 + }, + { + "epoch": 97.66, + "learning_rate": 7.77465560732915e-05, + "loss": 0.0, + "step": 22852 + }, + { + "epoch": 97.68, + "learning_rate": 7.773686014669988e-05, + "loss": 0.0, + "step": 22856 + }, + { + "epoch": 97.69, + "learning_rate": 7.772716271318884e-05, + "loss": 0.0, + "step": 22860 + }, + { + "epoch": 97.71, + "learning_rate": 7.771746377328527e-05, + "loss": 0.0001, + "step": 22864 + }, + { + "epoch": 97.73, + "learning_rate": 7.770776332751606e-05, + "loss": 0.0002, + "step": 22868 + }, + { + "epoch": 97.74, + "learning_rate": 7.769806137640827e-05, + "loss": 0.0001, + "step": 22872 + }, + { + "epoch": 97.76, + "learning_rate": 7.768835792048896e-05, + "loss": 0.0, + "step": 22876 + }, + { + "epoch": 97.78, + "learning_rate": 7.767865296028535e-05, + "loss": 0.0, + "step": 22880 + }, + { + "epoch": 97.79, + "learning_rate": 7.766894649632468e-05, + "loss": 0.0001, + "step": 22884 + }, + { + "epoch": 97.81, + "learning_rate": 7.765923852913431e-05, + "loss": 0.0005, + "step": 22888 + }, + { + "epoch": 97.83, + "learning_rate": 7.764952905924162e-05, + "loss": 0.0001, + "step": 22892 + }, + { + "epoch": 97.85, + "learning_rate": 7.763981808717416e-05, + "loss": 0.0001, + "step": 22896 + }, + { + "epoch": 97.86, + "learning_rate": 7.763010561345952e-05, + "loss": 0.0, + "step": 22900 + }, + { + "epoch": 97.88, + "learning_rate": 7.762039163862533e-05, + "loss": 0.0003, + "step": 22904 + }, + { + "epoch": 97.9, + "learning_rate": 7.761067616319939e-05, + "loss": 0.002, + "step": 22908 + }, + { + "epoch": 97.91, + "learning_rate": 7.760095918770951e-05, + "loss": 0.0007, + "step": 22912 + }, + { + "epoch": 97.93, + "learning_rate": 7.75912407126836e-05, + "loss": 0.0, + "step": 22916 + }, + { + "epoch": 97.95, + "learning_rate": 7.758152073864967e-05, + "loss": 0.0, + "step": 22920 + }, + { + "epoch": 97.97, + "learning_rate": 7.757179926613579e-05, + "loss": 0.0001, + "step": 22924 + }, + { + "epoch": 97.98, + "learning_rate": 7.756207629567011e-05, + "loss": 0.0001, + "step": 22928 + }, + { + "epoch": 98.0, + "learning_rate": 7.75523518277809e-05, + "loss": 0.0, + "step": 22932 + }, + { + "epoch": 98.02, + "learning_rate": 7.754262586299645e-05, + "loss": 0.0001, + "step": 22936 + }, + { + "epoch": 98.03, + "learning_rate": 7.753289840184518e-05, + "loss": 0.0, + "step": 22940 + }, + { + "epoch": 98.05, + "learning_rate": 7.752316944485556e-05, + "loss": 0.0002, + "step": 22944 + }, + { + "epoch": 98.07, + "learning_rate": 7.751343899255618e-05, + "loss": 0.0, + "step": 22948 + }, + { + "epoch": 98.09, + "learning_rate": 7.750370704547567e-05, + "loss": 0.0001, + "step": 22952 + }, + { + "epoch": 98.1, + "learning_rate": 7.749397360414274e-05, + "loss": 0.0001, + "step": 22956 + }, + { + "epoch": 98.12, + "learning_rate": 7.748423866908625e-05, + "loss": 0.0002, + "step": 22960 + }, + { + "epoch": 98.14, + "learning_rate": 7.747450224083505e-05, + "loss": 0.0001, + "step": 22964 + }, + { + "epoch": 98.15, + "learning_rate": 7.746476431991811e-05, + "loss": 0.0, + "step": 22968 + }, + { + "epoch": 98.17, + "learning_rate": 7.745502490686452e-05, + "loss": 0.0, + "step": 22972 + }, + { + "epoch": 98.19, + "learning_rate": 7.744528400220336e-05, + "loss": 0.0, + "step": 22976 + }, + { + "epoch": 98.21, + "learning_rate": 7.74355416064639e-05, + "loss": 0.0, + "step": 22980 + }, + { + "epoch": 98.22, + "learning_rate": 7.74257977201754e-05, + "loss": 0.0, + "step": 22984 + }, + { + "epoch": 98.24, + "learning_rate": 7.741605234386724e-05, + "loss": 0.0, + "step": 22988 + }, + { + "epoch": 98.26, + "learning_rate": 7.740630547806889e-05, + "loss": 0.0001, + "step": 22992 + }, + { + "epoch": 98.27, + "learning_rate": 7.739655712330989e-05, + "loss": 0.0001, + "step": 22996 + }, + { + "epoch": 98.29, + "learning_rate": 7.738680728011983e-05, + "loss": 0.0001, + "step": 23000 + }, + { + "epoch": 98.29, + "eval_exact_match": 0.5093555093555093, + "eval_loss": 0.9610804915428162, + "eval_runtime": 137.9706, + "eval_samples_per_second": 6.973, + "step": 23000 + }, + { + "epoch": 98.31, + "learning_rate": 7.737705594902847e-05, + "loss": 0.0007, + "step": 23004 + }, + { + "epoch": 98.32, + "learning_rate": 7.736730313056552e-05, + "loss": 0.0, + "step": 23008 + }, + { + "epoch": 98.34, + "learning_rate": 7.73575488252609e-05, + "loss": 0.0001, + "step": 23012 + }, + { + "epoch": 98.36, + "learning_rate": 7.73477930336445e-05, + "loss": 0.0001, + "step": 23016 + }, + { + "epoch": 98.38, + "learning_rate": 7.73380357562464e-05, + "loss": 0.0001, + "step": 23020 + }, + { + "epoch": 98.39, + "learning_rate": 7.732827699359668e-05, + "loss": 0.0, + "step": 23024 + }, + { + "epoch": 98.41, + "learning_rate": 7.73185167462255e-05, + "loss": 0.0, + "step": 23028 + }, + { + "epoch": 98.43, + "learning_rate": 7.730875501466317e-05, + "loss": 0.0001, + "step": 23032 + }, + { + "epoch": 98.44, + "learning_rate": 7.729899179944e-05, + "loss": 0.0, + "step": 23036 + }, + { + "epoch": 98.46, + "learning_rate": 7.728922710108644e-05, + "loss": 0.0001, + "step": 23040 + }, + { + "epoch": 98.48, + "learning_rate": 7.727946092013298e-05, + "loss": 0.0, + "step": 23044 + }, + { + "epoch": 98.5, + "learning_rate": 7.726969325711023e-05, + "loss": 0.0, + "step": 23048 + }, + { + "epoch": 98.51, + "learning_rate": 7.725992411254885e-05, + "loss": 0.0, + "step": 23052 + }, + { + "epoch": 98.53, + "learning_rate": 7.725015348697956e-05, + "loss": 0.0001, + "step": 23056 + }, + { + "epoch": 98.55, + "learning_rate": 7.724038138093324e-05, + "loss": 0.0001, + "step": 23060 + }, + { + "epoch": 98.56, + "learning_rate": 7.723060779494075e-05, + "loss": 0.0002, + "step": 23064 + }, + { + "epoch": 98.58, + "learning_rate": 7.722083272953314e-05, + "loss": 0.0, + "step": 23068 + }, + { + "epoch": 98.6, + "learning_rate": 7.721105618524141e-05, + "loss": 0.0, + "step": 23072 + }, + { + "epoch": 98.62, + "learning_rate": 7.720127816259677e-05, + "loss": 0.0002, + "step": 23076 + }, + { + "epoch": 98.63, + "learning_rate": 7.719149866213041e-05, + "loss": 0.0, + "step": 23080 + }, + { + "epoch": 98.65, + "learning_rate": 7.718171768437367e-05, + "loss": 0.0001, + "step": 23084 + }, + { + "epoch": 98.67, + "learning_rate": 7.717193522985794e-05, + "loss": 0.0003, + "step": 23088 + }, + { + "epoch": 98.68, + "learning_rate": 7.716215129911467e-05, + "loss": 0.0001, + "step": 23092 + }, + { + "epoch": 98.7, + "learning_rate": 7.715236589267542e-05, + "loss": 0.0001, + "step": 23096 + }, + { + "epoch": 98.72, + "learning_rate": 7.714257901107185e-05, + "loss": 0.0003, + "step": 23100 + }, + { + "epoch": 98.74, + "learning_rate": 7.713279065483565e-05, + "loss": 0.0001, + "step": 23104 + }, + { + "epoch": 98.75, + "learning_rate": 7.712300082449862e-05, + "loss": 0.0, + "step": 23108 + }, + { + "epoch": 98.77, + "learning_rate": 7.71132095205926e-05, + "loss": 0.0028, + "step": 23112 + }, + { + "epoch": 98.79, + "learning_rate": 7.710341674364958e-05, + "loss": 0.0004, + "step": 23116 + }, + { + "epoch": 98.8, + "learning_rate": 7.70936224942016e-05, + "loss": 0.0001, + "step": 23120 + }, + { + "epoch": 98.82, + "learning_rate": 7.708382677278074e-05, + "loss": 0.0003, + "step": 23124 + }, + { + "epoch": 98.84, + "learning_rate": 7.707402957991923e-05, + "loss": 0.0, + "step": 23128 + }, + { + "epoch": 98.85, + "learning_rate": 7.70642309161493e-05, + "loss": 0.0, + "step": 23132 + }, + { + "epoch": 98.87, + "learning_rate": 7.705443078200333e-05, + "loss": 0.0001, + "step": 23136 + }, + { + "epoch": 98.89, + "learning_rate": 7.704462917801376e-05, + "loss": 0.0001, + "step": 23140 + }, + { + "epoch": 98.91, + "learning_rate": 7.703482610471309e-05, + "loss": 0.0, + "step": 23144 + }, + { + "epoch": 98.92, + "learning_rate": 7.70250215626339e-05, + "loss": 0.0, + "step": 23148 + }, + { + "epoch": 98.94, + "learning_rate": 7.701521555230888e-05, + "loss": 0.0, + "step": 23152 + }, + { + "epoch": 98.96, + "learning_rate": 7.700540807427078e-05, + "loss": 0.0, + "step": 23156 + }, + { + "epoch": 98.97, + "learning_rate": 7.699559912905243e-05, + "loss": 0.0003, + "step": 23160 + }, + { + "epoch": 98.99, + "learning_rate": 7.698578871718673e-05, + "loss": 0.0, + "step": 23164 + }, + { + "epoch": 99.01, + "learning_rate": 7.697597683920669e-05, + "loss": 0.0006, + "step": 23168 + }, + { + "epoch": 99.03, + "learning_rate": 7.696616349564539e-05, + "loss": 0.0, + "step": 23172 + }, + { + "epoch": 99.04, + "learning_rate": 7.695634868703594e-05, + "loss": 0.0, + "step": 23176 + }, + { + "epoch": 99.06, + "learning_rate": 7.694653241391161e-05, + "loss": 0.0002, + "step": 23180 + }, + { + "epoch": 99.08, + "learning_rate": 7.693671467680567e-05, + "loss": 0.0002, + "step": 23184 + }, + { + "epoch": 99.09, + "learning_rate": 7.692689547625154e-05, + "loss": 0.0, + "step": 23188 + }, + { + "epoch": 99.11, + "learning_rate": 7.69170748127827e-05, + "loss": 0.0001, + "step": 23192 + }, + { + "epoch": 99.13, + "learning_rate": 7.690725268693266e-05, + "loss": 0.0, + "step": 23196 + }, + { + "epoch": 99.15, + "learning_rate": 7.689742909923505e-05, + "loss": 0.0, + "step": 23200 + }, + { + "epoch": 99.16, + "learning_rate": 7.688760405022362e-05, + "loss": 0.0, + "step": 23204 + }, + { + "epoch": 99.18, + "learning_rate": 7.687777754043212e-05, + "loss": 0.0015, + "step": 23208 + }, + { + "epoch": 99.2, + "learning_rate": 7.686794957039442e-05, + "loss": 0.0001, + "step": 23212 + }, + { + "epoch": 99.21, + "learning_rate": 7.685812014064448e-05, + "loss": 0.0, + "step": 23216 + }, + { + "epoch": 99.23, + "learning_rate": 7.68482892517163e-05, + "loss": 0.0, + "step": 23220 + }, + { + "epoch": 99.25, + "learning_rate": 7.683845690414402e-05, + "loss": 0.0002, + "step": 23224 + }, + { + "epoch": 99.26, + "learning_rate": 7.682862309846177e-05, + "loss": 0.0008, + "step": 23228 + }, + { + "epoch": 99.28, + "learning_rate": 7.681878783520386e-05, + "loss": 0.0, + "step": 23232 + }, + { + "epoch": 99.3, + "learning_rate": 7.680895111490462e-05, + "loss": 0.0, + "step": 23236 + }, + { + "epoch": 99.32, + "learning_rate": 7.679911293809846e-05, + "loss": 0.0002, + "step": 23240 + }, + { + "epoch": 99.33, + "learning_rate": 7.678927330531988e-05, + "loss": 0.0001, + "step": 23244 + }, + { + "epoch": 99.35, + "learning_rate": 7.677943221710346e-05, + "loss": 0.0001, + "step": 23248 + }, + { + "epoch": 99.37, + "learning_rate": 7.676958967398386e-05, + "loss": 0.0001, + "step": 23252 + }, + { + "epoch": 99.38, + "learning_rate": 7.675974567649583e-05, + "loss": 0.0001, + "step": 23256 + }, + { + "epoch": 99.4, + "learning_rate": 7.674990022517417e-05, + "loss": 0.0005, + "step": 23260 + }, + { + "epoch": 99.42, + "learning_rate": 7.674005332055376e-05, + "loss": 0.0, + "step": 23264 + }, + { + "epoch": 99.44, + "learning_rate": 7.673020496316961e-05, + "loss": 0.0, + "step": 23268 + }, + { + "epoch": 99.45, + "learning_rate": 7.672035515355675e-05, + "loss": 0.0, + "step": 23272 + }, + { + "epoch": 99.47, + "learning_rate": 7.67105038922503e-05, + "loss": 0.0, + "step": 23276 + }, + { + "epoch": 99.49, + "learning_rate": 7.670065117978552e-05, + "loss": 0.0, + "step": 23280 + }, + { + "epoch": 99.5, + "learning_rate": 7.669079701669762e-05, + "loss": 0.0, + "step": 23284 + }, + { + "epoch": 99.52, + "learning_rate": 7.668094140352205e-05, + "loss": 0.0, + "step": 23288 + }, + { + "epoch": 99.54, + "learning_rate": 7.667108434079421e-05, + "loss": 0.0, + "step": 23292 + }, + { + "epoch": 99.56, + "learning_rate": 7.666122582904962e-05, + "loss": 0.0, + "step": 23296 + }, + { + "epoch": 99.57, + "learning_rate": 7.665136586882391e-05, + "loss": 0.0, + "step": 23300 + }, + { + "epoch": 99.59, + "learning_rate": 7.664150446065278e-05, + "loss": 0.0001, + "step": 23304 + }, + { + "epoch": 99.61, + "learning_rate": 7.663164160507192e-05, + "loss": 0.0, + "step": 23308 + }, + { + "epoch": 99.62, + "learning_rate": 7.662177730261723e-05, + "loss": 0.0, + "step": 23312 + }, + { + "epoch": 99.64, + "learning_rate": 7.661191155382464e-05, + "loss": 0.0001, + "step": 23316 + }, + { + "epoch": 99.66, + "learning_rate": 7.66020443592301e-05, + "loss": 0.0, + "step": 23320 + }, + { + "epoch": 99.68, + "learning_rate": 7.659217571936969e-05, + "loss": 0.0, + "step": 23324 + }, + { + "epoch": 99.69, + "learning_rate": 7.658230563477962e-05, + "loss": 0.0001, + "step": 23328 + }, + { + "epoch": 99.71, + "learning_rate": 7.657243410599608e-05, + "loss": 0.0, + "step": 23332 + }, + { + "epoch": 99.73, + "learning_rate": 7.656256113355536e-05, + "loss": 0.0006, + "step": 23336 + }, + { + "epoch": 99.74, + "learning_rate": 7.655268671799389e-05, + "loss": 0.0, + "step": 23340 + }, + { + "epoch": 99.76, + "learning_rate": 7.654281085984814e-05, + "loss": 0.0, + "step": 23344 + }, + { + "epoch": 99.78, + "learning_rate": 7.653293355965464e-05, + "loss": 0.0, + "step": 23348 + }, + { + "epoch": 99.79, + "learning_rate": 7.652305481795e-05, + "loss": 0.0002, + "step": 23352 + }, + { + "epoch": 99.81, + "learning_rate": 7.651317463527093e-05, + "loss": 0.0001, + "step": 23356 + }, + { + "epoch": 99.83, + "learning_rate": 7.650329301215424e-05, + "loss": 0.0003, + "step": 23360 + }, + { + "epoch": 99.85, + "learning_rate": 7.649340994913677e-05, + "loss": 0.0001, + "step": 23364 + }, + { + "epoch": 99.86, + "learning_rate": 7.648352544675546e-05, + "loss": 0.0, + "step": 23368 + }, + { + "epoch": 99.88, + "learning_rate": 7.647363950554734e-05, + "loss": 0.0001, + "step": 23372 + }, + { + "epoch": 99.9, + "learning_rate": 7.646375212604949e-05, + "loss": 0.0, + "step": 23376 + }, + { + "epoch": 99.91, + "learning_rate": 7.645386330879906e-05, + "loss": 0.0, + "step": 23380 + }, + { + "epoch": 99.93, + "learning_rate": 7.644397305433334e-05, + "loss": 0.0, + "step": 23384 + }, + { + "epoch": 99.95, + "learning_rate": 7.643408136318964e-05, + "loss": 0.0, + "step": 23388 + }, + { + "epoch": 99.97, + "learning_rate": 7.642418823590538e-05, + "loss": 0.0, + "step": 23392 + }, + { + "epoch": 99.98, + "learning_rate": 7.641429367301802e-05, + "loss": 0.0001, + "step": 23396 + }, + { + "epoch": 100.0, + "learning_rate": 7.640439767506516e-05, + "loss": 0.0005, + "step": 23400 + }, + { + "epoch": 100.02, + "learning_rate": 7.63945002425844e-05, + "loss": 0.0, + "step": 23404 + }, + { + "epoch": 100.03, + "learning_rate": 7.638460137611349e-05, + "loss": 0.0005, + "step": 23408 + }, + { + "epoch": 100.05, + "learning_rate": 7.637470107619021e-05, + "loss": 0.0, + "step": 23412 + }, + { + "epoch": 100.07, + "learning_rate": 7.636479934335243e-05, + "loss": 0.0002, + "step": 23416 + }, + { + "epoch": 100.09, + "learning_rate": 7.635489617813814e-05, + "loss": 0.0007, + "step": 23420 + }, + { + "epoch": 100.1, + "learning_rate": 7.634499158108532e-05, + "loss": 0.0, + "step": 23424 + }, + { + "epoch": 100.12, + "learning_rate": 7.633508555273213e-05, + "loss": 0.0, + "step": 23428 + }, + { + "epoch": 100.14, + "learning_rate": 7.63251780936167e-05, + "loss": 0.0, + "step": 23432 + }, + { + "epoch": 100.15, + "learning_rate": 7.631526920427732e-05, + "loss": 0.0, + "step": 23436 + }, + { + "epoch": 100.17, + "learning_rate": 7.630535888525232e-05, + "loss": 0.0001, + "step": 23440 + }, + { + "epoch": 100.19, + "learning_rate": 7.629544713708016e-05, + "loss": 0.0002, + "step": 23444 + }, + { + "epoch": 100.21, + "learning_rate": 7.62855339602993e-05, + "loss": 0.0001, + "step": 23448 + }, + { + "epoch": 100.22, + "learning_rate": 7.627561935544831e-05, + "loss": 0.0, + "step": 23452 + }, + { + "epoch": 100.24, + "learning_rate": 7.626570332306585e-05, + "loss": 0.0, + "step": 23456 + }, + { + "epoch": 100.26, + "learning_rate": 7.625578586369067e-05, + "loss": 0.0, + "step": 23460 + }, + { + "epoch": 100.27, + "learning_rate": 7.624586697786156e-05, + "loss": 0.0, + "step": 23464 + }, + { + "epoch": 100.29, + "learning_rate": 7.623594666611738e-05, + "loss": 0.0, + "step": 23468 + }, + { + "epoch": 100.31, + "learning_rate": 7.622602492899714e-05, + "loss": 0.0, + "step": 23472 + }, + { + "epoch": 100.32, + "learning_rate": 7.621610176703984e-05, + "loss": 0.0, + "step": 23476 + }, + { + "epoch": 100.34, + "learning_rate": 7.620617718078462e-05, + "loss": 0.0001, + "step": 23480 + }, + { + "epoch": 100.36, + "learning_rate": 7.619625117077067e-05, + "loss": 0.0001, + "step": 23484 + }, + { + "epoch": 100.38, + "learning_rate": 7.618632373753724e-05, + "loss": 0.0, + "step": 23488 + }, + { + "epoch": 100.39, + "learning_rate": 7.617639488162371e-05, + "loss": 0.0002, + "step": 23492 + }, + { + "epoch": 100.41, + "learning_rate": 7.616646460356949e-05, + "loss": 0.0, + "step": 23496 + }, + { + "epoch": 100.43, + "learning_rate": 7.615653290391409e-05, + "loss": 0.0001, + "step": 23500 + }, + { + "epoch": 100.44, + "learning_rate": 7.614659978319709e-05, + "loss": 0.0022, + "step": 23504 + }, + { + "epoch": 100.46, + "learning_rate": 7.613666524195814e-05, + "loss": 0.0002, + "step": 23508 + }, + { + "epoch": 100.48, + "learning_rate": 7.612672928073698e-05, + "loss": 0.0004, + "step": 23512 + }, + { + "epoch": 100.5, + "learning_rate": 7.611679190007342e-05, + "loss": 0.001, + "step": 23516 + }, + { + "epoch": 100.51, + "learning_rate": 7.610685310050733e-05, + "loss": 0.0, + "step": 23520 + }, + { + "epoch": 100.53, + "learning_rate": 7.609691288257872e-05, + "loss": 0.0001, + "step": 23524 + }, + { + "epoch": 100.55, + "learning_rate": 7.608697124682761e-05, + "loss": 0.0009, + "step": 23528 + }, + { + "epoch": 100.56, + "learning_rate": 7.607702819379413e-05, + "loss": 0.0, + "step": 23532 + }, + { + "epoch": 100.58, + "learning_rate": 7.606708372401843e-05, + "loss": 0.0, + "step": 23536 + }, + { + "epoch": 100.6, + "learning_rate": 7.605713783804085e-05, + "loss": 0.0002, + "step": 23540 + }, + { + "epoch": 100.62, + "learning_rate": 7.604719053640172e-05, + "loss": 0.0, + "step": 23544 + }, + { + "epoch": 100.63, + "learning_rate": 7.603724181964144e-05, + "loss": 0.0, + "step": 23548 + }, + { + "epoch": 100.65, + "learning_rate": 7.602729168830056e-05, + "loss": 0.0014, + "step": 23552 + }, + { + "epoch": 100.67, + "learning_rate": 7.601734014291961e-05, + "loss": 0.0008, + "step": 23556 + }, + { + "epoch": 100.68, + "learning_rate": 7.60073871840393e-05, + "loss": 0.0003, + "step": 23560 + }, + { + "epoch": 100.7, + "learning_rate": 7.599743281220033e-05, + "loss": 0.0001, + "step": 23564 + }, + { + "epoch": 100.72, + "learning_rate": 7.598747702794353e-05, + "loss": 0.0, + "step": 23568 + }, + { + "epoch": 100.74, + "learning_rate": 7.59775198318098e-05, + "loss": 0.0, + "step": 23572 + }, + { + "epoch": 100.75, + "learning_rate": 7.596756122434007e-05, + "loss": 0.0, + "step": 23576 + }, + { + "epoch": 100.77, + "learning_rate": 7.59576012060754e-05, + "loss": 0.0023, + "step": 23580 + }, + { + "epoch": 100.79, + "learning_rate": 7.594763977755692e-05, + "loss": 0.0008, + "step": 23584 + }, + { + "epoch": 100.8, + "learning_rate": 7.593767693932583e-05, + "loss": 0.0, + "step": 23588 + }, + { + "epoch": 100.82, + "learning_rate": 7.592771269192339e-05, + "loss": 0.0, + "step": 23592 + }, + { + "epoch": 100.84, + "learning_rate": 7.591774703589092e-05, + "loss": 0.0003, + "step": 23596 + }, + { + "epoch": 100.85, + "learning_rate": 7.59077799717699e-05, + "loss": 0.0, + "step": 23600 + }, + { + "epoch": 100.87, + "learning_rate": 7.58978115001018e-05, + "loss": 0.0, + "step": 23604 + }, + { + "epoch": 100.89, + "learning_rate": 7.588784162142819e-05, + "loss": 0.0, + "step": 23608 + }, + { + "epoch": 100.91, + "learning_rate": 7.587787033629075e-05, + "loss": 0.0, + "step": 23612 + }, + { + "epoch": 100.92, + "learning_rate": 7.586789764523121e-05, + "loss": 0.0001, + "step": 23616 + }, + { + "epoch": 100.94, + "learning_rate": 7.585792354879136e-05, + "loss": 0.0, + "step": 23620 + }, + { + "epoch": 100.96, + "learning_rate": 7.584794804751309e-05, + "loss": 0.0001, + "step": 23624 + }, + { + "epoch": 100.97, + "learning_rate": 7.583797114193836e-05, + "loss": 0.0001, + "step": 23628 + }, + { + "epoch": 100.99, + "learning_rate": 7.582799283260921e-05, + "loss": 0.0, + "step": 23632 + }, + { + "epoch": 101.01, + "learning_rate": 7.581801312006776e-05, + "loss": 0.0, + "step": 23636 + }, + { + "epoch": 101.03, + "learning_rate": 7.580803200485618e-05, + "loss": 0.0, + "step": 23640 + }, + { + "epoch": 101.04, + "learning_rate": 7.579804948751675e-05, + "loss": 0.0, + "step": 23644 + }, + { + "epoch": 101.06, + "learning_rate": 7.57880655685918e-05, + "loss": 0.0001, + "step": 23648 + }, + { + "epoch": 101.08, + "learning_rate": 7.577808024862378e-05, + "loss": 0.0, + "step": 23652 + }, + { + "epoch": 101.09, + "learning_rate": 7.576809352815512e-05, + "loss": 0.0, + "step": 23656 + }, + { + "epoch": 101.11, + "learning_rate": 7.575810540772846e-05, + "loss": 0.0, + "step": 23660 + }, + { + "epoch": 101.13, + "learning_rate": 7.574811588788641e-05, + "loss": 0.0, + "step": 23664 + }, + { + "epoch": 101.15, + "learning_rate": 7.57381249691717e-05, + "loss": 0.0001, + "step": 23668 + }, + { + "epoch": 101.16, + "learning_rate": 7.572813265212711e-05, + "loss": 0.0001, + "step": 23672 + }, + { + "epoch": 101.18, + "learning_rate": 7.571813893729552e-05, + "loss": 0.0002, + "step": 23676 + }, + { + "epoch": 101.2, + "learning_rate": 7.570814382521991e-05, + "loss": 0.0, + "step": 23680 + }, + { + "epoch": 101.21, + "learning_rate": 7.569814731644327e-05, + "loss": 0.0013, + "step": 23684 + }, + { + "epoch": 101.23, + "learning_rate": 7.568814941150873e-05, + "loss": 0.0008, + "step": 23688 + }, + { + "epoch": 101.25, + "learning_rate": 7.567815011095945e-05, + "loss": 0.0, + "step": 23692 + }, + { + "epoch": 101.26, + "learning_rate": 7.566814941533866e-05, + "loss": 0.0, + "step": 23696 + }, + { + "epoch": 101.28, + "learning_rate": 7.565814732518974e-05, + "loss": 0.0017, + "step": 23700 + }, + { + "epoch": 101.3, + "learning_rate": 7.564814384105607e-05, + "loss": 0.0, + "step": 23704 + }, + { + "epoch": 101.32, + "learning_rate": 7.563813896348113e-05, + "loss": 0.0, + "step": 23708 + }, + { + "epoch": 101.33, + "learning_rate": 7.562813269300845e-05, + "loss": 0.0001, + "step": 23712 + }, + { + "epoch": 101.35, + "learning_rate": 7.561812503018172e-05, + "loss": 0.0, + "step": 23716 + }, + { + "epoch": 101.37, + "learning_rate": 7.56081159755446e-05, + "loss": 0.0006, + "step": 23720 + }, + { + "epoch": 101.38, + "learning_rate": 7.559810552964091e-05, + "loss": 0.0, + "step": 23724 + }, + { + "epoch": 101.4, + "learning_rate": 7.558809369301447e-05, + "loss": 0.0, + "step": 23728 + }, + { + "epoch": 101.42, + "learning_rate": 7.557808046620922e-05, + "loss": 0.0006, + "step": 23732 + }, + { + "epoch": 101.44, + "learning_rate": 7.556806584976919e-05, + "loss": 0.0, + "step": 23736 + }, + { + "epoch": 101.45, + "learning_rate": 7.555804984423849e-05, + "loss": 0.0003, + "step": 23740 + }, + { + "epoch": 101.47, + "learning_rate": 7.554803245016123e-05, + "loss": 0.0, + "step": 23744 + }, + { + "epoch": 101.49, + "learning_rate": 7.553801366808165e-05, + "loss": 0.0, + "step": 23748 + }, + { + "epoch": 101.5, + "learning_rate": 7.552799349854408e-05, + "loss": 0.0009, + "step": 23752 + }, + { + "epoch": 101.52, + "learning_rate": 7.551797194209292e-05, + "loss": 0.0, + "step": 23756 + }, + { + "epoch": 101.54, + "learning_rate": 7.550794899927258e-05, + "loss": 0.0, + "step": 23760 + }, + { + "epoch": 101.56, + "learning_rate": 7.549792467062768e-05, + "loss": 0.0, + "step": 23764 + }, + { + "epoch": 101.57, + "learning_rate": 7.548789895670277e-05, + "loss": 0.0, + "step": 23768 + }, + { + "epoch": 101.59, + "learning_rate": 7.547787185804253e-05, + "loss": 0.0, + "step": 23772 + }, + { + "epoch": 101.61, + "learning_rate": 7.546784337519177e-05, + "loss": 0.0, + "step": 23776 + }, + { + "epoch": 101.62, + "learning_rate": 7.545781350869532e-05, + "loss": 0.0, + "step": 23780 + }, + { + "epoch": 101.64, + "learning_rate": 7.544778225909807e-05, + "loss": 0.0, + "step": 23784 + }, + { + "epoch": 101.66, + "learning_rate": 7.543774962694501e-05, + "loss": 0.0, + "step": 23788 + }, + { + "epoch": 101.68, + "learning_rate": 7.542771561278123e-05, + "loss": 0.0, + "step": 23792 + }, + { + "epoch": 101.69, + "learning_rate": 7.541768021715184e-05, + "loss": 0.0001, + "step": 23796 + }, + { + "epoch": 101.71, + "learning_rate": 7.540764344060207e-05, + "loss": 0.0, + "step": 23800 + }, + { + "epoch": 101.73, + "learning_rate": 7.539760528367721e-05, + "loss": 0.0, + "step": 23804 + }, + { + "epoch": 101.74, + "learning_rate": 7.538756574692263e-05, + "loss": 0.0, + "step": 23808 + }, + { + "epoch": 101.76, + "learning_rate": 7.537752483088376e-05, + "loss": 0.0, + "step": 23812 + }, + { + "epoch": 101.78, + "learning_rate": 7.536748253610611e-05, + "loss": 0.0, + "step": 23816 + }, + { + "epoch": 101.79, + "learning_rate": 7.535743886313526e-05, + "loss": 0.0001, + "step": 23820 + }, + { + "epoch": 101.81, + "learning_rate": 7.534739381251691e-05, + "loss": 0.0011, + "step": 23824 + }, + { + "epoch": 101.83, + "learning_rate": 7.533734738479676e-05, + "loss": 0.0001, + "step": 23828 + }, + { + "epoch": 101.85, + "learning_rate": 7.532729958052065e-05, + "loss": 0.0, + "step": 23832 + }, + { + "epoch": 101.86, + "learning_rate": 7.531725040023445e-05, + "loss": 0.0, + "step": 23836 + }, + { + "epoch": 101.88, + "learning_rate": 7.530719984448416e-05, + "loss": 0.0, + "step": 23840 + }, + { + "epoch": 101.9, + "learning_rate": 7.529714791381575e-05, + "loss": 0.0, + "step": 23844 + }, + { + "epoch": 101.91, + "learning_rate": 7.528709460877542e-05, + "loss": 0.0, + "step": 23848 + }, + { + "epoch": 101.93, + "learning_rate": 7.527703992990926e-05, + "loss": 0.0, + "step": 23852 + }, + { + "epoch": 101.95, + "learning_rate": 7.526698387776361e-05, + "loss": 0.0004, + "step": 23856 + }, + { + "epoch": 101.97, + "learning_rate": 7.525692645288477e-05, + "loss": 0.0, + "step": 23860 + }, + { + "epoch": 101.98, + "learning_rate": 7.524686765581916e-05, + "loss": 0.0005, + "step": 23864 + }, + { + "epoch": 102.0, + "learning_rate": 7.523680748711328e-05, + "loss": 0.0001, + "step": 23868 + }, + { + "epoch": 102.02, + "learning_rate": 7.522674594731366e-05, + "loss": 0.0, + "step": 23872 + }, + { + "epoch": 102.03, + "learning_rate": 7.521668303696694e-05, + "loss": 0.0, + "step": 23876 + }, + { + "epoch": 102.05, + "learning_rate": 7.520661875661987e-05, + "loss": 0.0, + "step": 23880 + }, + { + "epoch": 102.07, + "learning_rate": 7.519655310681919e-05, + "loss": 0.0001, + "step": 23884 + }, + { + "epoch": 102.09, + "learning_rate": 7.518648608811176e-05, + "loss": 0.0001, + "step": 23888 + }, + { + "epoch": 102.1, + "learning_rate": 7.517641770104453e-05, + "loss": 0.0001, + "step": 23892 + }, + { + "epoch": 102.12, + "learning_rate": 7.516634794616451e-05, + "loss": 0.0, + "step": 23896 + }, + { + "epoch": 102.14, + "learning_rate": 7.515627682401874e-05, + "loss": 0.0, + "step": 23900 + }, + { + "epoch": 102.15, + "learning_rate": 7.514620433515443e-05, + "loss": 0.0003, + "step": 23904 + }, + { + "epoch": 102.17, + "learning_rate": 7.51361304801188e-05, + "loss": 0.0, + "step": 23908 + }, + { + "epoch": 102.19, + "learning_rate": 7.51260552594591e-05, + "loss": 0.0001, + "step": 23912 + }, + { + "epoch": 102.21, + "learning_rate": 7.511597867372278e-05, + "loss": 0.0, + "step": 23916 + }, + { + "epoch": 102.22, + "learning_rate": 7.510590072345724e-05, + "loss": 0.0, + "step": 23920 + }, + { + "epoch": 102.24, + "learning_rate": 7.509582140921006e-05, + "loss": 0.0001, + "step": 23924 + }, + { + "epoch": 102.26, + "learning_rate": 7.508574073152876e-05, + "loss": 0.0004, + "step": 23928 + }, + { + "epoch": 102.27, + "learning_rate": 7.507565869096109e-05, + "loss": 0.0, + "step": 23932 + }, + { + "epoch": 102.29, + "learning_rate": 7.506557528805476e-05, + "loss": 0.0, + "step": 23936 + }, + { + "epoch": 102.31, + "learning_rate": 7.50554905233576e-05, + "loss": 0.0, + "step": 23940 + }, + { + "epoch": 102.32, + "learning_rate": 7.50454043974175e-05, + "loss": 0.0, + "step": 23944 + }, + { + "epoch": 102.34, + "learning_rate": 7.503531691078245e-05, + "loss": 0.0001, + "step": 23948 + }, + { + "epoch": 102.36, + "learning_rate": 7.50252280640005e-05, + "loss": 0.0, + "step": 23952 + }, + { + "epoch": 102.38, + "learning_rate": 7.501513785761971e-05, + "loss": 0.0001, + "step": 23956 + }, + { + "epoch": 102.39, + "learning_rate": 7.500504629218834e-05, + "loss": 0.0, + "step": 23960 + }, + { + "epoch": 102.41, + "learning_rate": 7.499495336825463e-05, + "loss": 0.0006, + "step": 23964 + }, + { + "epoch": 102.43, + "learning_rate": 7.49848590863669e-05, + "loss": 0.0, + "step": 23968 + }, + { + "epoch": 102.44, + "learning_rate": 7.497476344707359e-05, + "loss": 0.0, + "step": 23972 + }, + { + "epoch": 102.46, + "learning_rate": 7.49646664509232e-05, + "loss": 0.0, + "step": 23976 + }, + { + "epoch": 102.48, + "learning_rate": 7.495456809846424e-05, + "loss": 0.0002, + "step": 23980 + }, + { + "epoch": 102.5, + "learning_rate": 7.494446839024539e-05, + "loss": 0.0, + "step": 23984 + }, + { + "epoch": 102.51, + "learning_rate": 7.493436732681535e-05, + "loss": 0.0001, + "step": 23988 + }, + { + "epoch": 102.53, + "learning_rate": 7.492426490872289e-05, + "loss": 0.0001, + "step": 23992 + }, + { + "epoch": 102.55, + "learning_rate": 7.491416113651686e-05, + "loss": 0.0001, + "step": 23996 + }, + { + "epoch": 102.56, + "learning_rate": 7.49040560107462e-05, + "loss": 0.0, + "step": 24000 + }, + { + "epoch": 102.56, + "eval_exact_match": 0.5343035343035343, + "eval_loss": 0.9552536010742188, + "eval_runtime": 133.8111, + "eval_samples_per_second": 7.189, + "step": 24000 + } + ], + "max_steps": 59904, + "num_train_epochs": 256, + "total_flos": 2.0936137837879296e+17, + "trial_name": null, + "trial_params": null +}