diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 96.3302752293578, - "global_step": 10500, + "epoch": 120.0, + "global_step": 13080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -63218,18 +63218,15548 @@ "step": 10500 }, { - "epoch": 96.33, - "step": 10500, - "total_flos": 1.2873670788396168e+20, - "train_loss": 0.0, - "train_runtime": 113.9955, - "train_samples_per_second": 152.55, - "train_steps_per_second": 4.781 + "epoch": 96.34, + "learning_rate": 9.873853211009175e-05, + "loss": 0.1608, + "step": 10501 + }, + { + "epoch": 96.35, + "learning_rate": 9.870030581039755e-05, + "loss": 0.1755, + "step": 10502 + }, + { + "epoch": 96.36, + "learning_rate": 9.866207951070337e-05, + "loss": 0.0978, + "step": 10503 + }, + { + "epoch": 96.37, + "learning_rate": 9.862385321100918e-05, + "loss": 0.1589, + "step": 10504 + }, + { + "epoch": 96.38, + "learning_rate": 9.8585626911315e-05, + "loss": 0.1309, + "step": 10505 + }, + { + "epoch": 96.39, + "learning_rate": 9.854740061162078e-05, + "loss": 0.1038, + "step": 10506 + }, + { + "epoch": 96.39, + "learning_rate": 9.85091743119266e-05, + "loss": 0.0773, + "step": 10507 + }, + { + "epoch": 96.4, + "learning_rate": 9.847094801223242e-05, + "loss": 0.0701, + "step": 10508 + }, + { + "epoch": 96.41, + "learning_rate": 9.843272171253822e-05, + "loss": 0.0693, + "step": 10509 + }, + { + "epoch": 96.42, + "learning_rate": 9.839449541284404e-05, + "loss": 0.1357, + "step": 10510 + }, + { + "epoch": 96.43, + "learning_rate": 9.835626911314985e-05, + "loss": 0.0942, + "step": 10511 + }, + { + "epoch": 96.44, + "learning_rate": 9.831804281345566e-05, + "loss": 0.1105, + "step": 10512 + }, + { + "epoch": 96.45, + "learning_rate": 9.827981651376147e-05, + "loss": 0.1312, + "step": 10513 + }, + { + "epoch": 96.46, + "learning_rate": 9.824159021406729e-05, + "loss": 0.1684, + "step": 10514 + }, + { + "epoch": 96.47, + "learning_rate": 9.820336391437309e-05, + "loss": 0.0683, + "step": 10515 + }, + { + "epoch": 96.48, + "learning_rate": 9.816513761467891e-05, + "loss": 0.1148, + "step": 10516 + }, + { + "epoch": 96.49, + "learning_rate": 9.812691131498471e-05, + "loss": 0.0958, + "step": 10517 + }, + { + "epoch": 96.5, + "learning_rate": 9.808868501529052e-05, + "loss": 0.1225, + "step": 10518 + }, + { + "epoch": 96.5, + "learning_rate": 9.805045871559632e-05, + "loss": 0.1302, + "step": 10519 + }, + { + "epoch": 96.51, + "learning_rate": 9.801223241590214e-05, + "loss": 0.1702, + "step": 10520 + }, + { + "epoch": 96.52, + "learning_rate": 9.797400611620794e-05, + "loss": 0.1444, + "step": 10521 + }, + { + "epoch": 96.53, + "learning_rate": 9.793577981651376e-05, + "loss": 0.1642, + "step": 10522 + }, + { + "epoch": 96.54, + "learning_rate": 9.789755351681958e-05, + "loss": 0.1328, + "step": 10523 + }, + { + "epoch": 96.55, + "learning_rate": 9.785932721712538e-05, + "loss": 0.1216, + "step": 10524 + }, + { + "epoch": 96.56, + "learning_rate": 9.78211009174312e-05, + "loss": 0.1151, + "step": 10525 + }, + { + "epoch": 96.57, + "learning_rate": 9.7782874617737e-05, + "loss": 0.0657, + "step": 10526 + }, + { + "epoch": 96.58, + "learning_rate": 9.774464831804282e-05, + "loss": 0.0773, + "step": 10527 + }, + { + "epoch": 96.59, + "learning_rate": 9.770642201834863e-05, + "loss": 0.1217, + "step": 10528 + }, + { + "epoch": 96.6, + "learning_rate": 9.766819571865445e-05, + "loss": 0.0877, + "step": 10529 + }, + { + "epoch": 96.61, + "learning_rate": 9.762996941896024e-05, + "loss": 0.1168, + "step": 10530 + }, + { + "epoch": 96.61, + "learning_rate": 9.759174311926605e-05, + "loss": 0.0754, + "step": 10531 + }, + { + "epoch": 96.62, + "learning_rate": 9.755351681957186e-05, + "loss": 0.0723, + "step": 10532 + }, + { + "epoch": 96.63, + "learning_rate": 9.751529051987768e-05, + "loss": 0.1496, + "step": 10533 + }, + { + "epoch": 96.64, + "learning_rate": 9.747706422018348e-05, + "loss": 0.1072, + "step": 10534 + }, + { + "epoch": 96.65, + "learning_rate": 9.74388379204893e-05, + "loss": 0.141, + "step": 10535 + }, + { + "epoch": 96.66, + "learning_rate": 9.740061162079512e-05, + "loss": 0.0756, + "step": 10536 + }, + { + "epoch": 96.67, + "learning_rate": 9.736238532110092e-05, + "loss": 0.1152, + "step": 10537 + }, + { + "epoch": 96.68, + "learning_rate": 9.732415902140674e-05, + "loss": 0.1491, + "step": 10538 + }, + { + "epoch": 96.69, + "learning_rate": 9.728593272171254e-05, + "loss": 0.0857, + "step": 10539 + }, + { + "epoch": 96.7, + "learning_rate": 9.724770642201836e-05, + "loss": 0.0887, + "step": 10540 + }, + { + "epoch": 96.71, + "learning_rate": 9.720948012232417e-05, + "loss": 0.0449, + "step": 10541 + }, + { + "epoch": 96.72, + "learning_rate": 9.717125382262997e-05, + "loss": 0.0635, + "step": 10542 + }, + { + "epoch": 96.72, + "learning_rate": 9.713302752293577e-05, + "loss": 0.0883, + "step": 10543 + }, + { + "epoch": 96.73, + "learning_rate": 9.709480122324159e-05, + "loss": 0.1412, + "step": 10544 + }, + { + "epoch": 96.74, + "learning_rate": 9.70565749235474e-05, + "loss": 0.143, + "step": 10545 + }, + { + "epoch": 96.75, + "learning_rate": 9.701834862385321e-05, + "loss": 0.1546, + "step": 10546 + }, + { + "epoch": 96.76, + "learning_rate": 9.698012232415902e-05, + "loss": 0.1197, + "step": 10547 + }, + { + "epoch": 96.77, + "learning_rate": 9.694189602446484e-05, + "loss": 0.1359, + "step": 10548 + }, + { + "epoch": 96.78, + "learning_rate": 9.690366972477064e-05, + "loss": 0.1124, + "step": 10549 + }, + { + "epoch": 96.79, + "learning_rate": 9.686544342507646e-05, + "loss": 0.1273, + "step": 10550 + }, + { + "epoch": 96.8, + "learning_rate": 9.682721712538228e-05, + "loss": 0.1145, + "step": 10551 + }, + { + "epoch": 96.81, + "learning_rate": 9.678899082568808e-05, + "loss": 0.0947, + "step": 10552 + }, + { + "epoch": 96.82, + "learning_rate": 9.67507645259939e-05, + "loss": 0.0917, + "step": 10553 + }, + { + "epoch": 96.83, + "learning_rate": 9.671253822629969e-05, + "loss": 0.1067, + "step": 10554 + }, + { + "epoch": 96.83, + "learning_rate": 9.66743119266055e-05, + "loss": 0.0778, + "step": 10555 + }, + { + "epoch": 96.84, + "learning_rate": 9.663608562691131e-05, + "loss": 0.0993, + "step": 10556 + }, + { + "epoch": 96.85, + "learning_rate": 9.659785932721713e-05, + "loss": 0.0875, + "step": 10557 + }, + { + "epoch": 96.86, + "learning_rate": 9.655963302752293e-05, + "loss": 0.0887, + "step": 10558 + }, + { + "epoch": 96.87, + "learning_rate": 9.652140672782875e-05, + "loss": 0.0695, + "step": 10559 + }, + { + "epoch": 96.88, + "learning_rate": 9.648318042813455e-05, + "loss": 0.1077, + "step": 10560 + }, + { + "epoch": 96.89, + "learning_rate": 9.644495412844037e-05, + "loss": 0.1115, + "step": 10561 + }, + { + "epoch": 96.9, + "learning_rate": 9.640672782874618e-05, + "loss": 0.135, + "step": 10562 + }, + { + "epoch": 96.91, + "learning_rate": 9.6368501529052e-05, + "loss": 0.0785, + "step": 10563 + }, + { + "epoch": 96.92, + "learning_rate": 9.63302752293578e-05, + "loss": 0.142, + "step": 10564 + }, + { + "epoch": 96.93, + "learning_rate": 9.629204892966362e-05, + "loss": 0.1055, + "step": 10565 + }, + { + "epoch": 96.94, + "learning_rate": 9.625382262996942e-05, + "loss": 0.1395, + "step": 10566 + }, + { + "epoch": 96.94, + "learning_rate": 9.621559633027523e-05, + "loss": 0.1324, + "step": 10567 + }, + { + "epoch": 96.95, + "learning_rate": 9.617737003058104e-05, + "loss": 0.0849, + "step": 10568 + }, + { + "epoch": 96.96, + "learning_rate": 9.613914373088685e-05, + "loss": 0.1442, + "step": 10569 + }, + { + "epoch": 96.97, + "learning_rate": 9.610091743119267e-05, + "loss": 0.1004, + "step": 10570 + }, + { + "epoch": 96.98, + "learning_rate": 9.606269113149847e-05, + "loss": 0.1033, + "step": 10571 + }, + { + "epoch": 96.99, + "learning_rate": 9.602446483180429e-05, + "loss": 0.1199, + "step": 10572 + }, + { + "epoch": 97.0, + "learning_rate": 9.598623853211009e-05, + "loss": 0.0849, + "step": 10573 + }, + { + "epoch": 97.01, + "learning_rate": 9.594801223241591e-05, + "loss": 0.1637, + "step": 10574 + }, + { + "epoch": 97.02, + "learning_rate": 9.590978593272171e-05, + "loss": 0.135, + "step": 10575 + }, + { + "epoch": 97.03, + "learning_rate": 9.587155963302753e-05, + "loss": 0.1242, + "step": 10576 + }, + { + "epoch": 97.04, + "learning_rate": 9.583333333333334e-05, + "loss": 0.1277, + "step": 10577 + }, + { + "epoch": 97.05, + "learning_rate": 9.579510703363914e-05, + "loss": 0.1904, + "step": 10578 + }, + { + "epoch": 97.06, + "learning_rate": 9.575688073394494e-05, + "loss": 0.1065, + "step": 10579 + }, + { + "epoch": 97.06, + "learning_rate": 9.571865443425076e-05, + "loss": 0.0857, + "step": 10580 + }, + { + "epoch": 97.07, + "learning_rate": 9.568042813455658e-05, + "loss": 0.1157, + "step": 10581 + }, + { + "epoch": 97.08, + "learning_rate": 9.564220183486238e-05, + "loss": 0.1409, + "step": 10582 + }, + { + "epoch": 97.09, + "learning_rate": 9.56039755351682e-05, + "loss": 0.1194, + "step": 10583 + }, + { + "epoch": 97.1, + "learning_rate": 9.556574923547401e-05, + "loss": 0.1039, + "step": 10584 + }, + { + "epoch": 97.11, + "learning_rate": 9.552752293577982e-05, + "loss": 0.1274, + "step": 10585 + }, + { + "epoch": 97.12, + "learning_rate": 9.548929663608563e-05, + "loss": 0.1331, + "step": 10586 + }, + { + "epoch": 97.13, + "learning_rate": 9.545107033639145e-05, + "loss": 0.0853, + "step": 10587 + }, + { + "epoch": 97.14, + "learning_rate": 9.541284403669725e-05, + "loss": 0.0938, + "step": 10588 + }, + { + "epoch": 97.15, + "learning_rate": 9.537461773700307e-05, + "loss": 0.0918, + "step": 10589 + }, + { + "epoch": 97.16, + "learning_rate": 9.533639143730886e-05, + "loss": 0.1407, + "step": 10590 + }, + { + "epoch": 97.17, + "learning_rate": 9.529816513761468e-05, + "loss": 0.1013, + "step": 10591 + }, + { + "epoch": 97.17, + "learning_rate": 9.525993883792048e-05, + "loss": 0.0936, + "step": 10592 + }, + { + "epoch": 97.18, + "learning_rate": 9.52217125382263e-05, + "loss": 0.1746, + "step": 10593 + }, + { + "epoch": 97.19, + "learning_rate": 9.51834862385321e-05, + "loss": 0.1102, + "step": 10594 + }, + { + "epoch": 97.2, + "learning_rate": 9.514525993883792e-05, + "loss": 0.1698, + "step": 10595 + }, + { + "epoch": 97.21, + "learning_rate": 9.510703363914374e-05, + "loss": 0.0952, + "step": 10596 + }, + { + "epoch": 97.22, + "learning_rate": 9.506880733944954e-05, + "loss": 0.1334, + "step": 10597 + }, + { + "epoch": 97.23, + "learning_rate": 9.503058103975536e-05, + "loss": 0.1066, + "step": 10598 + }, + { + "epoch": 97.24, + "learning_rate": 9.499235474006117e-05, + "loss": 0.1187, + "step": 10599 + }, + { + "epoch": 97.25, + "learning_rate": 9.495412844036698e-05, + "loss": 0.0963, + "step": 10600 + }, + { + "epoch": 97.26, + "learning_rate": 9.491590214067279e-05, + "loss": 0.1839, + "step": 10601 + }, + { + "epoch": 97.27, + "learning_rate": 9.487767584097859e-05, + "loss": 0.1484, + "step": 10602 + }, + { + "epoch": 97.28, + "learning_rate": 9.48394495412844e-05, + "loss": 0.1716, + "step": 10603 + }, + { + "epoch": 97.28, + "learning_rate": 9.480122324159021e-05, + "loss": 0.1419, + "step": 10604 + }, + { + "epoch": 97.29, + "learning_rate": 9.476299694189602e-05, + "loss": 0.1404, + "step": 10605 + }, + { + "epoch": 97.3, + "learning_rate": 9.472477064220184e-05, + "loss": 0.1667, + "step": 10606 + }, + { + "epoch": 97.31, + "learning_rate": 9.468654434250764e-05, + "loss": 0.1162, + "step": 10607 + }, + { + "epoch": 97.32, + "learning_rate": 9.464831804281346e-05, + "loss": 0.0987, + "step": 10608 + }, + { + "epoch": 97.33, + "learning_rate": 9.461009174311926e-05, + "loss": 0.1306, + "step": 10609 + }, + { + "epoch": 97.34, + "learning_rate": 9.457186544342508e-05, + "loss": 0.0856, + "step": 10610 + }, + { + "epoch": 97.35, + "learning_rate": 9.45336391437309e-05, + "loss": 0.1073, + "step": 10611 + }, + { + "epoch": 97.36, + "learning_rate": 9.44954128440367e-05, + "loss": 0.0922, + "step": 10612 + }, + { + "epoch": 97.37, + "learning_rate": 9.445718654434252e-05, + "loss": 0.1296, + "step": 10613 + }, + { + "epoch": 97.38, + "learning_rate": 9.441896024464831e-05, + "loss": 0.1814, + "step": 10614 + }, + { + "epoch": 97.39, + "learning_rate": 9.438073394495413e-05, + "loss": 0.1006, + "step": 10615 + }, + { + "epoch": 97.39, + "learning_rate": 9.434250764525993e-05, + "loss": 0.1703, + "step": 10616 + }, + { + "epoch": 97.4, + "learning_rate": 9.430428134556575e-05, + "loss": 0.1023, + "step": 10617 + }, + { + "epoch": 97.41, + "learning_rate": 9.426605504587156e-05, + "loss": 0.1091, + "step": 10618 + }, + { + "epoch": 97.42, + "learning_rate": 9.422782874617737e-05, + "loss": 0.1287, + "step": 10619 + }, + { + "epoch": 97.43, + "learning_rate": 9.418960244648318e-05, + "loss": 0.1212, + "step": 10620 + }, + { + "epoch": 97.44, + "learning_rate": 9.4151376146789e-05, + "loss": 0.215, + "step": 10621 + }, + { + "epoch": 97.45, + "learning_rate": 9.41131498470948e-05, + "loss": 0.1602, + "step": 10622 + }, + { + "epoch": 97.46, + "learning_rate": 9.407492354740062e-05, + "loss": 0.0687, + "step": 10623 + }, + { + "epoch": 97.47, + "learning_rate": 9.403669724770644e-05, + "loss": 0.1355, + "step": 10624 + }, + { + "epoch": 97.48, + "learning_rate": 9.399847094801224e-05, + "loss": 0.0946, + "step": 10625 + }, + { + "epoch": 97.49, + "learning_rate": 9.396024464831804e-05, + "loss": 0.0603, + "step": 10626 + }, + { + "epoch": 97.5, + "learning_rate": 9.392201834862385e-05, + "loss": 0.0516, + "step": 10627 + }, + { + "epoch": 97.5, + "learning_rate": 9.388379204892967e-05, + "loss": 0.1725, + "step": 10628 + }, + { + "epoch": 97.51, + "learning_rate": 9.384556574923547e-05, + "loss": 0.1566, + "step": 10629 + }, + { + "epoch": 97.52, + "learning_rate": 9.380733944954129e-05, + "loss": 0.1736, + "step": 10630 + }, + { + "epoch": 97.53, + "learning_rate": 9.376911314984709e-05, + "loss": 0.1354, + "step": 10631 + }, + { + "epoch": 97.54, + "learning_rate": 9.373088685015291e-05, + "loss": 0.1036, + "step": 10632 + }, + { + "epoch": 97.55, + "learning_rate": 9.369266055045872e-05, + "loss": 0.0817, + "step": 10633 + }, + { + "epoch": 97.56, + "learning_rate": 9.365443425076453e-05, + "loss": 0.1002, + "step": 10634 + }, + { + "epoch": 97.57, + "learning_rate": 9.361620795107034e-05, + "loss": 0.1238, + "step": 10635 + }, + { + "epoch": 97.58, + "learning_rate": 9.357798165137616e-05, + "loss": 0.0902, + "step": 10636 + }, + { + "epoch": 97.59, + "learning_rate": 9.353975535168196e-05, + "loss": 0.09, + "step": 10637 + }, + { + "epoch": 97.6, + "learning_rate": 9.350152905198776e-05, + "loss": 0.1204, + "step": 10638 + }, + { + "epoch": 97.61, + "learning_rate": 9.346330275229357e-05, + "loss": 0.0981, + "step": 10639 + }, + { + "epoch": 97.61, + "learning_rate": 9.342507645259939e-05, + "loss": 0.1004, + "step": 10640 + }, + { + "epoch": 97.62, + "learning_rate": 9.33868501529052e-05, + "loss": 0.1542, + "step": 10641 + }, + { + "epoch": 97.63, + "learning_rate": 9.334862385321101e-05, + "loss": 0.1231, + "step": 10642 + }, + { + "epoch": 97.64, + "learning_rate": 9.331039755351683e-05, + "loss": 0.1078, + "step": 10643 + }, + { + "epoch": 97.65, + "learning_rate": 9.327217125382263e-05, + "loss": 0.1091, + "step": 10644 + }, + { + "epoch": 97.66, + "learning_rate": 9.323394495412845e-05, + "loss": 0.1045, + "step": 10645 + }, + { + "epoch": 97.67, + "learning_rate": 9.319571865443425e-05, + "loss": 0.0999, + "step": 10646 + }, + { + "epoch": 97.68, + "learning_rate": 9.315749235474007e-05, + "loss": 0.1106, + "step": 10647 + }, + { + "epoch": 97.69, + "learning_rate": 9.311926605504587e-05, + "loss": 0.1104, + "step": 10648 + }, + { + "epoch": 97.7, + "learning_rate": 9.308103975535169e-05, + "loss": 0.0963, + "step": 10649 + }, + { + "epoch": 97.71, + "learning_rate": 9.304281345565748e-05, + "loss": 0.1252, + "step": 10650 + }, + { + "epoch": 97.72, + "learning_rate": 9.30045871559633e-05, + "loss": 0.0703, + "step": 10651 + }, + { + "epoch": 97.72, + "learning_rate": 9.29663608562691e-05, + "loss": 0.1013, + "step": 10652 + }, + { + "epoch": 97.73, + "learning_rate": 9.292813455657492e-05, + "loss": 0.1037, + "step": 10653 + }, + { + "epoch": 97.74, + "learning_rate": 9.288990825688074e-05, + "loss": 0.0295, + "step": 10654 + }, + { + "epoch": 97.75, + "learning_rate": 9.285168195718655e-05, + "loss": 0.1799, + "step": 10655 + }, + { + "epoch": 97.76, + "learning_rate": 9.281345565749236e-05, + "loss": 0.1424, + "step": 10656 + }, + { + "epoch": 97.77, + "learning_rate": 9.277522935779817e-05, + "loss": 0.1223, + "step": 10657 + }, + { + "epoch": 97.78, + "learning_rate": 9.273700305810398e-05, + "loss": 0.1, + "step": 10658 + }, + { + "epoch": 97.79, + "learning_rate": 9.269877675840979e-05, + "loss": 0.129, + "step": 10659 + }, + { + "epoch": 97.8, + "learning_rate": 9.266055045871561e-05, + "loss": 0.1149, + "step": 10660 + }, + { + "epoch": 97.81, + "learning_rate": 9.262232415902141e-05, + "loss": 0.1512, + "step": 10661 + }, + { + "epoch": 97.82, + "learning_rate": 9.258409785932722e-05, + "loss": 0.1736, + "step": 10662 + }, + { + "epoch": 97.83, + "learning_rate": 9.254587155963302e-05, + "loss": 0.092, + "step": 10663 + }, + { + "epoch": 97.83, + "learning_rate": 9.250764525993884e-05, + "loss": 0.1595, + "step": 10664 + }, + { + "epoch": 97.84, + "learning_rate": 9.246941896024464e-05, + "loss": 0.1275, + "step": 10665 + }, + { + "epoch": 97.85, + "learning_rate": 9.243119266055046e-05, + "loss": 0.1404, + "step": 10666 + }, + { + "epoch": 97.86, + "learning_rate": 9.239296636085626e-05, + "loss": 0.1071, + "step": 10667 + }, + { + "epoch": 97.87, + "learning_rate": 9.235474006116208e-05, + "loss": 0.15, + "step": 10668 + }, + { + "epoch": 97.88, + "learning_rate": 9.23165137614679e-05, + "loss": 0.1968, + "step": 10669 + }, + { + "epoch": 97.89, + "learning_rate": 9.22782874617737e-05, + "loss": 0.1037, + "step": 10670 + }, + { + "epoch": 97.9, + "learning_rate": 9.224006116207952e-05, + "loss": 0.1442, + "step": 10671 + }, + { + "epoch": 97.91, + "learning_rate": 9.220183486238533e-05, + "loss": 0.145, + "step": 10672 + }, + { + "epoch": 97.92, + "learning_rate": 9.216360856269114e-05, + "loss": 0.0877, + "step": 10673 + }, + { + "epoch": 97.93, + "learning_rate": 9.212538226299693e-05, + "loss": 0.0991, + "step": 10674 + }, + { + "epoch": 97.94, + "learning_rate": 9.208715596330275e-05, + "loss": 0.1057, + "step": 10675 + }, + { + "epoch": 97.94, + "learning_rate": 9.204892966360856e-05, + "loss": 0.082, + "step": 10676 + }, + { + "epoch": 97.95, + "learning_rate": 9.201070336391437e-05, + "loss": 0.1038, + "step": 10677 + }, + { + "epoch": 97.96, + "learning_rate": 9.197247706422018e-05, + "loss": 0.1058, + "step": 10678 + }, + { + "epoch": 97.97, + "learning_rate": 9.1934250764526e-05, + "loss": 0.1413, + "step": 10679 + }, + { + "epoch": 97.98, + "learning_rate": 9.18960244648318e-05, + "loss": 0.1176, + "step": 10680 + }, + { + "epoch": 97.99, + "learning_rate": 9.185779816513762e-05, + "loss": 0.0353, + "step": 10681 + }, + { + "epoch": 98.0, + "learning_rate": 9.181957186544342e-05, + "loss": 0.0801, + "step": 10682 + }, + { + "epoch": 98.01, + "learning_rate": 9.178134556574924e-05, + "loss": 0.2246, + "step": 10683 + }, + { + "epoch": 98.02, + "learning_rate": 9.174311926605506e-05, + "loss": 0.1257, + "step": 10684 + }, + { + "epoch": 98.03, + "learning_rate": 9.170489296636086e-05, + "loss": 0.1353, + "step": 10685 + }, + { + "epoch": 98.04, + "learning_rate": 9.166666666666667e-05, + "loss": 0.1157, + "step": 10686 + }, + { + "epoch": 98.05, + "learning_rate": 9.162844036697247e-05, + "loss": 0.171, + "step": 10687 + }, + { + "epoch": 98.06, + "learning_rate": 9.159021406727829e-05, + "loss": 0.1292, + "step": 10688 + }, + { + "epoch": 98.06, + "learning_rate": 9.15519877675841e-05, + "loss": 0.1498, + "step": 10689 + }, + { + "epoch": 98.07, + "learning_rate": 9.151376146788991e-05, + "loss": 0.1411, + "step": 10690 + }, + { + "epoch": 98.08, + "learning_rate": 9.147553516819572e-05, + "loss": 0.1448, + "step": 10691 + }, + { + "epoch": 98.09, + "learning_rate": 9.143730886850153e-05, + "loss": 0.1244, + "step": 10692 + }, + { + "epoch": 98.1, + "learning_rate": 9.139908256880734e-05, + "loss": 0.1452, + "step": 10693 + }, + { + "epoch": 98.11, + "learning_rate": 9.136085626911316e-05, + "loss": 0.1093, + "step": 10694 + }, + { + "epoch": 98.12, + "learning_rate": 9.132262996941896e-05, + "loss": 0.1058, + "step": 10695 + }, + { + "epoch": 98.13, + "learning_rate": 9.128440366972478e-05, + "loss": 0.1084, + "step": 10696 + }, + { + "epoch": 98.14, + "learning_rate": 9.124617737003058e-05, + "loss": 0.1302, + "step": 10697 + }, + { + "epoch": 98.15, + "learning_rate": 9.120795107033639e-05, + "loss": 0.1432, + "step": 10698 + }, + { + "epoch": 98.16, + "learning_rate": 9.11697247706422e-05, + "loss": 0.0703, + "step": 10699 + }, + { + "epoch": 98.17, + "learning_rate": 9.113149847094801e-05, + "loss": 0.1469, + "step": 10700 + }, + { + "epoch": 98.17, + "learning_rate": 9.109327217125383e-05, + "loss": 0.0913, + "step": 10701 + }, + { + "epoch": 98.18, + "learning_rate": 9.105504587155963e-05, + "loss": 0.1069, + "step": 10702 + }, + { + "epoch": 98.19, + "learning_rate": 9.101681957186545e-05, + "loss": 0.1676, + "step": 10703 + }, + { + "epoch": 98.2, + "learning_rate": 9.097859327217125e-05, + "loss": 0.0309, + "step": 10704 + }, + { + "epoch": 98.21, + "learning_rate": 9.094036697247707e-05, + "loss": 0.0768, + "step": 10705 + }, + { + "epoch": 98.22, + "learning_rate": 9.090214067278288e-05, + "loss": 0.1327, + "step": 10706 + }, + { + "epoch": 98.23, + "learning_rate": 9.08639143730887e-05, + "loss": 0.0996, + "step": 10707 + }, + { + "epoch": 98.24, + "learning_rate": 9.08256880733945e-05, + "loss": 0.1094, + "step": 10708 + }, + { + "epoch": 98.25, + "learning_rate": 9.078746177370032e-05, + "loss": 0.0903, + "step": 10709 + }, + { + "epoch": 98.26, + "learning_rate": 9.07492354740061e-05, + "loss": 0.1773, + "step": 10710 + }, + { + "epoch": 98.27, + "learning_rate": 9.071100917431192e-05, + "loss": 0.1687, + "step": 10711 + }, + { + "epoch": 98.28, + "learning_rate": 9.067278287461773e-05, + "loss": 0.1288, + "step": 10712 + }, + { + "epoch": 98.28, + "learning_rate": 9.063455657492355e-05, + "loss": 0.1103, + "step": 10713 + }, + { + "epoch": 98.29, + "learning_rate": 9.059633027522936e-05, + "loss": 0.1706, + "step": 10714 + }, + { + "epoch": 98.3, + "learning_rate": 9.055810397553517e-05, + "loss": 0.1381, + "step": 10715 + }, + { + "epoch": 98.31, + "learning_rate": 9.051987767584099e-05, + "loss": 0.108, + "step": 10716 + }, + { + "epoch": 98.32, + "learning_rate": 9.048165137614679e-05, + "loss": 0.1368, + "step": 10717 + }, + { + "epoch": 98.33, + "learning_rate": 9.044342507645261e-05, + "loss": 0.1698, + "step": 10718 + }, + { + "epoch": 98.34, + "learning_rate": 9.040519877675841e-05, + "loss": 0.1133, + "step": 10719 + }, + { + "epoch": 98.35, + "learning_rate": 9.036697247706423e-05, + "loss": 0.145, + "step": 10720 + }, + { + "epoch": 98.36, + "learning_rate": 9.032874617737003e-05, + "loss": 0.1637, + "step": 10721 + }, + { + "epoch": 98.37, + "learning_rate": 9.029051987767584e-05, + "loss": 0.1439, + "step": 10722 + }, + { + "epoch": 98.38, + "learning_rate": 9.025229357798164e-05, + "loss": 0.1301, + "step": 10723 + }, + { + "epoch": 98.39, + "learning_rate": 9.021406727828746e-05, + "loss": 0.1295, + "step": 10724 + }, + { + "epoch": 98.39, + "learning_rate": 9.017584097859327e-05, + "loss": 0.1513, + "step": 10725 + }, + { + "epoch": 98.4, + "learning_rate": 9.013761467889908e-05, + "loss": 0.1343, + "step": 10726 + }, + { + "epoch": 98.41, + "learning_rate": 9.009938837920489e-05, + "loss": 0.1275, + "step": 10727 + }, + { + "epoch": 98.42, + "learning_rate": 9.00611620795107e-05, + "loss": 0.105, + "step": 10728 + }, + { + "epoch": 98.43, + "learning_rate": 9.002293577981652e-05, + "loss": 0.1102, + "step": 10729 + }, + { + "epoch": 98.44, + "learning_rate": 8.998470948012233e-05, + "loss": 0.1167, + "step": 10730 + }, + { + "epoch": 98.45, + "learning_rate": 8.994648318042815e-05, + "loss": 0.0779, + "step": 10731 + }, + { + "epoch": 98.46, + "learning_rate": 8.990825688073395e-05, + "loss": 0.0744, + "step": 10732 + }, + { + "epoch": 98.47, + "learning_rate": 8.987003058103977e-05, + "loss": 0.1967, + "step": 10733 + }, + { + "epoch": 98.48, + "learning_rate": 8.983180428134556e-05, + "loss": 0.1088, + "step": 10734 + }, + { + "epoch": 98.49, + "learning_rate": 8.979357798165138e-05, + "loss": 0.0881, + "step": 10735 + }, + { + "epoch": 98.5, + "learning_rate": 8.975535168195718e-05, + "loss": 0.0291, + "step": 10736 + }, + { + "epoch": 98.5, + "learning_rate": 8.9717125382263e-05, + "loss": 0.179, + "step": 10737 + }, + { + "epoch": 98.51, + "learning_rate": 8.96788990825688e-05, + "loss": 0.1708, + "step": 10738 + }, + { + "epoch": 98.52, + "learning_rate": 8.964067278287462e-05, + "loss": 0.1168, + "step": 10739 + }, + { + "epoch": 98.53, + "learning_rate": 8.960244648318042e-05, + "loss": 0.1364, + "step": 10740 + }, + { + "epoch": 98.54, + "learning_rate": 8.956422018348624e-05, + "loss": 0.1683, + "step": 10741 + }, + { + "epoch": 98.55, + "learning_rate": 8.952599388379206e-05, + "loss": 0.1355, + "step": 10742 + }, + { + "epoch": 98.56, + "learning_rate": 8.948776758409786e-05, + "loss": 0.1637, + "step": 10743 + }, + { + "epoch": 98.57, + "learning_rate": 8.944954128440368e-05, + "loss": 0.1265, + "step": 10744 + }, + { + "epoch": 98.58, + "learning_rate": 8.941131498470949e-05, + "loss": 0.1094, + "step": 10745 + }, + { + "epoch": 98.59, + "learning_rate": 8.937308868501529e-05, + "loss": 0.1575, + "step": 10746 + }, + { + "epoch": 98.6, + "learning_rate": 8.93348623853211e-05, + "loss": 0.1514, + "step": 10747 + }, + { + "epoch": 98.61, + "learning_rate": 8.929663608562691e-05, + "loss": 0.1838, + "step": 10748 + }, + { + "epoch": 98.61, + "learning_rate": 8.925840978593272e-05, + "loss": 0.0822, + "step": 10749 + }, + { + "epoch": 98.62, + "learning_rate": 8.922018348623854e-05, + "loss": 0.1029, + "step": 10750 + }, + { + "epoch": 98.63, + "learning_rate": 8.918195718654434e-05, + "loss": 0.1407, + "step": 10751 + }, + { + "epoch": 98.64, + "learning_rate": 8.914373088685016e-05, + "loss": 0.0543, + "step": 10752 + }, + { + "epoch": 98.65, + "learning_rate": 8.910550458715596e-05, + "loss": 0.0513, + "step": 10753 + }, + { + "epoch": 98.66, + "learning_rate": 8.906727828746178e-05, + "loss": 0.1082, + "step": 10754 + }, + { + "epoch": 98.67, + "learning_rate": 8.902905198776758e-05, + "loss": 0.1045, + "step": 10755 + }, + { + "epoch": 98.68, + "learning_rate": 8.89908256880734e-05, + "loss": 0.1243, + "step": 10756 + }, + { + "epoch": 98.69, + "learning_rate": 8.895259938837922e-05, + "loss": 0.0912, + "step": 10757 + }, + { + "epoch": 98.7, + "learning_rate": 8.891437308868501e-05, + "loss": 0.1492, + "step": 10758 + }, + { + "epoch": 98.71, + "learning_rate": 8.887614678899083e-05, + "loss": 0.1033, + "step": 10759 + }, + { + "epoch": 98.72, + "learning_rate": 8.883792048929663e-05, + "loss": 0.1331, + "step": 10760 + }, + { + "epoch": 98.72, + "learning_rate": 8.879969418960245e-05, + "loss": 0.094, + "step": 10761 + }, + { + "epoch": 98.73, + "learning_rate": 8.876146788990825e-05, + "loss": 0.0711, + "step": 10762 + }, + { + "epoch": 98.74, + "learning_rate": 8.872324159021407e-05, + "loss": 0.0863, + "step": 10763 + }, + { + "epoch": 98.75, + "learning_rate": 8.868501529051988e-05, + "loss": 0.187, + "step": 10764 + }, + { + "epoch": 98.76, + "learning_rate": 8.86467889908257e-05, + "loss": 0.1062, + "step": 10765 + }, + { + "epoch": 98.77, + "learning_rate": 8.86085626911315e-05, + "loss": 0.1589, + "step": 10766 + }, + { + "epoch": 98.78, + "learning_rate": 8.857033639143732e-05, + "loss": 0.1476, + "step": 10767 + }, + { + "epoch": 98.79, + "learning_rate": 8.853211009174312e-05, + "loss": 0.1451, + "step": 10768 + }, + { + "epoch": 98.8, + "learning_rate": 8.849388379204894e-05, + "loss": 0.1703, + "step": 10769 + }, + { + "epoch": 98.81, + "learning_rate": 8.845565749235473e-05, + "loss": 0.1497, + "step": 10770 + }, + { + "epoch": 98.82, + "learning_rate": 8.841743119266055e-05, + "loss": 0.0998, + "step": 10771 + }, + { + "epoch": 98.83, + "learning_rate": 8.837920489296636e-05, + "loss": 0.1499, + "step": 10772 + }, + { + "epoch": 98.83, + "learning_rate": 8.834097859327217e-05, + "loss": 0.1156, + "step": 10773 + }, + { + "epoch": 98.84, + "learning_rate": 8.830275229357799e-05, + "loss": 0.1122, + "step": 10774 + }, + { + "epoch": 98.85, + "learning_rate": 8.826452599388379e-05, + "loss": 0.0793, + "step": 10775 + }, + { + "epoch": 98.86, + "learning_rate": 8.822629969418961e-05, + "loss": 0.122, + "step": 10776 + }, + { + "epoch": 98.87, + "learning_rate": 8.818807339449541e-05, + "loss": 0.1644, + "step": 10777 + }, + { + "epoch": 98.88, + "learning_rate": 8.814984709480123e-05, + "loss": 0.1835, + "step": 10778 + }, + { + "epoch": 98.89, + "learning_rate": 8.811162079510704e-05, + "loss": 0.0807, + "step": 10779 + }, + { + "epoch": 98.9, + "learning_rate": 8.807339449541285e-05, + "loss": 0.1641, + "step": 10780 + }, + { + "epoch": 98.91, + "learning_rate": 8.803516819571866e-05, + "loss": 0.0923, + "step": 10781 + }, + { + "epoch": 98.92, + "learning_rate": 8.799694189602446e-05, + "loss": 0.1026, + "step": 10782 + }, + { + "epoch": 98.93, + "learning_rate": 8.795871559633027e-05, + "loss": 0.1223, + "step": 10783 + }, + { + "epoch": 98.94, + "learning_rate": 8.792048929663608e-05, + "loss": 0.1044, + "step": 10784 + }, + { + "epoch": 98.94, + "learning_rate": 8.788226299694189e-05, + "loss": 0.2322, + "step": 10785 + }, + { + "epoch": 98.95, + "learning_rate": 8.78440366972477e-05, + "loss": 0.06, + "step": 10786 + }, + { + "epoch": 98.96, + "learning_rate": 8.780581039755352e-05, + "loss": 0.102, + "step": 10787 + }, + { + "epoch": 98.97, + "learning_rate": 8.776758409785933e-05, + "loss": 0.1075, + "step": 10788 + }, + { + "epoch": 98.98, + "learning_rate": 8.772935779816515e-05, + "loss": 0.1429, + "step": 10789 + }, + { + "epoch": 98.99, + "learning_rate": 8.769113149847095e-05, + "loss": 0.0324, + "step": 10790 + }, + { + "epoch": 99.0, + "learning_rate": 8.765290519877677e-05, + "loss": 0.1515, + "step": 10791 + }, + { + "epoch": 99.01, + "learning_rate": 8.761467889908257e-05, + "loss": 0.2012, + "step": 10792 + }, + { + "epoch": 99.02, + "learning_rate": 8.757645259938839e-05, + "loss": 0.1535, + "step": 10793 + }, + { + "epoch": 99.03, + "learning_rate": 8.753822629969418e-05, + "loss": 0.137, + "step": 10794 + }, + { + "epoch": 99.04, + "learning_rate": 8.75e-05, + "loss": 0.143, + "step": 10795 + }, + { + "epoch": 99.05, + "learning_rate": 8.74617737003058e-05, + "loss": 0.1264, + "step": 10796 + }, + { + "epoch": 99.06, + "learning_rate": 8.742354740061162e-05, + "loss": 0.1716, + "step": 10797 + }, + { + "epoch": 99.06, + "learning_rate": 8.738532110091743e-05, + "loss": 0.1515, + "step": 10798 + }, + { + "epoch": 99.07, + "learning_rate": 8.734709480122324e-05, + "loss": 0.1232, + "step": 10799 + }, + { + "epoch": 99.08, + "learning_rate": 8.730886850152905e-05, + "loss": 0.1492, + "step": 10800 + }, + { + "epoch": 99.09, + "learning_rate": 8.727064220183487e-05, + "loss": 0.0875, + "step": 10801 + }, + { + "epoch": 99.1, + "learning_rate": 8.723241590214068e-05, + "loss": 0.16, + "step": 10802 + }, + { + "epoch": 99.11, + "learning_rate": 8.719418960244649e-05, + "loss": 0.1293, + "step": 10803 + }, + { + "epoch": 99.12, + "learning_rate": 8.71559633027523e-05, + "loss": 0.0972, + "step": 10804 + }, + { + "epoch": 99.13, + "learning_rate": 8.711773700305811e-05, + "loss": 0.1757, + "step": 10805 + }, + { + "epoch": 99.14, + "learning_rate": 8.707951070336391e-05, + "loss": 0.1331, + "step": 10806 + }, + { + "epoch": 99.15, + "learning_rate": 8.704128440366972e-05, + "loss": 0.1428, + "step": 10807 + }, + { + "epoch": 99.16, + "learning_rate": 8.700305810397554e-05, + "loss": 0.0453, + "step": 10808 + }, + { + "epoch": 99.17, + "learning_rate": 8.696483180428134e-05, + "loss": 0.147, + "step": 10809 + }, + { + "epoch": 99.17, + "learning_rate": 8.692660550458716e-05, + "loss": 0.1011, + "step": 10810 + }, + { + "epoch": 99.18, + "learning_rate": 8.688837920489296e-05, + "loss": 0.138, + "step": 10811 + }, + { + "epoch": 99.19, + "learning_rate": 8.685015290519878e-05, + "loss": 0.1202, + "step": 10812 + }, + { + "epoch": 99.2, + "learning_rate": 8.681192660550458e-05, + "loss": 0.156, + "step": 10813 + }, + { + "epoch": 99.21, + "learning_rate": 8.67737003058104e-05, + "loss": 0.0772, + "step": 10814 + }, + { + "epoch": 99.22, + "learning_rate": 8.673547400611621e-05, + "loss": 0.143, + "step": 10815 + }, + { + "epoch": 99.23, + "learning_rate": 8.669724770642202e-05, + "loss": 0.0994, + "step": 10816 + }, + { + "epoch": 99.24, + "learning_rate": 8.665902140672784e-05, + "loss": 0.129, + "step": 10817 + }, + { + "epoch": 99.25, + "learning_rate": 8.662079510703363e-05, + "loss": 0.087, + "step": 10818 + }, + { + "epoch": 99.26, + "learning_rate": 8.658256880733945e-05, + "loss": 0.1696, + "step": 10819 + }, + { + "epoch": 99.27, + "learning_rate": 8.654434250764526e-05, + "loss": 0.1857, + "step": 10820 + }, + { + "epoch": 99.28, + "learning_rate": 8.650611620795107e-05, + "loss": 0.1341, + "step": 10821 + }, + { + "epoch": 99.28, + "learning_rate": 8.646788990825688e-05, + "loss": 0.139, + "step": 10822 + }, + { + "epoch": 99.29, + "learning_rate": 8.64296636085627e-05, + "loss": 0.1224, + "step": 10823 + }, + { + "epoch": 99.3, + "learning_rate": 8.63914373088685e-05, + "loss": 0.135, + "step": 10824 + }, + { + "epoch": 99.31, + "learning_rate": 8.635321100917432e-05, + "loss": 0.1548, + "step": 10825 + }, + { + "epoch": 99.32, + "learning_rate": 8.631498470948012e-05, + "loss": 0.1271, + "step": 10826 + }, + { + "epoch": 99.33, + "learning_rate": 8.627675840978594e-05, + "loss": 0.1563, + "step": 10827 + }, + { + "epoch": 99.34, + "learning_rate": 8.623853211009174e-05, + "loss": 0.0791, + "step": 10828 + }, + { + "epoch": 99.35, + "learning_rate": 8.620030581039756e-05, + "loss": 0.1151, + "step": 10829 + }, + { + "epoch": 99.36, + "learning_rate": 8.616207951070335e-05, + "loss": 0.0829, + "step": 10830 + }, + { + "epoch": 99.37, + "learning_rate": 8.612385321100917e-05, + "loss": 0.0801, + "step": 10831 + }, + { + "epoch": 99.38, + "learning_rate": 8.608562691131499e-05, + "loss": 0.1483, + "step": 10832 + }, + { + "epoch": 99.39, + "learning_rate": 8.604740061162079e-05, + "loss": 0.17, + "step": 10833 + }, + { + "epoch": 99.39, + "learning_rate": 8.600917431192661e-05, + "loss": 0.1207, + "step": 10834 + }, + { + "epoch": 99.4, + "learning_rate": 8.597094801223241e-05, + "loss": 0.0917, + "step": 10835 + }, + { + "epoch": 99.41, + "learning_rate": 8.593272171253823e-05, + "loss": 0.1287, + "step": 10836 + }, + { + "epoch": 99.42, + "learning_rate": 8.589449541284404e-05, + "loss": 0.107, + "step": 10837 + }, + { + "epoch": 99.43, + "learning_rate": 8.585626911314985e-05, + "loss": 0.0868, + "step": 10838 + }, + { + "epoch": 99.44, + "learning_rate": 8.581804281345566e-05, + "loss": 0.094, + "step": 10839 + }, + { + "epoch": 99.45, + "learning_rate": 8.577981651376148e-05, + "loss": 0.0835, + "step": 10840 + }, + { + "epoch": 99.46, + "learning_rate": 8.574159021406728e-05, + "loss": 0.0495, + "step": 10841 + }, + { + "epoch": 99.47, + "learning_rate": 8.570336391437309e-05, + "loss": 0.104, + "step": 10842 + }, + { + "epoch": 99.48, + "learning_rate": 8.566513761467889e-05, + "loss": 0.1175, + "step": 10843 + }, + { + "epoch": 99.49, + "learning_rate": 8.562691131498471e-05, + "loss": 0.1384, + "step": 10844 + }, + { + "epoch": 99.5, + "learning_rate": 8.558868501529051e-05, + "loss": 0.0741, + "step": 10845 + }, + { + "epoch": 99.5, + "learning_rate": 8.555045871559633e-05, + "loss": 0.2468, + "step": 10846 + }, + { + "epoch": 99.51, + "learning_rate": 8.551223241590215e-05, + "loss": 0.1536, + "step": 10847 + }, + { + "epoch": 99.52, + "learning_rate": 8.547400611620795e-05, + "loss": 0.1348, + "step": 10848 + }, + { + "epoch": 99.53, + "learning_rate": 8.543577981651377e-05, + "loss": 0.1301, + "step": 10849 + }, + { + "epoch": 99.54, + "learning_rate": 8.539755351681957e-05, + "loss": 0.1523, + "step": 10850 + }, + { + "epoch": 99.55, + "learning_rate": 8.535932721712539e-05, + "loss": 0.0997, + "step": 10851 + }, + { + "epoch": 99.56, + "learning_rate": 8.53211009174312e-05, + "loss": 0.1762, + "step": 10852 + }, + { + "epoch": 99.57, + "learning_rate": 8.528287461773701e-05, + "loss": 0.113, + "step": 10853 + }, + { + "epoch": 99.58, + "learning_rate": 8.52446483180428e-05, + "loss": 0.141, + "step": 10854 + }, + { + "epoch": 99.59, + "learning_rate": 8.520642201834862e-05, + "loss": 0.1972, + "step": 10855 + }, + { + "epoch": 99.6, + "learning_rate": 8.516819571865443e-05, + "loss": 0.0946, + "step": 10856 + }, + { + "epoch": 99.61, + "learning_rate": 8.512996941896024e-05, + "loss": 0.1621, + "step": 10857 + }, + { + "epoch": 99.61, + "learning_rate": 8.509174311926605e-05, + "loss": 0.086, + "step": 10858 + }, + { + "epoch": 99.62, + "learning_rate": 8.505351681957187e-05, + "loss": 0.1551, + "step": 10859 + }, + { + "epoch": 99.63, + "learning_rate": 8.501529051987768e-05, + "loss": 0.1505, + "step": 10860 + }, + { + "epoch": 99.64, + "learning_rate": 8.497706422018349e-05, + "loss": 0.1204, + "step": 10861 + }, + { + "epoch": 99.65, + "learning_rate": 8.49388379204893e-05, + "loss": 0.0843, + "step": 10862 + }, + { + "epoch": 99.66, + "learning_rate": 8.490061162079511e-05, + "loss": 0.0816, + "step": 10863 + }, + { + "epoch": 99.67, + "learning_rate": 8.486238532110093e-05, + "loss": 0.1079, + "step": 10864 + }, + { + "epoch": 99.68, + "learning_rate": 8.482415902140673e-05, + "loss": 0.1357, + "step": 10865 + }, + { + "epoch": 99.69, + "learning_rate": 8.478593272171254e-05, + "loss": 0.0843, + "step": 10866 + }, + { + "epoch": 99.7, + "learning_rate": 8.474770642201834e-05, + "loss": 0.1788, + "step": 10867 + }, + { + "epoch": 99.71, + "learning_rate": 8.470948012232416e-05, + "loss": 0.084, + "step": 10868 + }, + { + "epoch": 99.72, + "learning_rate": 8.467125382262996e-05, + "loss": 0.1195, + "step": 10869 + }, + { + "epoch": 99.72, + "learning_rate": 8.463302752293578e-05, + "loss": 0.0697, + "step": 10870 + }, + { + "epoch": 99.73, + "learning_rate": 8.459480122324159e-05, + "loss": 0.0917, + "step": 10871 + }, + { + "epoch": 99.74, + "learning_rate": 8.45565749235474e-05, + "loss": 0.2404, + "step": 10872 + }, + { + "epoch": 99.75, + "learning_rate": 8.451834862385321e-05, + "loss": 0.1657, + "step": 10873 + }, + { + "epoch": 99.76, + "learning_rate": 8.448012232415903e-05, + "loss": 0.1577, + "step": 10874 + }, + { + "epoch": 99.77, + "learning_rate": 8.444189602446484e-05, + "loss": 0.1542, + "step": 10875 + }, + { + "epoch": 99.78, + "learning_rate": 8.440366972477065e-05, + "loss": 0.1136, + "step": 10876 + }, + { + "epoch": 99.79, + "learning_rate": 8.436544342507647e-05, + "loss": 0.2198, + "step": 10877 + }, + { + "epoch": 99.8, + "learning_rate": 8.432721712538226e-05, + "loss": 0.1766, + "step": 10878 + }, + { + "epoch": 99.81, + "learning_rate": 8.428899082568807e-05, + "loss": 0.092, + "step": 10879 + }, + { + "epoch": 99.82, + "learning_rate": 8.425076452599388e-05, + "loss": 0.1227, + "step": 10880 + }, + { + "epoch": 99.83, + "learning_rate": 8.42125382262997e-05, + "loss": 0.1639, + "step": 10881 + }, + { + "epoch": 99.83, + "learning_rate": 8.41743119266055e-05, + "loss": 0.1598, + "step": 10882 + }, + { + "epoch": 99.84, + "learning_rate": 8.413608562691132e-05, + "loss": 0.1785, + "step": 10883 + }, + { + "epoch": 99.85, + "learning_rate": 8.409785932721712e-05, + "loss": 0.1099, + "step": 10884 + }, + { + "epoch": 99.86, + "learning_rate": 8.405963302752294e-05, + "loss": 0.1522, + "step": 10885 + }, + { + "epoch": 99.87, + "learning_rate": 8.402140672782874e-05, + "loss": 0.1384, + "step": 10886 + }, + { + "epoch": 99.88, + "learning_rate": 8.398318042813456e-05, + "loss": 0.1241, + "step": 10887 + }, + { + "epoch": 99.89, + "learning_rate": 8.394495412844037e-05, + "loss": 0.1853, + "step": 10888 + }, + { + "epoch": 99.9, + "learning_rate": 8.390672782874618e-05, + "loss": 0.1023, + "step": 10889 + }, + { + "epoch": 99.91, + "learning_rate": 8.386850152905199e-05, + "loss": 0.1009, + "step": 10890 + }, + { + "epoch": 99.92, + "learning_rate": 8.38302752293578e-05, + "loss": 0.1259, + "step": 10891 + }, + { + "epoch": 99.93, + "learning_rate": 8.379204892966361e-05, + "loss": 0.1039, + "step": 10892 + }, + { + "epoch": 99.94, + "learning_rate": 8.375382262996942e-05, + "loss": 0.0836, + "step": 10893 + }, + { + "epoch": 99.94, + "learning_rate": 8.371559633027523e-05, + "loss": 0.1303, + "step": 10894 + }, + { + "epoch": 99.95, + "learning_rate": 8.367737003058104e-05, + "loss": 0.0629, + "step": 10895 + }, + { + "epoch": 99.96, + "learning_rate": 8.363914373088686e-05, + "loss": 0.1409, + "step": 10896 + }, + { + "epoch": 99.97, + "learning_rate": 8.360091743119266e-05, + "loss": 0.1058, + "step": 10897 + }, + { + "epoch": 99.98, + "learning_rate": 8.356269113149848e-05, + "loss": 0.1542, + "step": 10898 + }, + { + "epoch": 99.99, + "learning_rate": 8.352446483180428e-05, + "loss": 0.1056, + "step": 10899 + }, + { + "epoch": 100.0, + "learning_rate": 8.34862385321101e-05, + "loss": 0.1382, + "step": 10900 + }, + { + "epoch": 100.01, + "learning_rate": 8.34480122324159e-05, + "loss": 0.2297, + "step": 10901 + }, + { + "epoch": 100.02, + "learning_rate": 8.340978593272171e-05, + "loss": 0.176, + "step": 10902 + }, + { + "epoch": 100.03, + "learning_rate": 8.337155963302751e-05, + "loss": 0.1407, + "step": 10903 + }, + { + "epoch": 100.04, + "learning_rate": 8.333333333333333e-05, + "loss": 0.0956, + "step": 10904 + }, + { + "epoch": 100.05, + "learning_rate": 8.329510703363915e-05, + "loss": 0.1639, + "step": 10905 + }, + { + "epoch": 100.06, + "learning_rate": 8.325688073394495e-05, + "loss": 0.1244, + "step": 10906 + }, + { + "epoch": 100.06, + "learning_rate": 8.321865443425077e-05, + "loss": 0.1043, + "step": 10907 + }, + { + "epoch": 100.07, + "learning_rate": 8.318042813455657e-05, + "loss": 0.1223, + "step": 10908 + }, + { + "epoch": 100.08, + "learning_rate": 8.314220183486239e-05, + "loss": 0.1644, + "step": 10909 + }, + { + "epoch": 100.09, + "learning_rate": 8.31039755351682e-05, + "loss": 0.0975, + "step": 10910 + }, + { + "epoch": 100.1, + "learning_rate": 8.306574923547401e-05, + "loss": 0.092, + "step": 10911 + }, + { + "epoch": 100.11, + "learning_rate": 8.302752293577982e-05, + "loss": 0.1564, + "step": 10912 + }, + { + "epoch": 100.12, + "learning_rate": 8.298929663608564e-05, + "loss": 0.1079, + "step": 10913 + }, + { + "epoch": 100.13, + "learning_rate": 8.295107033639144e-05, + "loss": 0.0921, + "step": 10914 + }, + { + "epoch": 100.14, + "learning_rate": 8.291284403669725e-05, + "loss": 0.0912, + "step": 10915 + }, + { + "epoch": 100.15, + "learning_rate": 8.287461773700305e-05, + "loss": 0.1305, + "step": 10916 + }, + { + "epoch": 100.16, + "learning_rate": 8.283639143730887e-05, + "loss": 0.0927, + "step": 10917 + }, + { + "epoch": 100.17, + "learning_rate": 8.279816513761467e-05, + "loss": 0.139, + "step": 10918 + }, + { + "epoch": 100.17, + "learning_rate": 8.275993883792049e-05, + "loss": 0.1169, + "step": 10919 + }, + { + "epoch": 100.18, + "learning_rate": 8.272171253822631e-05, + "loss": 0.1092, + "step": 10920 + }, + { + "epoch": 100.19, + "learning_rate": 8.268348623853211e-05, + "loss": 0.0988, + "step": 10921 + }, + { + "epoch": 100.2, + "learning_rate": 8.264525993883793e-05, + "loss": 0.1263, + "step": 10922 + }, + { + "epoch": 100.21, + "learning_rate": 8.260703363914373e-05, + "loss": 0.1033, + "step": 10923 + }, + { + "epoch": 100.22, + "learning_rate": 8.256880733944955e-05, + "loss": 0.1157, + "step": 10924 + }, + { + "epoch": 100.23, + "learning_rate": 8.253058103975536e-05, + "loss": 0.086, + "step": 10925 + }, + { + "epoch": 100.24, + "learning_rate": 8.249235474006117e-05, + "loss": 0.0727, + "step": 10926 + }, + { + "epoch": 100.25, + "learning_rate": 8.245412844036696e-05, + "loss": 0.0329, + "step": 10927 + }, + { + "epoch": 100.26, + "learning_rate": 8.241590214067278e-05, + "loss": 0.1893, + "step": 10928 + }, + { + "epoch": 100.27, + "learning_rate": 8.237767584097859e-05, + "loss": 0.1486, + "step": 10929 + }, + { + "epoch": 100.28, + "learning_rate": 8.23394495412844e-05, + "loss": 0.1911, + "step": 10930 + }, + { + "epoch": 100.28, + "learning_rate": 8.230122324159021e-05, + "loss": 0.112, + "step": 10931 + }, + { + "epoch": 100.29, + "learning_rate": 8.226299694189603e-05, + "loss": 0.1417, + "step": 10932 + }, + { + "epoch": 100.3, + "learning_rate": 8.222477064220183e-05, + "loss": 0.1225, + "step": 10933 + }, + { + "epoch": 100.31, + "learning_rate": 8.218654434250765e-05, + "loss": 0.1323, + "step": 10934 + }, + { + "epoch": 100.32, + "learning_rate": 8.214831804281347e-05, + "loss": 0.1809, + "step": 10935 + }, + { + "epoch": 100.33, + "learning_rate": 8.211009174311927e-05, + "loss": 0.1204, + "step": 10936 + }, + { + "epoch": 100.34, + "learning_rate": 8.207186544342509e-05, + "loss": 0.1507, + "step": 10937 + }, + { + "epoch": 100.35, + "learning_rate": 8.203363914373089e-05, + "loss": 0.0865, + "step": 10938 + }, + { + "epoch": 100.36, + "learning_rate": 8.19954128440367e-05, + "loss": 0.1586, + "step": 10939 + }, + { + "epoch": 100.37, + "learning_rate": 8.19571865443425e-05, + "loss": 0.1893, + "step": 10940 + }, + { + "epoch": 100.38, + "learning_rate": 8.191896024464832e-05, + "loss": 0.1218, + "step": 10941 + }, + { + "epoch": 100.39, + "learning_rate": 8.188073394495412e-05, + "loss": 0.1345, + "step": 10942 + }, + { + "epoch": 100.39, + "learning_rate": 8.184250764525994e-05, + "loss": 0.1171, + "step": 10943 + }, + { + "epoch": 100.4, + "learning_rate": 8.180428134556575e-05, + "loss": 0.1258, + "step": 10944 + }, + { + "epoch": 100.41, + "learning_rate": 8.176605504587156e-05, + "loss": 0.0827, + "step": 10945 + }, + { + "epoch": 100.42, + "learning_rate": 8.172782874617737e-05, + "loss": 0.1376, + "step": 10946 + }, + { + "epoch": 100.43, + "learning_rate": 8.168960244648319e-05, + "loss": 0.1663, + "step": 10947 + }, + { + "epoch": 100.44, + "learning_rate": 8.1651376146789e-05, + "loss": 0.119, + "step": 10948 + }, + { + "epoch": 100.45, + "learning_rate": 8.161314984709481e-05, + "loss": 0.0682, + "step": 10949 + }, + { + "epoch": 100.46, + "learning_rate": 8.157492354740063e-05, + "loss": 0.0879, + "step": 10950 + }, + { + "epoch": 100.47, + "learning_rate": 8.153669724770642e-05, + "loss": 0.1122, + "step": 10951 + }, + { + "epoch": 100.48, + "learning_rate": 8.149847094801223e-05, + "loss": 0.1447, + "step": 10952 + }, + { + "epoch": 100.49, + "learning_rate": 8.146024464831804e-05, + "loss": 0.1115, + "step": 10953 + }, + { + "epoch": 100.5, + "learning_rate": 8.142201834862386e-05, + "loss": 0.0876, + "step": 10954 + }, + { + "epoch": 100.5, + "learning_rate": 8.138379204892966e-05, + "loss": 0.2055, + "step": 10955 + }, + { + "epoch": 100.51, + "learning_rate": 8.134556574923548e-05, + "loss": 0.1742, + "step": 10956 + }, + { + "epoch": 100.52, + "learning_rate": 8.130733944954128e-05, + "loss": 0.1612, + "step": 10957 + }, + { + "epoch": 100.53, + "learning_rate": 8.12691131498471e-05, + "loss": 0.1215, + "step": 10958 + }, + { + "epoch": 100.54, + "learning_rate": 8.12308868501529e-05, + "loss": 0.1598, + "step": 10959 + }, + { + "epoch": 100.55, + "learning_rate": 8.119266055045872e-05, + "loss": 0.0829, + "step": 10960 + }, + { + "epoch": 100.56, + "learning_rate": 8.115443425076453e-05, + "loss": 0.1668, + "step": 10961 + }, + { + "epoch": 100.57, + "learning_rate": 8.111620795107035e-05, + "loss": 0.0964, + "step": 10962 + }, + { + "epoch": 100.58, + "learning_rate": 8.107798165137614e-05, + "loss": 0.1044, + "step": 10963 + }, + { + "epoch": 100.59, + "learning_rate": 8.103975535168195e-05, + "loss": 0.0917, + "step": 10964 + }, + { + "epoch": 100.6, + "learning_rate": 8.100152905198777e-05, + "loss": 0.0911, + "step": 10965 + }, + { + "epoch": 100.61, + "learning_rate": 8.096330275229358e-05, + "loss": 0.0838, + "step": 10966 + }, + { + "epoch": 100.61, + "learning_rate": 8.09250764525994e-05, + "loss": 0.1171, + "step": 10967 + }, + { + "epoch": 100.62, + "learning_rate": 8.08868501529052e-05, + "loss": 0.1525, + "step": 10968 + }, + { + "epoch": 100.63, + "learning_rate": 8.084862385321102e-05, + "loss": 0.0612, + "step": 10969 + }, + { + "epoch": 100.64, + "learning_rate": 8.081039755351682e-05, + "loss": 0.0533, + "step": 10970 + }, + { + "epoch": 100.65, + "learning_rate": 8.077217125382264e-05, + "loss": 0.1138, + "step": 10971 + }, + { + "epoch": 100.66, + "learning_rate": 8.073394495412844e-05, + "loss": 0.111, + "step": 10972 + }, + { + "epoch": 100.67, + "learning_rate": 8.069571865443426e-05, + "loss": 0.0599, + "step": 10973 + }, + { + "epoch": 100.68, + "learning_rate": 8.065749235474006e-05, + "loss": 0.1704, + "step": 10974 + }, + { + "epoch": 100.69, + "learning_rate": 8.061926605504587e-05, + "loss": 0.1029, + "step": 10975 + }, + { + "epoch": 100.7, + "learning_rate": 8.058103975535167e-05, + "loss": 0.1379, + "step": 10976 + }, + { + "epoch": 100.71, + "learning_rate": 8.054281345565749e-05, + "loss": 0.0974, + "step": 10977 + }, + { + "epoch": 100.72, + "learning_rate": 8.050458715596331e-05, + "loss": 0.1202, + "step": 10978 + }, + { + "epoch": 100.72, + "learning_rate": 8.046636085626911e-05, + "loss": 0.1289, + "step": 10979 + }, + { + "epoch": 100.73, + "learning_rate": 8.042813455657493e-05, + "loss": 0.0734, + "step": 10980 + }, + { + "epoch": 100.74, + "learning_rate": 8.038990825688074e-05, + "loss": 0.1213, + "step": 10981 + }, + { + "epoch": 100.75, + "learning_rate": 8.035168195718655e-05, + "loss": 0.176, + "step": 10982 + }, + { + "epoch": 100.76, + "learning_rate": 8.031345565749236e-05, + "loss": 0.1457, + "step": 10983 + }, + { + "epoch": 100.77, + "learning_rate": 8.027522935779818e-05, + "loss": 0.1069, + "step": 10984 + }, + { + "epoch": 100.78, + "learning_rate": 8.023700305810398e-05, + "loss": 0.1208, + "step": 10985 + }, + { + "epoch": 100.79, + "learning_rate": 8.01987767584098e-05, + "loss": 0.2068, + "step": 10986 + }, + { + "epoch": 100.8, + "learning_rate": 8.016055045871559e-05, + "loss": 0.0944, + "step": 10987 + }, + { + "epoch": 100.81, + "learning_rate": 8.01223241590214e-05, + "loss": 0.0853, + "step": 10988 + }, + { + "epoch": 100.82, + "learning_rate": 8.008409785932721e-05, + "loss": 0.1, + "step": 10989 + }, + { + "epoch": 100.83, + "learning_rate": 8.004587155963303e-05, + "loss": 0.0953, + "step": 10990 + }, + { + "epoch": 100.83, + "learning_rate": 8.000764525993883e-05, + "loss": 0.1048, + "step": 10991 + }, + { + "epoch": 100.84, + "learning_rate": 7.996941896024465e-05, + "loss": 0.1213, + "step": 10992 + }, + { + "epoch": 100.85, + "learning_rate": 7.993119266055047e-05, + "loss": 0.1279, + "step": 10993 + }, + { + "epoch": 100.86, + "learning_rate": 7.989296636085627e-05, + "loss": 0.1786, + "step": 10994 + }, + { + "epoch": 100.87, + "learning_rate": 7.985474006116209e-05, + "loss": 0.1107, + "step": 10995 + }, + { + "epoch": 100.88, + "learning_rate": 7.98165137614679e-05, + "loss": 0.1014, + "step": 10996 + }, + { + "epoch": 100.89, + "learning_rate": 7.977828746177371e-05, + "loss": 0.1331, + "step": 10997 + }, + { + "epoch": 100.9, + "learning_rate": 7.974006116207952e-05, + "loss": 0.1205, + "step": 10998 + }, + { + "epoch": 100.91, + "learning_rate": 7.970183486238532e-05, + "loss": 0.1238, + "step": 10999 + }, + { + "epoch": 100.92, + "learning_rate": 7.966360856269112e-05, + "loss": 0.0896, + "step": 11000 + }, + { + "epoch": 100.92, + "eval_cer": 0.1398071935111471, + "eval_loss": 0.7186796069145203, + "eval_runtime": 87.2793, + "eval_samples_per_second": 18.87, + "eval_steps_per_second": 2.36, + "eval_wer": 0.4885098559901951, + "step": 11000 + }, + { + "epoch": 100.93, + "learning_rate": 7.962538226299694e-05, + "loss": 0.0813, + "step": 11001 + }, + { + "epoch": 100.94, + "learning_rate": 7.958715596330275e-05, + "loss": 0.0822, + "step": 11002 + }, + { + "epoch": 100.94, + "learning_rate": 7.954892966360856e-05, + "loss": 0.1016, + "step": 11003 + }, + { + "epoch": 100.95, + "learning_rate": 7.951070336391437e-05, + "loss": 0.1111, + "step": 11004 + }, + { + "epoch": 100.96, + "learning_rate": 7.947247706422019e-05, + "loss": 0.0882, + "step": 11005 + }, + { + "epoch": 100.97, + "learning_rate": 7.943425076452599e-05, + "loss": 0.1406, + "step": 11006 + }, + { + "epoch": 100.98, + "learning_rate": 7.939602446483181e-05, + "loss": 0.0808, + "step": 11007 + }, + { + "epoch": 100.99, + "learning_rate": 7.935779816513763e-05, + "loss": 0.083, + "step": 11008 + }, + { + "epoch": 101.0, + "learning_rate": 7.931957186544343e-05, + "loss": 0.1158, + "step": 11009 + }, + { + "epoch": 101.01, + "learning_rate": 7.928134556574925e-05, + "loss": 0.1394, + "step": 11010 + }, + { + "epoch": 101.02, + "learning_rate": 7.924311926605504e-05, + "loss": 0.1347, + "step": 11011 + }, + { + "epoch": 101.03, + "learning_rate": 7.920489296636086e-05, + "loss": 0.1475, + "step": 11012 + }, + { + "epoch": 101.04, + "learning_rate": 7.916666666666666e-05, + "loss": 0.165, + "step": 11013 + }, + { + "epoch": 101.05, + "learning_rate": 7.912844036697248e-05, + "loss": 0.1418, + "step": 11014 + }, + { + "epoch": 101.06, + "learning_rate": 7.909021406727828e-05, + "loss": 0.0649, + "step": 11015 + }, + { + "epoch": 101.06, + "learning_rate": 7.90519877675841e-05, + "loss": 0.1036, + "step": 11016 + }, + { + "epoch": 101.07, + "learning_rate": 7.90137614678899e-05, + "loss": 0.1124, + "step": 11017 + }, + { + "epoch": 101.08, + "learning_rate": 7.897553516819572e-05, + "loss": 0.1409, + "step": 11018 + }, + { + "epoch": 101.09, + "learning_rate": 7.893730886850153e-05, + "loss": 0.0802, + "step": 11019 + }, + { + "epoch": 101.1, + "learning_rate": 7.889908256880735e-05, + "loss": 0.1069, + "step": 11020 + }, + { + "epoch": 101.11, + "learning_rate": 7.886085626911315e-05, + "loss": 0.1126, + "step": 11021 + }, + { + "epoch": 101.12, + "learning_rate": 7.882262996941897e-05, + "loss": 0.0992, + "step": 11022 + }, + { + "epoch": 101.13, + "learning_rate": 7.878440366972477e-05, + "loss": 0.1535, + "step": 11023 + }, + { + "epoch": 101.14, + "learning_rate": 7.874617737003058e-05, + "loss": 0.1428, + "step": 11024 + }, + { + "epoch": 101.15, + "learning_rate": 7.87079510703364e-05, + "loss": 0.2055, + "step": 11025 + }, + { + "epoch": 101.16, + "learning_rate": 7.86697247706422e-05, + "loss": 0.0525, + "step": 11026 + }, + { + "epoch": 101.17, + "learning_rate": 7.863149847094802e-05, + "loss": 0.1214, + "step": 11027 + }, + { + "epoch": 101.17, + "learning_rate": 7.859327217125382e-05, + "loss": 0.1075, + "step": 11028 + }, + { + "epoch": 101.18, + "learning_rate": 7.855504587155964e-05, + "loss": 0.1118, + "step": 11029 + }, + { + "epoch": 101.19, + "learning_rate": 7.851681957186544e-05, + "loss": 0.1439, + "step": 11030 + }, + { + "epoch": 101.2, + "learning_rate": 7.847859327217126e-05, + "loss": 0.1298, + "step": 11031 + }, + { + "epoch": 101.21, + "learning_rate": 7.844036697247707e-05, + "loss": 0.1248, + "step": 11032 + }, + { + "epoch": 101.22, + "learning_rate": 7.840214067278288e-05, + "loss": 0.078, + "step": 11033 + }, + { + "epoch": 101.23, + "learning_rate": 7.836391437308869e-05, + "loss": 0.1159, + "step": 11034 + }, + { + "epoch": 101.24, + "learning_rate": 7.832568807339449e-05, + "loss": 0.1376, + "step": 11035 + }, + { + "epoch": 101.25, + "learning_rate": 7.82874617737003e-05, + "loss": 0.0324, + "step": 11036 + }, + { + "epoch": 101.26, + "learning_rate": 7.824923547400611e-05, + "loss": 0.1985, + "step": 11037 + }, + { + "epoch": 101.27, + "learning_rate": 7.821100917431193e-05, + "loss": 0.1914, + "step": 11038 + }, + { + "epoch": 101.28, + "learning_rate": 7.817278287461774e-05, + "loss": 0.1351, + "step": 11039 + }, + { + "epoch": 101.28, + "learning_rate": 7.813455657492355e-05, + "loss": 0.1434, + "step": 11040 + }, + { + "epoch": 101.29, + "learning_rate": 7.809633027522936e-05, + "loss": 0.1122, + "step": 11041 + }, + { + "epoch": 101.3, + "learning_rate": 7.805810397553518e-05, + "loss": 0.1489, + "step": 11042 + }, + { + "epoch": 101.31, + "learning_rate": 7.801987767584098e-05, + "loss": 0.1446, + "step": 11043 + }, + { + "epoch": 101.32, + "learning_rate": 7.79816513761468e-05, + "loss": 0.144, + "step": 11044 + }, + { + "epoch": 101.33, + "learning_rate": 7.79434250764526e-05, + "loss": 0.1575, + "step": 11045 + }, + { + "epoch": 101.34, + "learning_rate": 7.790519877675842e-05, + "loss": 0.1222, + "step": 11046 + }, + { + "epoch": 101.35, + "learning_rate": 7.786697247706421e-05, + "loss": 0.1185, + "step": 11047 + }, + { + "epoch": 101.36, + "learning_rate": 7.782874617737003e-05, + "loss": 0.0871, + "step": 11048 + }, + { + "epoch": 101.37, + "learning_rate": 7.779051987767583e-05, + "loss": 0.1156, + "step": 11049 + }, + { + "epoch": 101.38, + "learning_rate": 7.775229357798165e-05, + "loss": 0.0691, + "step": 11050 + }, + { + "epoch": 101.39, + "learning_rate": 7.771406727828746e-05, + "loss": 0.097, + "step": 11051 + }, + { + "epoch": 101.39, + "learning_rate": 7.767584097859327e-05, + "loss": 0.0974, + "step": 11052 + }, + { + "epoch": 101.4, + "learning_rate": 7.763761467889909e-05, + "loss": 0.0954, + "step": 11053 + }, + { + "epoch": 101.41, + "learning_rate": 7.75993883792049e-05, + "loss": 0.0491, + "step": 11054 + }, + { + "epoch": 101.42, + "learning_rate": 7.756116207951071e-05, + "loss": 0.1046, + "step": 11055 + }, + { + "epoch": 101.43, + "learning_rate": 7.752293577981652e-05, + "loss": 0.1356, + "step": 11056 + }, + { + "epoch": 101.44, + "learning_rate": 7.748470948012234e-05, + "loss": 0.1075, + "step": 11057 + }, + { + "epoch": 101.45, + "learning_rate": 7.744648318042814e-05, + "loss": 0.0698, + "step": 11058 + }, + { + "epoch": 101.46, + "learning_rate": 7.740825688073394e-05, + "loss": 0.12, + "step": 11059 + }, + { + "epoch": 101.47, + "learning_rate": 7.737003058103975e-05, + "loss": 0.1658, + "step": 11060 + }, + { + "epoch": 101.48, + "learning_rate": 7.733180428134557e-05, + "loss": 0.1282, + "step": 11061 + }, + { + "epoch": 101.49, + "learning_rate": 7.729357798165137e-05, + "loss": 0.179, + "step": 11062 + }, + { + "epoch": 101.5, + "learning_rate": 7.725535168195719e-05, + "loss": 0.056, + "step": 11063 + }, + { + "epoch": 101.5, + "learning_rate": 7.721712538226299e-05, + "loss": 0.1875, + "step": 11064 + }, + { + "epoch": 101.51, + "learning_rate": 7.717889908256881e-05, + "loss": 0.1639, + "step": 11065 + }, + { + "epoch": 101.52, + "learning_rate": 7.714067278287463e-05, + "loss": 0.1782, + "step": 11066 + }, + { + "epoch": 101.53, + "learning_rate": 7.710244648318043e-05, + "loss": 0.1536, + "step": 11067 + }, + { + "epoch": 101.54, + "learning_rate": 7.706422018348625e-05, + "loss": 0.1289, + "step": 11068 + }, + { + "epoch": 101.55, + "learning_rate": 7.702599388379205e-05, + "loss": 0.0834, + "step": 11069 + }, + { + "epoch": 101.56, + "learning_rate": 7.698776758409787e-05, + "loss": 0.1472, + "step": 11070 + }, + { + "epoch": 101.57, + "learning_rate": 7.694954128440366e-05, + "loss": 0.1068, + "step": 11071 + }, + { + "epoch": 101.58, + "learning_rate": 7.691131498470948e-05, + "loss": 0.1045, + "step": 11072 + }, + { + "epoch": 101.59, + "learning_rate": 7.687308868501529e-05, + "loss": 0.1141, + "step": 11073 + }, + { + "epoch": 101.6, + "learning_rate": 7.68348623853211e-05, + "loss": 0.0776, + "step": 11074 + }, + { + "epoch": 101.61, + "learning_rate": 7.679663608562691e-05, + "loss": 0.1061, + "step": 11075 + }, + { + "epoch": 101.61, + "learning_rate": 7.675840978593273e-05, + "loss": 0.1435, + "step": 11076 + }, + { + "epoch": 101.62, + "learning_rate": 7.672018348623853e-05, + "loss": 0.1105, + "step": 11077 + }, + { + "epoch": 101.63, + "learning_rate": 7.668195718654435e-05, + "loss": 0.1395, + "step": 11078 + }, + { + "epoch": 101.64, + "learning_rate": 7.664373088685015e-05, + "loss": 0.0876, + "step": 11079 + }, + { + "epoch": 101.65, + "learning_rate": 7.660550458715597e-05, + "loss": 0.0901, + "step": 11080 + }, + { + "epoch": 101.66, + "learning_rate": 7.656727828746179e-05, + "loss": 0.1325, + "step": 11081 + }, + { + "epoch": 101.67, + "learning_rate": 7.652905198776759e-05, + "loss": 0.1022, + "step": 11082 + }, + { + "epoch": 101.68, + "learning_rate": 7.64908256880734e-05, + "loss": 0.0998, + "step": 11083 + }, + { + "epoch": 101.69, + "learning_rate": 7.64525993883792e-05, + "loss": 0.1162, + "step": 11084 + }, + { + "epoch": 101.7, + "learning_rate": 7.641437308868502e-05, + "loss": 0.0981, + "step": 11085 + }, + { + "epoch": 101.71, + "learning_rate": 7.637614678899082e-05, + "loss": 0.1372, + "step": 11086 + }, + { + "epoch": 101.72, + "learning_rate": 7.633792048929664e-05, + "loss": 0.1863, + "step": 11087 + }, + { + "epoch": 101.72, + "learning_rate": 7.629969418960244e-05, + "loss": 0.0779, + "step": 11088 + }, + { + "epoch": 101.73, + "learning_rate": 7.626146788990826e-05, + "loss": 0.0802, + "step": 11089 + }, + { + "epoch": 101.74, + "learning_rate": 7.622324159021407e-05, + "loss": 0.1016, + "step": 11090 + }, + { + "epoch": 101.75, + "learning_rate": 7.618501529051988e-05, + "loss": 0.1495, + "step": 11091 + }, + { + "epoch": 101.76, + "learning_rate": 7.614678899082569e-05, + "loss": 0.1221, + "step": 11092 + }, + { + "epoch": 101.77, + "learning_rate": 7.61085626911315e-05, + "loss": 0.1248, + "step": 11093 + }, + { + "epoch": 101.78, + "learning_rate": 7.607033639143731e-05, + "loss": 0.1786, + "step": 11094 + }, + { + "epoch": 101.79, + "learning_rate": 7.603211009174312e-05, + "loss": 0.1386, + "step": 11095 + }, + { + "epoch": 101.8, + "learning_rate": 7.599388379204893e-05, + "loss": 0.1527, + "step": 11096 + }, + { + "epoch": 101.81, + "learning_rate": 7.595565749235474e-05, + "loss": 0.1038, + "step": 11097 + }, + { + "epoch": 101.82, + "learning_rate": 7.591743119266055e-05, + "loss": 0.0889, + "step": 11098 + }, + { + "epoch": 101.83, + "learning_rate": 7.587920489296636e-05, + "loss": 0.1346, + "step": 11099 + }, + { + "epoch": 101.83, + "learning_rate": 7.584097859327218e-05, + "loss": 0.1581, + "step": 11100 + }, + { + "epoch": 101.84, + "learning_rate": 7.580275229357798e-05, + "loss": 0.0915, + "step": 11101 + }, + { + "epoch": 101.85, + "learning_rate": 7.57645259938838e-05, + "loss": 0.0971, + "step": 11102 + }, + { + "epoch": 101.86, + "learning_rate": 7.57262996941896e-05, + "loss": 0.1084, + "step": 11103 + }, + { + "epoch": 101.87, + "learning_rate": 7.568807339449542e-05, + "loss": 0.0833, + "step": 11104 + }, + { + "epoch": 101.88, + "learning_rate": 7.564984709480123e-05, + "loss": 0.1549, + "step": 11105 + }, + { + "epoch": 101.89, + "learning_rate": 7.561162079510704e-05, + "loss": 0.1318, + "step": 11106 + }, + { + "epoch": 101.9, + "learning_rate": 7.557339449541283e-05, + "loss": 0.0807, + "step": 11107 + }, + { + "epoch": 101.91, + "learning_rate": 7.553516819571865e-05, + "loss": 0.0874, + "step": 11108 + }, + { + "epoch": 101.92, + "learning_rate": 7.549694189602446e-05, + "loss": 0.142, + "step": 11109 + }, + { + "epoch": 101.93, + "learning_rate": 7.545871559633027e-05, + "loss": 0.1129, + "step": 11110 + }, + { + "epoch": 101.94, + "learning_rate": 7.542048929663609e-05, + "loss": 0.123, + "step": 11111 + }, + { + "epoch": 101.94, + "learning_rate": 7.53822629969419e-05, + "loss": 0.1125, + "step": 11112 + }, + { + "epoch": 101.95, + "learning_rate": 7.534403669724771e-05, + "loss": 0.123, + "step": 11113 + }, + { + "epoch": 101.96, + "learning_rate": 7.530581039755352e-05, + "loss": 0.0576, + "step": 11114 + }, + { + "epoch": 101.97, + "learning_rate": 7.526758409785934e-05, + "loss": 0.1386, + "step": 11115 + }, + { + "epoch": 101.98, + "learning_rate": 7.522935779816514e-05, + "loss": 0.0699, + "step": 11116 + }, + { + "epoch": 101.99, + "learning_rate": 7.519113149847096e-05, + "loss": 0.1142, + "step": 11117 + }, + { + "epoch": 102.0, + "learning_rate": 7.515290519877676e-05, + "loss": 0.1734, + "step": 11118 + }, + { + "epoch": 102.01, + "learning_rate": 7.511467889908257e-05, + "loss": 0.191, + "step": 11119 + }, + { + "epoch": 102.02, + "learning_rate": 7.507645259938837e-05, + "loss": 0.1476, + "step": 11120 + }, + { + "epoch": 102.03, + "learning_rate": 7.503822629969419e-05, + "loss": 0.1395, + "step": 11121 + }, + { + "epoch": 102.04, + "learning_rate": 7.5e-05, + "loss": 0.1528, + "step": 11122 + }, + { + "epoch": 102.05, + "learning_rate": 7.496177370030581e-05, + "loss": 0.1695, + "step": 11123 + }, + { + "epoch": 102.06, + "learning_rate": 7.492354740061162e-05, + "loss": 0.0988, + "step": 11124 + }, + { + "epoch": 102.06, + "learning_rate": 7.488532110091743e-05, + "loss": 0.0951, + "step": 11125 + }, + { + "epoch": 102.07, + "learning_rate": 7.484709480122325e-05, + "loss": 0.0885, + "step": 11126 + }, + { + "epoch": 102.08, + "learning_rate": 7.480886850152906e-05, + "loss": 0.0933, + "step": 11127 + }, + { + "epoch": 102.09, + "learning_rate": 7.477064220183487e-05, + "loss": 0.0872, + "step": 11128 + }, + { + "epoch": 102.1, + "learning_rate": 7.473241590214068e-05, + "loss": 0.148, + "step": 11129 + }, + { + "epoch": 102.11, + "learning_rate": 7.46941896024465e-05, + "loss": 0.1187, + "step": 11130 + }, + { + "epoch": 102.12, + "learning_rate": 7.465596330275229e-05, + "loss": 0.0782, + "step": 11131 + }, + { + "epoch": 102.13, + "learning_rate": 7.46177370030581e-05, + "loss": 0.096, + "step": 11132 + }, + { + "epoch": 102.14, + "learning_rate": 7.457951070336391e-05, + "loss": 0.137, + "step": 11133 + }, + { + "epoch": 102.15, + "learning_rate": 7.454128440366973e-05, + "loss": 0.0954, + "step": 11134 + }, + { + "epoch": 102.16, + "learning_rate": 7.450305810397553e-05, + "loss": 0.0373, + "step": 11135 + }, + { + "epoch": 102.17, + "learning_rate": 7.446483180428135e-05, + "loss": 0.1257, + "step": 11136 + }, + { + "epoch": 102.17, + "learning_rate": 7.442660550458715e-05, + "loss": 0.1016, + "step": 11137 + }, + { + "epoch": 102.18, + "learning_rate": 7.438837920489297e-05, + "loss": 0.1559, + "step": 11138 + }, + { + "epoch": 102.19, + "learning_rate": 7.435015290519877e-05, + "loss": 0.0925, + "step": 11139 + }, + { + "epoch": 102.2, + "learning_rate": 7.431192660550459e-05, + "loss": 0.0864, + "step": 11140 + }, + { + "epoch": 102.21, + "learning_rate": 7.427370030581041e-05, + "loss": 0.0698, + "step": 11141 + }, + { + "epoch": 102.22, + "learning_rate": 7.423547400611621e-05, + "loss": 0.1164, + "step": 11142 + }, + { + "epoch": 102.23, + "learning_rate": 7.419724770642202e-05, + "loss": 0.0591, + "step": 11143 + }, + { + "epoch": 102.24, + "learning_rate": 7.415902140672782e-05, + "loss": 0.0944, + "step": 11144 + }, + { + "epoch": 102.25, + "learning_rate": 7.412079510703364e-05, + "loss": 0.0473, + "step": 11145 + }, + { + "epoch": 102.26, + "learning_rate": 7.408256880733945e-05, + "loss": 0.1818, + "step": 11146 + }, + { + "epoch": 102.27, + "learning_rate": 7.404434250764526e-05, + "loss": 0.1575, + "step": 11147 + }, + { + "epoch": 102.28, + "learning_rate": 7.400611620795107e-05, + "loss": 0.1178, + "step": 11148 + }, + { + "epoch": 102.28, + "learning_rate": 7.396788990825689e-05, + "loss": 0.1755, + "step": 11149 + }, + { + "epoch": 102.29, + "learning_rate": 7.392966360856269e-05, + "loss": 0.1178, + "step": 11150 + }, + { + "epoch": 102.3, + "learning_rate": 7.389143730886851e-05, + "loss": 0.1879, + "step": 11151 + }, + { + "epoch": 102.31, + "learning_rate": 7.385321100917431e-05, + "loss": 0.0894, + "step": 11152 + }, + { + "epoch": 102.32, + "learning_rate": 7.381498470948013e-05, + "loss": 0.1241, + "step": 11153 + }, + { + "epoch": 102.33, + "learning_rate": 7.377675840978593e-05, + "loss": 0.0901, + "step": 11154 + }, + { + "epoch": 102.34, + "learning_rate": 7.373853211009174e-05, + "loss": 0.1234, + "step": 11155 + }, + { + "epoch": 102.35, + "learning_rate": 7.370030581039756e-05, + "loss": 0.1375, + "step": 11156 + }, + { + "epoch": 102.36, + "learning_rate": 7.366207951070336e-05, + "loss": 0.1482, + "step": 11157 + }, + { + "epoch": 102.37, + "learning_rate": 7.362385321100918e-05, + "loss": 0.1418, + "step": 11158 + }, + { + "epoch": 102.38, + "learning_rate": 7.358562691131498e-05, + "loss": 0.0872, + "step": 11159 + }, + { + "epoch": 102.39, + "learning_rate": 7.35474006116208e-05, + "loss": 0.0972, + "step": 11160 + }, + { + "epoch": 102.39, + "learning_rate": 7.35091743119266e-05, + "loss": 0.1231, + "step": 11161 + }, + { + "epoch": 102.4, + "learning_rate": 7.347094801223242e-05, + "loss": 0.0731, + "step": 11162 + }, + { + "epoch": 102.41, + "learning_rate": 7.343272171253823e-05, + "loss": 0.1166, + "step": 11163 + }, + { + "epoch": 102.42, + "learning_rate": 7.339449541284404e-05, + "loss": 0.1103, + "step": 11164 + }, + { + "epoch": 102.43, + "learning_rate": 7.335626911314985e-05, + "loss": 0.1143, + "step": 11165 + }, + { + "epoch": 102.44, + "learning_rate": 7.335626911314985e-05, + "loss": 0.1215, + "step": 11166 + }, + { + "epoch": 102.45, + "learning_rate": 7.331804281345567e-05, + "loss": 0.1698, + "step": 11167 + }, + { + "epoch": 102.46, + "learning_rate": 7.327981651376146e-05, + "loss": 0.1585, + "step": 11168 + }, + { + "epoch": 102.47, + "learning_rate": 7.324159021406728e-05, + "loss": 0.0951, + "step": 11169 + }, + { + "epoch": 102.48, + "learning_rate": 7.320336391437308e-05, + "loss": 0.108, + "step": 11170 + }, + { + "epoch": 102.49, + "learning_rate": 7.31651376146789e-05, + "loss": 0.0873, + "step": 11171 + }, + { + "epoch": 102.5, + "learning_rate": 7.312691131498472e-05, + "loss": 0.0659, + "step": 11172 + }, + { + "epoch": 102.5, + "learning_rate": 7.308868501529052e-05, + "loss": 0.1516, + "step": 11173 + }, + { + "epoch": 102.51, + "learning_rate": 7.305045871559634e-05, + "loss": 0.1472, + "step": 11174 + }, + { + "epoch": 102.52, + "learning_rate": 7.301223241590214e-05, + "loss": 0.1883, + "step": 11175 + }, + { + "epoch": 102.53, + "learning_rate": 7.297400611620796e-05, + "loss": 0.1625, + "step": 11176 + }, + { + "epoch": 102.54, + "learning_rate": 7.293577981651376e-05, + "loss": 0.1027, + "step": 11177 + }, + { + "epoch": 102.55, + "learning_rate": 7.289755351681958e-05, + "loss": 0.1776, + "step": 11178 + }, + { + "epoch": 102.56, + "learning_rate": 7.285932721712539e-05, + "loss": 0.165, + "step": 11179 + }, + { + "epoch": 102.57, + "learning_rate": 7.282110091743119e-05, + "loss": 0.1194, + "step": 11180 + }, + { + "epoch": 102.58, + "learning_rate": 7.2782874617737e-05, + "loss": 0.1612, + "step": 11181 + }, + { + "epoch": 102.59, + "learning_rate": 7.274464831804281e-05, + "loss": 0.1338, + "step": 11182 + }, + { + "epoch": 102.6, + "learning_rate": 7.270642201834862e-05, + "loss": 0.1754, + "step": 11183 + }, + { + "epoch": 102.61, + "learning_rate": 7.266819571865443e-05, + "loss": 0.1359, + "step": 11184 + }, + { + "epoch": 102.61, + "learning_rate": 7.262996941896025e-05, + "loss": 0.1637, + "step": 11185 + }, + { + "epoch": 102.62, + "learning_rate": 7.259174311926606e-05, + "loss": 0.0867, + "step": 11186 + }, + { + "epoch": 102.63, + "learning_rate": 7.255351681957187e-05, + "loss": 0.1268, + "step": 11187 + }, + { + "epoch": 102.64, + "learning_rate": 7.251529051987768e-05, + "loss": 0.1836, + "step": 11188 + }, + { + "epoch": 102.65, + "learning_rate": 7.24770642201835e-05, + "loss": 0.0632, + "step": 11189 + }, + { + "epoch": 102.66, + "learning_rate": 7.24388379204893e-05, + "loss": 0.1236, + "step": 11190 + }, + { + "epoch": 102.67, + "learning_rate": 7.240061162079512e-05, + "loss": 0.1253, + "step": 11191 + }, + { + "epoch": 102.68, + "learning_rate": 7.236238532110091e-05, + "loss": 0.0994, + "step": 11192 + }, + { + "epoch": 102.69, + "learning_rate": 7.232415902140673e-05, + "loss": 0.1173, + "step": 11193 + }, + { + "epoch": 102.7, + "learning_rate": 7.228593272171253e-05, + "loss": 0.1701, + "step": 11194 + }, + { + "epoch": 102.71, + "learning_rate": 7.224770642201835e-05, + "loss": 0.0897, + "step": 11195 + }, + { + "epoch": 102.72, + "learning_rate": 7.220948012232415e-05, + "loss": 0.0652, + "step": 11196 + }, + { + "epoch": 102.72, + "learning_rate": 7.217125382262997e-05, + "loss": 0.1094, + "step": 11197 + }, + { + "epoch": 102.73, + "learning_rate": 7.213302752293578e-05, + "loss": 0.0742, + "step": 11198 + }, + { + "epoch": 102.74, + "learning_rate": 7.20948012232416e-05, + "loss": 0.0911, + "step": 11199 + }, + { + "epoch": 102.75, + "learning_rate": 7.205657492354741e-05, + "loss": 0.1845, + "step": 11200 + }, + { + "epoch": 102.76, + "learning_rate": 7.201834862385322e-05, + "loss": 0.1504, + "step": 11201 + }, + { + "epoch": 102.77, + "learning_rate": 7.198012232415903e-05, + "loss": 0.1374, + "step": 11202 + }, + { + "epoch": 102.78, + "learning_rate": 7.194189602446484e-05, + "loss": 0.0966, + "step": 11203 + }, + { + "epoch": 102.79, + "learning_rate": 7.190366972477064e-05, + "loss": 0.1248, + "step": 11204 + }, + { + "epoch": 102.8, + "learning_rate": 7.186544342507645e-05, + "loss": 0.1547, + "step": 11205 + }, + { + "epoch": 102.81, + "learning_rate": 7.182721712538226e-05, + "loss": 0.1105, + "step": 11206 + }, + { + "epoch": 102.82, + "learning_rate": 7.178899082568807e-05, + "loss": 0.1084, + "step": 11207 + }, + { + "epoch": 102.83, + "learning_rate": 7.175076452599389e-05, + "loss": 0.1415, + "step": 11208 + }, + { + "epoch": 102.83, + "learning_rate": 7.171253822629969e-05, + "loss": 0.0765, + "step": 11209 + }, + { + "epoch": 102.84, + "learning_rate": 7.167431192660551e-05, + "loss": 0.0887, + "step": 11210 + }, + { + "epoch": 102.85, + "learning_rate": 7.163608562691131e-05, + "loss": 0.112, + "step": 11211 + }, + { + "epoch": 102.86, + "learning_rate": 7.159785932721713e-05, + "loss": 0.11, + "step": 11212 + }, + { + "epoch": 102.87, + "learning_rate": 7.155963302752293e-05, + "loss": 0.123, + "step": 11213 + }, + { + "epoch": 102.88, + "learning_rate": 7.152140672782875e-05, + "loss": 0.1122, + "step": 11214 + }, + { + "epoch": 102.89, + "learning_rate": 7.148318042813457e-05, + "loss": 0.1513, + "step": 11215 + }, + { + "epoch": 102.9, + "learning_rate": 7.144495412844036e-05, + "loss": 0.0862, + "step": 11216 + }, + { + "epoch": 102.91, + "learning_rate": 7.140672782874618e-05, + "loss": 0.1085, + "step": 11217 + }, + { + "epoch": 102.92, + "learning_rate": 7.136850152905198e-05, + "loss": 0.2468, + "step": 11218 + }, + { + "epoch": 102.93, + "learning_rate": 7.13302752293578e-05, + "loss": 0.1524, + "step": 11219 + }, + { + "epoch": 102.94, + "learning_rate": 7.12920489296636e-05, + "loss": 0.0804, + "step": 11220 + }, + { + "epoch": 102.94, + "learning_rate": 7.125382262996942e-05, + "loss": 0.0913, + "step": 11221 + }, + { + "epoch": 102.95, + "learning_rate": 7.121559633027523e-05, + "loss": 0.0671, + "step": 11222 + }, + { + "epoch": 102.96, + "learning_rate": 7.117737003058105e-05, + "loss": 0.1184, + "step": 11223 + }, + { + "epoch": 102.97, + "learning_rate": 7.113914373088685e-05, + "loss": 0.163, + "step": 11224 + }, + { + "epoch": 102.98, + "learning_rate": 7.110091743119267e-05, + "loss": 0.0984, + "step": 11225 + }, + { + "epoch": 102.99, + "learning_rate": 7.106269113149847e-05, + "loss": 0.0497, + "step": 11226 + }, + { + "epoch": 103.0, + "learning_rate": 7.102446483180429e-05, + "loss": 0.0899, + "step": 11227 + }, + { + "epoch": 103.01, + "learning_rate": 7.098623853211008e-05, + "loss": 0.1941, + "step": 11228 + }, + { + "epoch": 103.02, + "learning_rate": 7.09480122324159e-05, + "loss": 0.1481, + "step": 11229 + }, + { + "epoch": 103.03, + "learning_rate": 7.090978593272172e-05, + "loss": 0.1091, + "step": 11230 + }, + { + "epoch": 103.04, + "learning_rate": 7.087155963302752e-05, + "loss": 0.1308, + "step": 11231 + }, + { + "epoch": 103.05, + "learning_rate": 7.083333333333334e-05, + "loss": 0.1226, + "step": 11232 + }, + { + "epoch": 103.06, + "learning_rate": 7.079510703363914e-05, + "loss": 0.1157, + "step": 11233 + }, + { + "epoch": 103.06, + "learning_rate": 7.075688073394496e-05, + "loss": 0.1244, + "step": 11234 + }, + { + "epoch": 103.07, + "learning_rate": 7.071865443425076e-05, + "loss": 0.1801, + "step": 11235 + }, + { + "epoch": 103.08, + "learning_rate": 7.068042813455658e-05, + "loss": 0.1436, + "step": 11236 + }, + { + "epoch": 103.09, + "learning_rate": 7.064220183486239e-05, + "loss": 0.1303, + "step": 11237 + }, + { + "epoch": 103.1, + "learning_rate": 7.06039755351682e-05, + "loss": 0.0873, + "step": 11238 + }, + { + "epoch": 103.11, + "learning_rate": 7.056574923547401e-05, + "loss": 0.1155, + "step": 11239 + }, + { + "epoch": 103.12, + "learning_rate": 7.052752293577981e-05, + "loss": 0.1145, + "step": 11240 + }, + { + "epoch": 103.13, + "learning_rate": 7.048929663608562e-05, + "loss": 0.1498, + "step": 11241 + }, + { + "epoch": 103.14, + "learning_rate": 7.045107033639144e-05, + "loss": 0.1176, + "step": 11242 + }, + { + "epoch": 103.15, + "learning_rate": 7.041284403669724e-05, + "loss": 0.0729, + "step": 11243 + }, + { + "epoch": 103.16, + "learning_rate": 7.037461773700306e-05, + "loss": 0.0889, + "step": 11244 + }, + { + "epoch": 103.17, + "learning_rate": 7.033639143730888e-05, + "loss": 0.0649, + "step": 11245 + }, + { + "epoch": 103.17, + "learning_rate": 7.029816513761468e-05, + "loss": 0.1375, + "step": 11246 + }, + { + "epoch": 103.18, + "learning_rate": 7.02599388379205e-05, + "loss": 0.1463, + "step": 11247 + }, + { + "epoch": 103.19, + "learning_rate": 7.02217125382263e-05, + "loss": 0.1142, + "step": 11248 + }, + { + "epoch": 103.2, + "learning_rate": 7.018348623853212e-05, + "loss": 0.1624, + "step": 11249 + }, + { + "epoch": 103.21, + "learning_rate": 7.014525993883792e-05, + "loss": 0.0876, + "step": 11250 + }, + { + "epoch": 103.22, + "learning_rate": 7.010703363914374e-05, + "loss": 0.0545, + "step": 11251 + }, + { + "epoch": 103.23, + "learning_rate": 7.006880733944953e-05, + "loss": 0.1497, + "step": 11252 + }, + { + "epoch": 103.24, + "learning_rate": 7.003058103975535e-05, + "loss": 0.1247, + "step": 11253 + }, + { + "epoch": 103.25, + "learning_rate": 6.999235474006115e-05, + "loss": 0.0988, + "step": 11254 + }, + { + "epoch": 103.26, + "learning_rate": 6.995412844036697e-05, + "loss": 0.1806, + "step": 11255 + }, + { + "epoch": 103.27, + "learning_rate": 6.991590214067278e-05, + "loss": 0.1574, + "step": 11256 + }, + { + "epoch": 103.28, + "learning_rate": 6.98776758409786e-05, + "loss": 0.1737, + "step": 11257 + }, + { + "epoch": 103.28, + "learning_rate": 6.98394495412844e-05, + "loss": 0.1564, + "step": 11258 + }, + { + "epoch": 103.29, + "learning_rate": 6.980122324159022e-05, + "loss": 0.1362, + "step": 11259 + }, + { + "epoch": 103.3, + "learning_rate": 6.976299694189603e-05, + "loss": 0.1037, + "step": 11260 + }, + { + "epoch": 103.31, + "learning_rate": 6.972477064220184e-05, + "loss": 0.1133, + "step": 11261 + }, + { + "epoch": 103.32, + "learning_rate": 6.968654434250766e-05, + "loss": 0.1031, + "step": 11262 + }, + { + "epoch": 103.33, + "learning_rate": 6.964831804281346e-05, + "loss": 0.1548, + "step": 11263 + }, + { + "epoch": 103.34, + "learning_rate": 6.961009174311927e-05, + "loss": 0.1332, + "step": 11264 + }, + { + "epoch": 103.35, + "learning_rate": 6.957186544342507e-05, + "loss": 0.1033, + "step": 11265 + }, + { + "epoch": 103.36, + "learning_rate": 6.953363914373089e-05, + "loss": 0.1327, + "step": 11266 + }, + { + "epoch": 103.37, + "learning_rate": 6.949541284403669e-05, + "loss": 0.1399, + "step": 11267 + }, + { + "epoch": 103.38, + "learning_rate": 6.945718654434251e-05, + "loss": 0.1037, + "step": 11268 + }, + { + "epoch": 103.39, + "learning_rate": 6.941896024464831e-05, + "loss": 0.1038, + "step": 11269 + }, + { + "epoch": 103.39, + "learning_rate": 6.938073394495413e-05, + "loss": 0.0884, + "step": 11270 + }, + { + "epoch": 103.4, + "learning_rate": 6.934250764525994e-05, + "loss": 0.1116, + "step": 11271 + }, + { + "epoch": 103.41, + "learning_rate": 6.930428134556575e-05, + "loss": 0.19, + "step": 11272 + }, + { + "epoch": 103.42, + "learning_rate": 6.926605504587156e-05, + "loss": 0.1744, + "step": 11273 + }, + { + "epoch": 103.43, + "learning_rate": 6.922782874617738e-05, + "loss": 0.0599, + "step": 11274 + }, + { + "epoch": 103.44, + "learning_rate": 6.91896024464832e-05, + "loss": 0.1129, + "step": 11275 + }, + { + "epoch": 103.45, + "learning_rate": 6.915137614678898e-05, + "loss": 0.125, + "step": 11276 + }, + { + "epoch": 103.46, + "learning_rate": 6.91131498470948e-05, + "loss": 0.1459, + "step": 11277 + }, + { + "epoch": 103.47, + "learning_rate": 6.90749235474006e-05, + "loss": 0.0761, + "step": 11278 + }, + { + "epoch": 103.48, + "learning_rate": 6.903669724770642e-05, + "loss": 0.088, + "step": 11279 + }, + { + "epoch": 103.49, + "learning_rate": 6.899847094801223e-05, + "loss": 0.0505, + "step": 11280 + }, + { + "epoch": 103.5, + "learning_rate": 6.896024464831805e-05, + "loss": 0.1263, + "step": 11281 + }, + { + "epoch": 103.5, + "learning_rate": 6.892201834862385e-05, + "loss": 0.2539, + "step": 11282 + }, + { + "epoch": 103.51, + "learning_rate": 6.888379204892967e-05, + "loss": 0.1863, + "step": 11283 + }, + { + "epoch": 103.52, + "learning_rate": 6.884556574923547e-05, + "loss": 0.1314, + "step": 11284 + }, + { + "epoch": 103.53, + "learning_rate": 6.880733944954129e-05, + "loss": 0.1682, + "step": 11285 + }, + { + "epoch": 103.54, + "learning_rate": 6.87691131498471e-05, + "loss": 0.1546, + "step": 11286 + }, + { + "epoch": 103.55, + "learning_rate": 6.873088685015291e-05, + "loss": 0.1144, + "step": 11287 + }, + { + "epoch": 103.56, + "learning_rate": 6.86926605504587e-05, + "loss": 0.1901, + "step": 11288 + }, + { + "epoch": 103.57, + "learning_rate": 6.865443425076452e-05, + "loss": 0.1368, + "step": 11289 + }, + { + "epoch": 103.58, + "learning_rate": 6.861620795107034e-05, + "loss": 0.0873, + "step": 11290 + }, + { + "epoch": 103.59, + "learning_rate": 6.857798165137614e-05, + "loss": 0.1168, + "step": 11291 + }, + { + "epoch": 103.6, + "learning_rate": 6.853975535168196e-05, + "loss": 0.1478, + "step": 11292 + }, + { + "epoch": 103.61, + "learning_rate": 6.850152905198777e-05, + "loss": 0.144, + "step": 11293 + }, + { + "epoch": 103.61, + "learning_rate": 6.846330275229358e-05, + "loss": 0.1213, + "step": 11294 + }, + { + "epoch": 103.62, + "learning_rate": 6.842507645259939e-05, + "loss": 0.1511, + "step": 11295 + }, + { + "epoch": 103.63, + "learning_rate": 6.83868501529052e-05, + "loss": 0.1748, + "step": 11296 + }, + { + "epoch": 103.64, + "learning_rate": 6.834862385321101e-05, + "loss": 0.1049, + "step": 11297 + }, + { + "epoch": 103.65, + "learning_rate": 6.831039755351683e-05, + "loss": 0.0577, + "step": 11298 + }, + { + "epoch": 103.66, + "learning_rate": 6.827217125382263e-05, + "loss": 0.0921, + "step": 11299 + }, + { + "epoch": 103.67, + "learning_rate": 6.823394495412844e-05, + "loss": 0.0921, + "step": 11300 + }, + { + "epoch": 103.68, + "learning_rate": 6.819571865443424e-05, + "loss": 0.1061, + "step": 11301 + }, + { + "epoch": 103.69, + "learning_rate": 6.815749235474006e-05, + "loss": 0.1897, + "step": 11302 + }, + { + "epoch": 103.7, + "learning_rate": 6.811926605504588e-05, + "loss": 0.1633, + "step": 11303 + }, + { + "epoch": 103.71, + "learning_rate": 6.808103975535168e-05, + "loss": 0.1004, + "step": 11304 + }, + { + "epoch": 103.72, + "learning_rate": 6.80428134556575e-05, + "loss": 0.0612, + "step": 11305 + }, + { + "epoch": 103.72, + "learning_rate": 6.80045871559633e-05, + "loss": 0.2191, + "step": 11306 + }, + { + "epoch": 103.73, + "learning_rate": 6.796636085626912e-05, + "loss": 0.0994, + "step": 11307 + }, + { + "epoch": 103.74, + "learning_rate": 6.792813455657493e-05, + "loss": 0.1293, + "step": 11308 + }, + { + "epoch": 103.75, + "learning_rate": 6.788990825688074e-05, + "loss": 0.1925, + "step": 11309 + }, + { + "epoch": 103.76, + "learning_rate": 6.785168195718655e-05, + "loss": 0.1621, + "step": 11310 + }, + { + "epoch": 103.77, + "learning_rate": 6.781345565749237e-05, + "loss": 0.1614, + "step": 11311 + }, + { + "epoch": 103.78, + "learning_rate": 6.777522935779816e-05, + "loss": 0.1071, + "step": 11312 + }, + { + "epoch": 103.79, + "learning_rate": 6.773700305810397e-05, + "loss": 0.0989, + "step": 11313 + }, + { + "epoch": 103.8, + "learning_rate": 6.769877675840978e-05, + "loss": 0.1431, + "step": 11314 + }, + { + "epoch": 103.81, + "learning_rate": 6.76605504587156e-05, + "loss": 0.1522, + "step": 11315 + }, + { + "epoch": 103.82, + "learning_rate": 6.76223241590214e-05, + "loss": 0.0904, + "step": 11316 + }, + { + "epoch": 103.83, + "learning_rate": 6.758409785932722e-05, + "loss": 0.1127, + "step": 11317 + }, + { + "epoch": 103.83, + "learning_rate": 6.754587155963304e-05, + "loss": 0.11, + "step": 11318 + }, + { + "epoch": 103.84, + "learning_rate": 6.750764525993884e-05, + "loss": 0.1094, + "step": 11319 + }, + { + "epoch": 103.85, + "learning_rate": 6.746941896024466e-05, + "loss": 0.0753, + "step": 11320 + }, + { + "epoch": 103.86, + "learning_rate": 6.743119266055046e-05, + "loss": 0.1576, + "step": 11321 + }, + { + "epoch": 103.87, + "learning_rate": 6.739296636085628e-05, + "loss": 0.1151, + "step": 11322 + }, + { + "epoch": 103.88, + "learning_rate": 6.735474006116208e-05, + "loss": 0.0991, + "step": 11323 + }, + { + "epoch": 103.89, + "learning_rate": 6.731651376146789e-05, + "loss": 0.1016, + "step": 11324 + }, + { + "epoch": 103.9, + "learning_rate": 6.727828746177369e-05, + "loss": 0.1625, + "step": 11325 + }, + { + "epoch": 103.91, + "learning_rate": 6.724006116207951e-05, + "loss": 0.0678, + "step": 11326 + }, + { + "epoch": 103.92, + "learning_rate": 6.720183486238531e-05, + "loss": 0.1259, + "step": 11327 + }, + { + "epoch": 103.93, + "learning_rate": 6.716360856269113e-05, + "loss": 0.1278, + "step": 11328 + }, + { + "epoch": 103.94, + "learning_rate": 6.712538226299694e-05, + "loss": 0.1027, + "step": 11329 + }, + { + "epoch": 103.94, + "learning_rate": 6.708715596330275e-05, + "loss": 0.0429, + "step": 11330 + }, + { + "epoch": 103.95, + "learning_rate": 6.704892966360856e-05, + "loss": 0.1134, + "step": 11331 + }, + { + "epoch": 103.96, + "learning_rate": 6.701070336391438e-05, + "loss": 0.1004, + "step": 11332 + }, + { + "epoch": 103.97, + "learning_rate": 6.69724770642202e-05, + "loss": 0.0996, + "step": 11333 + }, + { + "epoch": 103.98, + "learning_rate": 6.6934250764526e-05, + "loss": 0.0876, + "step": 11334 + }, + { + "epoch": 103.99, + "learning_rate": 6.689602446483182e-05, + "loss": 0.0574, + "step": 11335 + }, + { + "epoch": 104.0, + "learning_rate": 6.685779816513761e-05, + "loss": 0.155, + "step": 11336 + }, + { + "epoch": 104.01, + "learning_rate": 6.681957186544343e-05, + "loss": 0.2294, + "step": 11337 + }, + { + "epoch": 104.02, + "learning_rate": 6.678134556574923e-05, + "loss": 0.1138, + "step": 11338 + }, + { + "epoch": 104.03, + "learning_rate": 6.674311926605505e-05, + "loss": 0.1198, + "step": 11339 + }, + { + "epoch": 104.04, + "learning_rate": 6.670489296636085e-05, + "loss": 0.1548, + "step": 11340 + }, + { + "epoch": 104.05, + "learning_rate": 6.666666666666667e-05, + "loss": 0.1431, + "step": 11341 + }, + { + "epoch": 104.06, + "learning_rate": 6.662844036697247e-05, + "loss": 0.1609, + "step": 11342 + }, + { + "epoch": 104.06, + "learning_rate": 6.659021406727829e-05, + "loss": 0.1529, + "step": 11343 + }, + { + "epoch": 104.07, + "learning_rate": 6.65519877675841e-05, + "loss": 0.1122, + "step": 11344 + }, + { + "epoch": 104.08, + "learning_rate": 6.651376146788991e-05, + "loss": 0.1567, + "step": 11345 + }, + { + "epoch": 104.09, + "learning_rate": 6.647553516819572e-05, + "loss": 0.0602, + "step": 11346 + }, + { + "epoch": 104.1, + "learning_rate": 6.643730886850154e-05, + "loss": 0.1439, + "step": 11347 + }, + { + "epoch": 104.11, + "learning_rate": 6.639908256880734e-05, + "loss": 0.1088, + "step": 11348 + }, + { + "epoch": 104.12, + "learning_rate": 6.636085626911314e-05, + "loss": 0.1279, + "step": 11349 + }, + { + "epoch": 104.13, + "learning_rate": 6.632262996941896e-05, + "loss": 0.0929, + "step": 11350 + }, + { + "epoch": 104.14, + "learning_rate": 6.628440366972477e-05, + "loss": 0.0871, + "step": 11351 + }, + { + "epoch": 104.15, + "learning_rate": 6.624617737003058e-05, + "loss": 0.0977, + "step": 11352 + }, + { + "epoch": 104.16, + "learning_rate": 6.620795107033639e-05, + "loss": 0.1222, + "step": 11353 + }, + { + "epoch": 104.17, + "learning_rate": 6.616972477064221e-05, + "loss": 0.1289, + "step": 11354 + }, + { + "epoch": 104.17, + "learning_rate": 6.613149847094801e-05, + "loss": 0.1059, + "step": 11355 + }, + { + "epoch": 104.18, + "learning_rate": 6.609327217125383e-05, + "loss": 0.1447, + "step": 11356 + }, + { + "epoch": 104.19, + "learning_rate": 6.605504587155963e-05, + "loss": 0.1167, + "step": 11357 + }, + { + "epoch": 104.2, + "learning_rate": 6.601681957186545e-05, + "loss": 0.1275, + "step": 11358 + }, + { + "epoch": 104.21, + "learning_rate": 6.597859327217126e-05, + "loss": 0.1227, + "step": 11359 + }, + { + "epoch": 104.22, + "learning_rate": 6.594036697247706e-05, + "loss": 0.0688, + "step": 11360 + }, + { + "epoch": 104.23, + "learning_rate": 6.590214067278286e-05, + "loss": 0.0741, + "step": 11361 + }, + { + "epoch": 104.24, + "learning_rate": 6.586391437308868e-05, + "loss": 0.0573, + "step": 11362 + }, + { + "epoch": 104.25, + "learning_rate": 6.58256880733945e-05, + "loss": 0.0134, + "step": 11363 + }, + { + "epoch": 104.26, + "learning_rate": 6.57874617737003e-05, + "loss": 0.1313, + "step": 11364 + }, + { + "epoch": 104.27, + "learning_rate": 6.574923547400612e-05, + "loss": 0.1145, + "step": 11365 + }, + { + "epoch": 104.28, + "learning_rate": 6.571100917431193e-05, + "loss": 0.1388, + "step": 11366 + }, + { + "epoch": 104.28, + "learning_rate": 6.567278287461774e-05, + "loss": 0.1432, + "step": 11367 + }, + { + "epoch": 104.29, + "learning_rate": 6.563455657492355e-05, + "loss": 0.0947, + "step": 11368 + }, + { + "epoch": 104.3, + "learning_rate": 6.559633027522937e-05, + "loss": 0.1189, + "step": 11369 + }, + { + "epoch": 104.31, + "learning_rate": 6.555810397553517e-05, + "loss": 0.0935, + "step": 11370 + }, + { + "epoch": 104.32, + "learning_rate": 6.551987767584099e-05, + "loss": 0.0994, + "step": 11371 + }, + { + "epoch": 104.33, + "learning_rate": 6.548165137614678e-05, + "loss": 0.1071, + "step": 11372 + }, + { + "epoch": 104.34, + "learning_rate": 6.54434250764526e-05, + "loss": 0.1436, + "step": 11373 + }, + { + "epoch": 104.35, + "learning_rate": 6.54051987767584e-05, + "loss": 0.1256, + "step": 11374 + }, + { + "epoch": 104.36, + "learning_rate": 6.536697247706422e-05, + "loss": 0.1015, + "step": 11375 + }, + { + "epoch": 104.37, + "learning_rate": 6.532874617737002e-05, + "loss": 0.1488, + "step": 11376 + }, + { + "epoch": 104.38, + "learning_rate": 6.529051987767584e-05, + "loss": 0.1438, + "step": 11377 + }, + { + "epoch": 104.39, + "learning_rate": 6.525229357798166e-05, + "loss": 0.1137, + "step": 11378 + }, + { + "epoch": 104.39, + "learning_rate": 6.521406727828746e-05, + "loss": 0.1468, + "step": 11379 + }, + { + "epoch": 104.4, + "learning_rate": 6.517584097859328e-05, + "loss": 0.0868, + "step": 11380 + }, + { + "epoch": 104.41, + "learning_rate": 6.513761467889909e-05, + "loss": 0.1026, + "step": 11381 + }, + { + "epoch": 104.42, + "learning_rate": 6.50993883792049e-05, + "loss": 0.1571, + "step": 11382 + }, + { + "epoch": 104.43, + "learning_rate": 6.506116207951071e-05, + "loss": 0.1149, + "step": 11383 + }, + { + "epoch": 104.44, + "learning_rate": 6.502293577981651e-05, + "loss": 0.0666, + "step": 11384 + }, + { + "epoch": 104.45, + "learning_rate": 6.498470948012232e-05, + "loss": 0.1015, + "step": 11385 + }, + { + "epoch": 104.46, + "learning_rate": 6.494648318042813e-05, + "loss": 0.1462, + "step": 11386 + }, + { + "epoch": 104.47, + "learning_rate": 6.490825688073394e-05, + "loss": 0.0863, + "step": 11387 + }, + { + "epoch": 104.48, + "learning_rate": 6.487003058103976e-05, + "loss": 0.0883, + "step": 11388 + }, + { + "epoch": 104.49, + "learning_rate": 6.483180428134556e-05, + "loss": 0.0725, + "step": 11389 + }, + { + "epoch": 104.5, + "learning_rate": 6.479357798165138e-05, + "loss": 0.0967, + "step": 11390 + }, + { + "epoch": 104.5, + "learning_rate": 6.475535168195718e-05, + "loss": 0.196, + "step": 11391 + }, + { + "epoch": 104.51, + "learning_rate": 6.4717125382263e-05, + "loss": 0.1644, + "step": 11392 + }, + { + "epoch": 104.52, + "learning_rate": 6.467889908256882e-05, + "loss": 0.1383, + "step": 11393 + }, + { + "epoch": 104.53, + "learning_rate": 6.464067278287462e-05, + "loss": 0.132, + "step": 11394 + }, + { + "epoch": 104.54, + "learning_rate": 6.460244648318044e-05, + "loss": 0.1782, + "step": 11395 + }, + { + "epoch": 104.55, + "learning_rate": 6.456422018348623e-05, + "loss": 0.1295, + "step": 11396 + }, + { + "epoch": 104.56, + "learning_rate": 6.452599388379205e-05, + "loss": 0.1272, + "step": 11397 + }, + { + "epoch": 104.57, + "learning_rate": 6.448776758409785e-05, + "loss": 0.0718, + "step": 11398 + }, + { + "epoch": 104.58, + "learning_rate": 6.444954128440367e-05, + "loss": 0.1174, + "step": 11399 + }, + { + "epoch": 104.59, + "learning_rate": 6.441131498470948e-05, + "loss": 0.1051, + "step": 11400 + }, + { + "epoch": 104.6, + "learning_rate": 6.437308868501529e-05, + "loss": 0.134, + "step": 11401 + }, + { + "epoch": 104.61, + "learning_rate": 6.43348623853211e-05, + "loss": 0.1008, + "step": 11402 + }, + { + "epoch": 104.61, + "learning_rate": 6.429663608562692e-05, + "loss": 0.0761, + "step": 11403 + }, + { + "epoch": 104.62, + "learning_rate": 6.425840978593272e-05, + "loss": 0.0731, + "step": 11404 + }, + { + "epoch": 104.63, + "learning_rate": 6.422018348623854e-05, + "loss": 0.1681, + "step": 11405 + }, + { + "epoch": 104.64, + "learning_rate": 6.418195718654436e-05, + "loss": 0.0989, + "step": 11406 + }, + { + "epoch": 104.65, + "learning_rate": 6.414373088685016e-05, + "loss": 0.1082, + "step": 11407 + }, + { + "epoch": 104.66, + "learning_rate": 6.410550458715596e-05, + "loss": 0.0583, + "step": 11408 + }, + { + "epoch": 104.67, + "learning_rate": 6.406727828746177e-05, + "loss": 0.0894, + "step": 11409 + }, + { + "epoch": 104.68, + "learning_rate": 6.402905198776759e-05, + "loss": 0.1302, + "step": 11410 + }, + { + "epoch": 104.69, + "learning_rate": 6.399082568807339e-05, + "loss": 0.121, + "step": 11411 + }, + { + "epoch": 104.7, + "learning_rate": 6.395259938837921e-05, + "loss": 0.1465, + "step": 11412 + }, + { + "epoch": 104.71, + "learning_rate": 6.391437308868501e-05, + "loss": 0.13, + "step": 11413 + }, + { + "epoch": 104.72, + "learning_rate": 6.387614678899083e-05, + "loss": 0.0753, + "step": 11414 + }, + { + "epoch": 104.72, + "learning_rate": 6.383792048929663e-05, + "loss": 0.0618, + "step": 11415 + }, + { + "epoch": 104.73, + "learning_rate": 6.379969418960245e-05, + "loss": 0.0995, + "step": 11416 + }, + { + "epoch": 104.74, + "learning_rate": 6.376146788990826e-05, + "loss": 0.0745, + "step": 11417 + }, + { + "epoch": 104.75, + "learning_rate": 6.372324159021407e-05, + "loss": 0.1379, + "step": 11418 + }, + { + "epoch": 104.76, + "learning_rate": 6.368501529051988e-05, + "loss": 0.1634, + "step": 11419 + }, + { + "epoch": 104.77, + "learning_rate": 6.364678899082568e-05, + "loss": 0.1276, + "step": 11420 + }, + { + "epoch": 104.78, + "learning_rate": 6.36085626911315e-05, + "loss": 0.127, + "step": 11421 + }, + { + "epoch": 104.79, + "learning_rate": 6.35703363914373e-05, + "loss": 0.1125, + "step": 11422 + }, + { + "epoch": 104.8, + "learning_rate": 6.353211009174312e-05, + "loss": 0.1365, + "step": 11423 + }, + { + "epoch": 104.81, + "learning_rate": 6.349388379204893e-05, + "loss": 0.0994, + "step": 11424 + }, + { + "epoch": 104.82, + "learning_rate": 6.345565749235475e-05, + "loss": 0.1503, + "step": 11425 + }, + { + "epoch": 104.83, + "learning_rate": 6.341743119266055e-05, + "loss": 0.126, + "step": 11426 + }, + { + "epoch": 104.83, + "learning_rate": 6.337920489296637e-05, + "loss": 0.1108, + "step": 11427 + }, + { + "epoch": 104.84, + "learning_rate": 6.334097859327217e-05, + "loss": 0.1183, + "step": 11428 + }, + { + "epoch": 104.85, + "learning_rate": 6.330275229357799e-05, + "loss": 0.0623, + "step": 11429 + }, + { + "epoch": 104.86, + "learning_rate": 6.32645259938838e-05, + "loss": 0.1117, + "step": 11430 + }, + { + "epoch": 104.87, + "learning_rate": 6.322629969418961e-05, + "loss": 0.1016, + "step": 11431 + }, + { + "epoch": 104.88, + "learning_rate": 6.31880733944954e-05, + "loss": 0.0679, + "step": 11432 + }, + { + "epoch": 104.89, + "learning_rate": 6.314984709480122e-05, + "loss": 0.1478, + "step": 11433 + }, + { + "epoch": 104.9, + "learning_rate": 6.311162079510702e-05, + "loss": 0.093, + "step": 11434 + }, + { + "epoch": 104.91, + "learning_rate": 6.307339449541284e-05, + "loss": 0.1683, + "step": 11435 + }, + { + "epoch": 104.92, + "learning_rate": 6.303516819571866e-05, + "loss": 0.0996, + "step": 11436 + }, + { + "epoch": 104.93, + "learning_rate": 6.299694189602446e-05, + "loss": 0.1441, + "step": 11437 + }, + { + "epoch": 104.94, + "learning_rate": 6.295871559633028e-05, + "loss": 0.1693, + "step": 11438 + }, + { + "epoch": 104.94, + "learning_rate": 6.292048929663609e-05, + "loss": 0.1138, + "step": 11439 + }, + { + "epoch": 104.95, + "learning_rate": 6.28822629969419e-05, + "loss": 0.1006, + "step": 11440 + }, + { + "epoch": 104.96, + "learning_rate": 6.284403669724771e-05, + "loss": 0.1205, + "step": 11441 + }, + { + "epoch": 104.97, + "learning_rate": 6.280581039755353e-05, + "loss": 0.0731, + "step": 11442 + }, + { + "epoch": 104.98, + "learning_rate": 6.276758409785933e-05, + "loss": 0.1866, + "step": 11443 + }, + { + "epoch": 104.99, + "learning_rate": 6.272935779816513e-05, + "loss": 0.0313, + "step": 11444 + }, + { + "epoch": 105.0, + "learning_rate": 6.269113149847094e-05, + "loss": 0.102, + "step": 11445 + }, + { + "epoch": 105.01, + "learning_rate": 6.265290519877676e-05, + "loss": 0.1632, + "step": 11446 + }, + { + "epoch": 105.02, + "learning_rate": 6.261467889908256e-05, + "loss": 0.143, + "step": 11447 + }, + { + "epoch": 105.03, + "learning_rate": 6.257645259938838e-05, + "loss": 0.1529, + "step": 11448 + }, + { + "epoch": 105.04, + "learning_rate": 6.253822629969418e-05, + "loss": 0.0866, + "step": 11449 + }, + { + "epoch": 105.05, + "learning_rate": 6.25e-05, + "loss": 0.1579, + "step": 11450 + }, + { + "epoch": 105.06, + "learning_rate": 6.246177370030582e-05, + "loss": 0.1255, + "step": 11451 + }, + { + "epoch": 105.06, + "learning_rate": 6.242354740061162e-05, + "loss": 0.1302, + "step": 11452 + }, + { + "epoch": 105.07, + "learning_rate": 6.238532110091744e-05, + "loss": 0.1074, + "step": 11453 + }, + { + "epoch": 105.08, + "learning_rate": 6.234709480122325e-05, + "loss": 0.1428, + "step": 11454 + }, + { + "epoch": 105.09, + "learning_rate": 6.230886850152905e-05, + "loss": 0.0776, + "step": 11455 + }, + { + "epoch": 105.1, + "learning_rate": 6.227064220183487e-05, + "loss": 0.0935, + "step": 11456 + }, + { + "epoch": 105.11, + "learning_rate": 6.223241590214067e-05, + "loss": 0.0945, + "step": 11457 + }, + { + "epoch": 105.12, + "learning_rate": 6.219418960244649e-05, + "loss": 0.1178, + "step": 11458 + }, + { + "epoch": 105.13, + "learning_rate": 6.21559633027523e-05, + "loss": 0.1224, + "step": 11459 + }, + { + "epoch": 105.14, + "learning_rate": 6.21177370030581e-05, + "loss": 0.1132, + "step": 11460 + }, + { + "epoch": 105.15, + "learning_rate": 6.207951070336392e-05, + "loss": 0.1062, + "step": 11461 + }, + { + "epoch": 105.16, + "learning_rate": 6.204128440366972e-05, + "loss": 0.0491, + "step": 11462 + }, + { + "epoch": 105.17, + "learning_rate": 6.200305810397554e-05, + "loss": 0.062, + "step": 11463 + }, + { + "epoch": 105.17, + "learning_rate": 6.196483180428134e-05, + "loss": 0.193, + "step": 11464 + }, + { + "epoch": 105.18, + "learning_rate": 6.192660550458716e-05, + "loss": 0.1407, + "step": 11465 + }, + { + "epoch": 105.19, + "learning_rate": 6.188837920489296e-05, + "loss": 0.1149, + "step": 11466 + }, + { + "epoch": 105.2, + "learning_rate": 6.185015290519878e-05, + "loss": 0.0882, + "step": 11467 + }, + { + "epoch": 105.21, + "learning_rate": 6.181192660550459e-05, + "loss": 0.1061, + "step": 11468 + }, + { + "epoch": 105.22, + "learning_rate": 6.17737003058104e-05, + "loss": 0.1255, + "step": 11469 + }, + { + "epoch": 105.23, + "learning_rate": 6.173547400611621e-05, + "loss": 0.0615, + "step": 11470 + }, + { + "epoch": 105.24, + "learning_rate": 6.169724770642203e-05, + "loss": 0.0923, + "step": 11471 + }, + { + "epoch": 105.25, + "learning_rate": 6.165902140672783e-05, + "loss": 0.076, + "step": 11472 + }, + { + "epoch": 105.26, + "learning_rate": 6.162079510703364e-05, + "loss": 0.1726, + "step": 11473 + }, + { + "epoch": 105.27, + "learning_rate": 6.158256880733945e-05, + "loss": 0.183, + "step": 11474 + }, + { + "epoch": 105.28, + "learning_rate": 6.154434250764526e-05, + "loss": 0.1364, + "step": 11475 + }, + { + "epoch": 105.28, + "learning_rate": 6.150611620795108e-05, + "loss": 0.1449, + "step": 11476 + }, + { + "epoch": 105.29, + "learning_rate": 6.146788990825688e-05, + "loss": 0.1163, + "step": 11477 + }, + { + "epoch": 105.3, + "learning_rate": 6.142966360856268e-05, + "loss": 0.1053, + "step": 11478 + }, + { + "epoch": 105.31, + "learning_rate": 6.13914373088685e-05, + "loss": 0.0769, + "step": 11479 + }, + { + "epoch": 105.32, + "learning_rate": 6.135321100917432e-05, + "loss": 0.1087, + "step": 11480 + }, + { + "epoch": 105.33, + "learning_rate": 6.131498470948012e-05, + "loss": 0.1254, + "step": 11481 + }, + { + "epoch": 105.34, + "learning_rate": 6.127675840978594e-05, + "loss": 0.0864, + "step": 11482 + }, + { + "epoch": 105.35, + "learning_rate": 6.123853211009175e-05, + "loss": 0.1186, + "step": 11483 + }, + { + "epoch": 105.36, + "learning_rate": 6.120030581039755e-05, + "loss": 0.1032, + "step": 11484 + }, + { + "epoch": 105.37, + "learning_rate": 6.116207951070337e-05, + "loss": 0.1424, + "step": 11485 + }, + { + "epoch": 105.38, + "learning_rate": 6.112385321100917e-05, + "loss": 0.1722, + "step": 11486 + }, + { + "epoch": 105.39, + "learning_rate": 6.108562691131499e-05, + "loss": 0.1387, + "step": 11487 + }, + { + "epoch": 105.39, + "learning_rate": 6.10474006116208e-05, + "loss": 0.0699, + "step": 11488 + }, + { + "epoch": 105.4, + "learning_rate": 6.100917431192661e-05, + "loss": 0.165, + "step": 11489 + }, + { + "epoch": 105.41, + "learning_rate": 6.097094801223242e-05, + "loss": 0.1588, + "step": 11490 + }, + { + "epoch": 105.42, + "learning_rate": 6.093272171253823e-05, + "loss": 0.1059, + "step": 11491 + }, + { + "epoch": 105.43, + "learning_rate": 6.089449541284404e-05, + "loss": 0.1225, + "step": 11492 + }, + { + "epoch": 105.44, + "learning_rate": 6.085626911314985e-05, + "loss": 0.1148, + "step": 11493 + }, + { + "epoch": 105.45, + "learning_rate": 6.081804281345566e-05, + "loss": 0.0952, + "step": 11494 + }, + { + "epoch": 105.46, + "learning_rate": 6.077981651376147e-05, + "loss": 0.0633, + "step": 11495 + }, + { + "epoch": 105.47, + "learning_rate": 6.0741590214067276e-05, + "loss": 0.0707, + "step": 11496 + }, + { + "epoch": 105.48, + "learning_rate": 6.070336391437309e-05, + "loss": 0.0671, + "step": 11497 + }, + { + "epoch": 105.49, + "learning_rate": 6.06651376146789e-05, + "loss": 0.1597, + "step": 11498 + }, + { + "epoch": 105.5, + "learning_rate": 6.062691131498471e-05, + "loss": 0.0456, + "step": 11499 + }, + { + "epoch": 105.5, + "learning_rate": 6.058868501529052e-05, + "loss": 0.183, + "step": 11500 + }, + { + "epoch": 105.5, + "eval_cer": 0.13916250618450052, + "eval_loss": 0.7310239672660828, + "eval_runtime": 87.2755, + "eval_samples_per_second": 18.871, + "eval_steps_per_second": 2.36, + "eval_wer": 0.4838116637728526, + "step": 11500 + }, + { + "epoch": 105.51, + "learning_rate": 6.055045871559633e-05, + "loss": 0.1149, + "step": 11501 + }, + { + "epoch": 105.52, + "learning_rate": 6.051223241590214e-05, + "loss": 0.1846, + "step": 11502 + }, + { + "epoch": 105.53, + "learning_rate": 6.0474006116207954e-05, + "loss": 0.1567, + "step": 11503 + }, + { + "epoch": 105.54, + "learning_rate": 6.0435779816513765e-05, + "loss": 0.1304, + "step": 11504 + }, + { + "epoch": 105.55, + "learning_rate": 6.0397553516819576e-05, + "loss": 0.1121, + "step": 11505 + }, + { + "epoch": 105.56, + "learning_rate": 6.035932721712539e-05, + "loss": 0.089, + "step": 11506 + }, + { + "epoch": 105.57, + "learning_rate": 6.03211009174312e-05, + "loss": 0.1207, + "step": 11507 + }, + { + "epoch": 105.58, + "learning_rate": 6.0282874617737e-05, + "loss": 0.1284, + "step": 11508 + }, + { + "epoch": 105.59, + "learning_rate": 6.0244648318042813e-05, + "loss": 0.1378, + "step": 11509 + }, + { + "epoch": 105.6, + "learning_rate": 6.0206422018348624e-05, + "loss": 0.1494, + "step": 11510 + }, + { + "epoch": 105.61, + "learning_rate": 6.0168195718654436e-05, + "loss": 0.1252, + "step": 11511 + }, + { + "epoch": 105.61, + "learning_rate": 6.012996941896025e-05, + "loss": 0.1335, + "step": 11512 + }, + { + "epoch": 105.62, + "learning_rate": 6.009174311926606e-05, + "loss": 0.0935, + "step": 11513 + }, + { + "epoch": 105.63, + "learning_rate": 6.005351681957186e-05, + "loss": 0.2125, + "step": 11514 + }, + { + "epoch": 105.64, + "learning_rate": 6.001529051987767e-05, + "loss": 0.1059, + "step": 11515 + }, + { + "epoch": 105.65, + "learning_rate": 5.9977064220183484e-05, + "loss": 0.1099, + "step": 11516 + }, + { + "epoch": 105.66, + "learning_rate": 5.99388379204893e-05, + "loss": 0.1152, + "step": 11517 + }, + { + "epoch": 105.67, + "learning_rate": 5.990061162079511e-05, + "loss": 0.1102, + "step": 11518 + }, + { + "epoch": 105.68, + "learning_rate": 5.9862385321100924e-05, + "loss": 0.1047, + "step": 11519 + }, + { + "epoch": 105.69, + "learning_rate": 5.982415902140673e-05, + "loss": 0.0868, + "step": 11520 + }, + { + "epoch": 105.7, + "learning_rate": 5.978593272171254e-05, + "loss": 0.1656, + "step": 11521 + }, + { + "epoch": 105.71, + "learning_rate": 5.974770642201835e-05, + "loss": 0.1092, + "step": 11522 + }, + { + "epoch": 105.72, + "learning_rate": 5.970948012232416e-05, + "loss": 0.0496, + "step": 11523 + }, + { + "epoch": 105.72, + "learning_rate": 5.967125382262997e-05, + "loss": 0.084, + "step": 11524 + }, + { + "epoch": 105.73, + "learning_rate": 5.9633027522935784e-05, + "loss": 0.072, + "step": 11525 + }, + { + "epoch": 105.74, + "learning_rate": 5.959480122324159e-05, + "loss": 0.0761, + "step": 11526 + }, + { + "epoch": 105.75, + "learning_rate": 5.95565749235474e-05, + "loss": 0.1955, + "step": 11527 + }, + { + "epoch": 105.76, + "learning_rate": 5.951834862385321e-05, + "loss": 0.1839, + "step": 11528 + }, + { + "epoch": 105.77, + "learning_rate": 5.948012232415902e-05, + "loss": 0.1422, + "step": 11529 + }, + { + "epoch": 105.78, + "learning_rate": 5.944189602446483e-05, + "loss": 0.1684, + "step": 11530 + }, + { + "epoch": 105.79, + "learning_rate": 5.940366972477065e-05, + "loss": 0.1158, + "step": 11531 + }, + { + "epoch": 105.8, + "learning_rate": 5.9365443425076454e-05, + "loss": 0.0765, + "step": 11532 + }, + { + "epoch": 105.81, + "learning_rate": 5.9327217125382265e-05, + "loss": 0.09, + "step": 11533 + }, + { + "epoch": 105.82, + "learning_rate": 5.9288990825688076e-05, + "loss": 0.1207, + "step": 11534 + }, + { + "epoch": 105.83, + "learning_rate": 5.925076452599389e-05, + "loss": 0.1188, + "step": 11535 + }, + { + "epoch": 105.83, + "learning_rate": 5.92125382262997e-05, + "loss": 0.0742, + "step": 11536 + }, + { + "epoch": 105.84, + "learning_rate": 5.917431192660551e-05, + "loss": 0.0842, + "step": 11537 + }, + { + "epoch": 105.85, + "learning_rate": 5.9136085626911314e-05, + "loss": 0.1567, + "step": 11538 + }, + { + "epoch": 105.86, + "learning_rate": 5.9097859327217125e-05, + "loss": 0.1012, + "step": 11539 + }, + { + "epoch": 105.87, + "learning_rate": 5.9059633027522936e-05, + "loss": 0.1172, + "step": 11540 + }, + { + "epoch": 105.88, + "learning_rate": 5.902140672782875e-05, + "loss": 0.1014, + "step": 11541 + }, + { + "epoch": 105.89, + "learning_rate": 5.898318042813456e-05, + "loss": 0.1458, + "step": 11542 + }, + { + "epoch": 105.9, + "learning_rate": 5.894495412844037e-05, + "loss": 0.1219, + "step": 11543 + }, + { + "epoch": 105.91, + "learning_rate": 5.8906727828746174e-05, + "loss": 0.0695, + "step": 11544 + }, + { + "epoch": 105.92, + "learning_rate": 5.8868501529051985e-05, + "loss": 0.1069, + "step": 11545 + }, + { + "epoch": 105.93, + "learning_rate": 5.88302752293578e-05, + "loss": 0.1152, + "step": 11546 + }, + { + "epoch": 105.94, + "learning_rate": 5.8792048929663614e-05, + "loss": 0.0983, + "step": 11547 + }, + { + "epoch": 105.94, + "learning_rate": 5.8753822629969425e-05, + "loss": 0.0939, + "step": 11548 + }, + { + "epoch": 105.95, + "learning_rate": 5.8715596330275236e-05, + "loss": 0.0931, + "step": 11549 + }, + { + "epoch": 105.96, + "learning_rate": 5.867737003058104e-05, + "loss": 0.0735, + "step": 11550 + }, + { + "epoch": 105.97, + "learning_rate": 5.863914373088685e-05, + "loss": 0.0886, + "step": 11551 + }, + { + "epoch": 105.98, + "learning_rate": 5.860091743119266e-05, + "loss": 0.1104, + "step": 11552 + }, + { + "epoch": 105.99, + "learning_rate": 5.856269113149847e-05, + "loss": 0.0426, + "step": 11553 + }, + { + "epoch": 106.0, + "learning_rate": 5.8524464831804284e-05, + "loss": 0.1062, + "step": 11554 + }, + { + "epoch": 106.01, + "learning_rate": 5.8486238532110095e-05, + "loss": 0.2032, + "step": 11555 + }, + { + "epoch": 106.02, + "learning_rate": 5.84480122324159e-05, + "loss": 0.1204, + "step": 11556 + }, + { + "epoch": 106.03, + "learning_rate": 5.840978593272171e-05, + "loss": 0.1138, + "step": 11557 + }, + { + "epoch": 106.04, + "learning_rate": 5.837155963302752e-05, + "loss": 0.1326, + "step": 11558 + }, + { + "epoch": 106.05, + "learning_rate": 5.833333333333333e-05, + "loss": 0.1282, + "step": 11559 + }, + { + "epoch": 106.06, + "learning_rate": 5.8295107033639144e-05, + "loss": 0.1384, + "step": 11560 + }, + { + "epoch": 106.06, + "learning_rate": 5.825688073394496e-05, + "loss": 0.1203, + "step": 11561 + }, + { + "epoch": 106.07, + "learning_rate": 5.8218654434250766e-05, + "loss": 0.1184, + "step": 11562 + }, + { + "epoch": 106.08, + "learning_rate": 5.818042813455658e-05, + "loss": 0.0839, + "step": 11563 + }, + { + "epoch": 106.09, + "learning_rate": 5.814220183486239e-05, + "loss": 0.1442, + "step": 11564 + }, + { + "epoch": 106.1, + "learning_rate": 5.81039755351682e-05, + "loss": 0.1065, + "step": 11565 + }, + { + "epoch": 106.11, + "learning_rate": 5.806574923547401e-05, + "loss": 0.0874, + "step": 11566 + }, + { + "epoch": 106.12, + "learning_rate": 5.802752293577982e-05, + "loss": 0.1089, + "step": 11567 + }, + { + "epoch": 106.13, + "learning_rate": 5.7989296636085626e-05, + "loss": 0.1201, + "step": 11568 + }, + { + "epoch": 106.14, + "learning_rate": 5.7951070336391437e-05, + "loss": 0.0827, + "step": 11569 + }, + { + "epoch": 106.15, + "learning_rate": 5.791284403669725e-05, + "loss": 0.0763, + "step": 11570 + }, + { + "epoch": 106.16, + "learning_rate": 5.787461773700306e-05, + "loss": 0.0766, + "step": 11571 + }, + { + "epoch": 106.17, + "learning_rate": 5.783639143730887e-05, + "loss": 0.094, + "step": 11572 + }, + { + "epoch": 106.17, + "learning_rate": 5.779816513761468e-05, + "loss": 0.0752, + "step": 11573 + }, + { + "epoch": 106.18, + "learning_rate": 5.7759938837920485e-05, + "loss": 0.104, + "step": 11574 + }, + { + "epoch": 106.19, + "learning_rate": 5.7721712538226296e-05, + "loss": 0.0489, + "step": 11575 + }, + { + "epoch": 106.2, + "learning_rate": 5.7683486238532114e-05, + "loss": 0.1036, + "step": 11576 + }, + { + "epoch": 106.21, + "learning_rate": 5.7645259938837925e-05, + "loss": 0.0757, + "step": 11577 + }, + { + "epoch": 106.22, + "learning_rate": 5.7607033639143736e-05, + "loss": 0.0562, + "step": 11578 + }, + { + "epoch": 106.23, + "learning_rate": 5.756880733944955e-05, + "loss": 0.0309, + "step": 11579 + }, + { + "epoch": 106.24, + "learning_rate": 5.753058103975535e-05, + "loss": 0.0858, + "step": 11580 + }, + { + "epoch": 106.25, + "learning_rate": 5.749235474006116e-05, + "loss": 0.0731, + "step": 11581 + }, + { + "epoch": 106.26, + "learning_rate": 5.7454128440366974e-05, + "loss": 0.1881, + "step": 11582 + }, + { + "epoch": 106.27, + "learning_rate": 5.7415902140672785e-05, + "loss": 0.207, + "step": 11583 + }, + { + "epoch": 106.28, + "learning_rate": 5.7377675840978596e-05, + "loss": 0.1399, + "step": 11584 + }, + { + "epoch": 106.28, + "learning_rate": 5.733944954128441e-05, + "loss": 0.1141, + "step": 11585 + }, + { + "epoch": 106.29, + "learning_rate": 5.730122324159021e-05, + "loss": 0.2015, + "step": 11586 + }, + { + "epoch": 106.3, + "learning_rate": 5.726299694189602e-05, + "loss": 0.0885, + "step": 11587 + }, + { + "epoch": 106.31, + "learning_rate": 5.722477064220183e-05, + "loss": 0.0874, + "step": 11588 + }, + { + "epoch": 106.32, + "learning_rate": 5.7186544342507644e-05, + "loss": 0.1123, + "step": 11589 + }, + { + "epoch": 106.33, + "learning_rate": 5.714831804281346e-05, + "loss": 0.1209, + "step": 11590 + }, + { + "epoch": 106.34, + "learning_rate": 5.711009174311927e-05, + "loss": 0.1151, + "step": 11591 + }, + { + "epoch": 106.35, + "learning_rate": 5.707186544342508e-05, + "loss": 0.0776, + "step": 11592 + }, + { + "epoch": 106.36, + "learning_rate": 5.703363914373089e-05, + "loss": 0.0875, + "step": 11593 + }, + { + "epoch": 106.37, + "learning_rate": 5.69954128440367e-05, + "loss": 0.0764, + "step": 11594 + }, + { + "epoch": 106.38, + "learning_rate": 5.695718654434251e-05, + "loss": 0.1086, + "step": 11595 + }, + { + "epoch": 106.39, + "learning_rate": 5.691896024464832e-05, + "loss": 0.097, + "step": 11596 + }, + { + "epoch": 106.39, + "learning_rate": 5.688073394495413e-05, + "loss": 0.11, + "step": 11597 + }, + { + "epoch": 106.4, + "learning_rate": 5.684250764525994e-05, + "loss": 0.1089, + "step": 11598 + }, + { + "epoch": 106.41, + "learning_rate": 5.680428134556575e-05, + "loss": 0.0564, + "step": 11599 + }, + { + "epoch": 106.42, + "learning_rate": 5.676605504587156e-05, + "loss": 0.1189, + "step": 11600 + }, + { + "epoch": 106.43, + "learning_rate": 5.672782874617737e-05, + "loss": 0.1014, + "step": 11601 + }, + { + "epoch": 106.44, + "learning_rate": 5.668960244648318e-05, + "loss": 0.1162, + "step": 11602 + }, + { + "epoch": 106.45, + "learning_rate": 5.665137614678899e-05, + "loss": 0.1201, + "step": 11603 + }, + { + "epoch": 106.46, + "learning_rate": 5.66131498470948e-05, + "loss": 0.1046, + "step": 11604 + }, + { + "epoch": 106.47, + "learning_rate": 5.6574923547400615e-05, + "loss": 0.1008, + "step": 11605 + }, + { + "epoch": 106.48, + "learning_rate": 5.6536697247706426e-05, + "loss": 0.0986, + "step": 11606 + }, + { + "epoch": 106.49, + "learning_rate": 5.649847094801224e-05, + "loss": 0.0808, + "step": 11607 + }, + { + "epoch": 106.5, + "learning_rate": 5.646024464831805e-05, + "loss": 0.1069, + "step": 11608 + }, + { + "epoch": 106.5, + "learning_rate": 5.642201834862386e-05, + "loss": 0.1777, + "step": 11609 + }, + { + "epoch": 106.51, + "learning_rate": 5.638379204892966e-05, + "loss": 0.1382, + "step": 11610 + }, + { + "epoch": 106.52, + "learning_rate": 5.6345565749235474e-05, + "loss": 0.1161, + "step": 11611 + }, + { + "epoch": 106.53, + "learning_rate": 5.6307339449541285e-05, + "loss": 0.1498, + "step": 11612 + }, + { + "epoch": 106.54, + "learning_rate": 5.6269113149847096e-05, + "loss": 0.0779, + "step": 11613 + }, + { + "epoch": 106.55, + "learning_rate": 5.623088685015291e-05, + "loss": 0.1076, + "step": 11614 + }, + { + "epoch": 106.56, + "learning_rate": 5.619266055045872e-05, + "loss": 0.0796, + "step": 11615 + }, + { + "epoch": 106.57, + "learning_rate": 5.615443425076452e-05, + "loss": 0.1182, + "step": 11616 + }, + { + "epoch": 106.58, + "learning_rate": 5.6116207951070334e-05, + "loss": 0.081, + "step": 11617 + }, + { + "epoch": 106.59, + "learning_rate": 5.6077981651376145e-05, + "loss": 0.1399, + "step": 11618 + }, + { + "epoch": 106.6, + "learning_rate": 5.6039755351681956e-05, + "loss": 0.1031, + "step": 11619 + }, + { + "epoch": 106.61, + "learning_rate": 5.6001529051987774e-05, + "loss": 0.1391, + "step": 11620 + }, + { + "epoch": 106.61, + "learning_rate": 5.5963302752293585e-05, + "loss": 0.0922, + "step": 11621 + }, + { + "epoch": 106.62, + "learning_rate": 5.592507645259939e-05, + "loss": 0.1174, + "step": 11622 + }, + { + "epoch": 106.63, + "learning_rate": 5.58868501529052e-05, + "loss": 0.0506, + "step": 11623 + }, + { + "epoch": 106.64, + "learning_rate": 5.584862385321101e-05, + "loss": 0.1023, + "step": 11624 + }, + { + "epoch": 106.65, + "learning_rate": 5.581039755351682e-05, + "loss": 0.0876, + "step": 11625 + }, + { + "epoch": 106.66, + "learning_rate": 5.577217125382263e-05, + "loss": 0.0809, + "step": 11626 + }, + { + "epoch": 106.67, + "learning_rate": 5.5733944954128444e-05, + "loss": 0.1515, + "step": 11627 + }, + { + "epoch": 106.68, + "learning_rate": 5.569571865443425e-05, + "loss": 0.1335, + "step": 11628 + }, + { + "epoch": 106.69, + "learning_rate": 5.565749235474006e-05, + "loss": 0.1391, + "step": 11629 + }, + { + "epoch": 106.7, + "learning_rate": 5.561926605504587e-05, + "loss": 0.0829, + "step": 11630 + }, + { + "epoch": 106.71, + "learning_rate": 5.558103975535168e-05, + "loss": 0.0618, + "step": 11631 + }, + { + "epoch": 106.72, + "learning_rate": 5.554281345565749e-05, + "loss": 0.09, + "step": 11632 + }, + { + "epoch": 106.72, + "learning_rate": 5.5504587155963304e-05, + "loss": 0.104, + "step": 11633 + }, + { + "epoch": 106.73, + "learning_rate": 5.546636085626911e-05, + "loss": 0.0635, + "step": 11634 + }, + { + "epoch": 106.74, + "learning_rate": 5.5428134556574926e-05, + "loss": 0.0582, + "step": 11635 + }, + { + "epoch": 106.75, + "learning_rate": 5.538990825688074e-05, + "loss": 0.1867, + "step": 11636 + }, + { + "epoch": 106.76, + "learning_rate": 5.535168195718655e-05, + "loss": 0.1426, + "step": 11637 + }, + { + "epoch": 106.77, + "learning_rate": 5.531345565749236e-05, + "loss": 0.1135, + "step": 11638 + }, + { + "epoch": 106.78, + "learning_rate": 5.527522935779817e-05, + "loss": 0.0902, + "step": 11639 + }, + { + "epoch": 106.79, + "learning_rate": 5.5237003058103975e-05, + "loss": 0.0947, + "step": 11640 + }, + { + "epoch": 106.8, + "learning_rate": 5.5198776758409786e-05, + "loss": 0.1, + "step": 11641 + }, + { + "epoch": 106.81, + "learning_rate": 5.51605504587156e-05, + "loss": 0.1338, + "step": 11642 + }, + { + "epoch": 106.82, + "learning_rate": 5.512232415902141e-05, + "loss": 0.093, + "step": 11643 + }, + { + "epoch": 106.83, + "learning_rate": 5.508409785932722e-05, + "loss": 0.0862, + "step": 11644 + }, + { + "epoch": 106.83, + "learning_rate": 5.504587155963303e-05, + "loss": 0.1616, + "step": 11645 + }, + { + "epoch": 106.84, + "learning_rate": 5.5007645259938834e-05, + "loss": 0.1099, + "step": 11646 + }, + { + "epoch": 106.85, + "learning_rate": 5.4969418960244645e-05, + "loss": 0.0975, + "step": 11647 + }, + { + "epoch": 106.86, + "learning_rate": 5.4931192660550456e-05, + "loss": 0.1055, + "step": 11648 + }, + { + "epoch": 106.87, + "learning_rate": 5.4892966360856274e-05, + "loss": 0.1799, + "step": 11649 + }, + { + "epoch": 106.88, + "learning_rate": 5.4854740061162085e-05, + "loss": 0.1202, + "step": 11650 + }, + { + "epoch": 106.89, + "learning_rate": 5.4816513761467896e-05, + "loss": 0.1298, + "step": 11651 + }, + { + "epoch": 106.9, + "learning_rate": 5.47782874617737e-05, + "loss": 0.0825, + "step": 11652 + }, + { + "epoch": 106.91, + "learning_rate": 5.474006116207951e-05, + "loss": 0.0928, + "step": 11653 + }, + { + "epoch": 106.92, + "learning_rate": 5.470183486238532e-05, + "loss": 0.0623, + "step": 11654 + }, + { + "epoch": 106.93, + "learning_rate": 5.4663608562691134e-05, + "loss": 0.1421, + "step": 11655 + }, + { + "epoch": 106.94, + "learning_rate": 5.4625382262996945e-05, + "loss": 0.0974, + "step": 11656 + }, + { + "epoch": 106.94, + "learning_rate": 5.4587155963302756e-05, + "loss": 0.1427, + "step": 11657 + }, + { + "epoch": 106.95, + "learning_rate": 5.454892966360856e-05, + "loss": 0.1166, + "step": 11658 + }, + { + "epoch": 106.96, + "learning_rate": 5.451070336391437e-05, + "loss": 0.1276, + "step": 11659 + }, + { + "epoch": 106.97, + "learning_rate": 5.447247706422018e-05, + "loss": 0.0465, + "step": 11660 + }, + { + "epoch": 106.98, + "learning_rate": 5.4434250764525994e-05, + "loss": 0.0645, + "step": 11661 + }, + { + "epoch": 106.99, + "learning_rate": 5.4396024464831805e-05, + "loss": 0.0682, + "step": 11662 + }, + { + "epoch": 107.0, + "learning_rate": 5.4357798165137616e-05, + "loss": 0.1218, + "step": 11663 + }, + { + "epoch": 107.01, + "learning_rate": 5.431957186544343e-05, + "loss": 0.1622, + "step": 11664 + }, + { + "epoch": 107.02, + "learning_rate": 5.428134556574924e-05, + "loss": 0.1284, + "step": 11665 + }, + { + "epoch": 107.03, + "learning_rate": 5.424311926605505e-05, + "loss": 0.1085, + "step": 11666 + }, + { + "epoch": 107.04, + "learning_rate": 5.420489296636086e-05, + "loss": 0.0532, + "step": 11667 + }, + { + "epoch": 107.05, + "learning_rate": 5.416666666666667e-05, + "loss": 0.149, + "step": 11668 + }, + { + "epoch": 107.06, + "learning_rate": 5.412844036697248e-05, + "loss": 0.1032, + "step": 11669 + }, + { + "epoch": 107.06, + "learning_rate": 5.4090214067278286e-05, + "loss": 0.1112, + "step": 11670 + }, + { + "epoch": 107.07, + "learning_rate": 5.40519877675841e-05, + "loss": 0.0755, + "step": 11671 + }, + { + "epoch": 107.08, + "learning_rate": 5.401376146788991e-05, + "loss": 0.0646, + "step": 11672 + }, + { + "epoch": 107.09, + "learning_rate": 5.397553516819572e-05, + "loss": 0.091, + "step": 11673 + }, + { + "epoch": 107.1, + "learning_rate": 5.393730886850153e-05, + "loss": 0.1021, + "step": 11674 + }, + { + "epoch": 107.11, + "learning_rate": 5.389908256880734e-05, + "loss": 0.1521, + "step": 11675 + }, + { + "epoch": 107.12, + "learning_rate": 5.3860856269113146e-05, + "loss": 0.1017, + "step": 11676 + }, + { + "epoch": 107.13, + "learning_rate": 5.382262996941896e-05, + "loss": 0.108, + "step": 11677 + }, + { + "epoch": 107.14, + "learning_rate": 5.378440366972477e-05, + "loss": 0.1644, + "step": 11678 + }, + { + "epoch": 107.15, + "learning_rate": 5.3746177370030586e-05, + "loss": 0.0953, + "step": 11679 + }, + { + "epoch": 107.16, + "learning_rate": 5.37079510703364e-05, + "loss": 0.0849, + "step": 11680 + }, + { + "epoch": 107.17, + "learning_rate": 5.366972477064221e-05, + "loss": 0.132, + "step": 11681 + }, + { + "epoch": 107.17, + "learning_rate": 5.363149847094801e-05, + "loss": 0.0911, + "step": 11682 + }, + { + "epoch": 107.18, + "learning_rate": 5.359327217125382e-05, + "loss": 0.1281, + "step": 11683 + }, + { + "epoch": 107.19, + "learning_rate": 5.3555045871559634e-05, + "loss": 0.0747, + "step": 11684 + }, + { + "epoch": 107.2, + "learning_rate": 5.3516819571865445e-05, + "loss": 0.0964, + "step": 11685 + }, + { + "epoch": 107.21, + "learning_rate": 5.3478593272171257e-05, + "loss": 0.094, + "step": 11686 + }, + { + "epoch": 107.22, + "learning_rate": 5.344036697247707e-05, + "loss": 0.1391, + "step": 11687 + }, + { + "epoch": 107.23, + "learning_rate": 5.340214067278287e-05, + "loss": 0.0954, + "step": 11688 + }, + { + "epoch": 107.24, + "learning_rate": 5.336391437308868e-05, + "loss": 0.0806, + "step": 11689 + }, + { + "epoch": 107.25, + "learning_rate": 5.3325688073394494e-05, + "loss": 0.0536, + "step": 11690 + }, + { + "epoch": 107.26, + "learning_rate": 5.3287461773700305e-05, + "loss": 0.1382, + "step": 11691 + }, + { + "epoch": 107.27, + "learning_rate": 5.3249235474006116e-05, + "loss": 0.1509, + "step": 11692 + }, + { + "epoch": 107.28, + "learning_rate": 5.3211009174311934e-05, + "loss": 0.1001, + "step": 11693 + }, + { + "epoch": 107.28, + "learning_rate": 5.317278287461774e-05, + "loss": 0.1319, + "step": 11694 + }, + { + "epoch": 107.29, + "learning_rate": 5.313455657492355e-05, + "loss": 0.1095, + "step": 11695 + }, + { + "epoch": 107.3, + "learning_rate": 5.309633027522936e-05, + "loss": 0.0815, + "step": 11696 + }, + { + "epoch": 107.31, + "learning_rate": 5.305810397553517e-05, + "loss": 0.1518, + "step": 11697 + }, + { + "epoch": 107.32, + "learning_rate": 5.301987767584098e-05, + "loss": 0.1402, + "step": 11698 + }, + { + "epoch": 107.33, + "learning_rate": 5.2981651376146794e-05, + "loss": 0.1094, + "step": 11699 + }, + { + "epoch": 107.34, + "learning_rate": 5.29434250764526e-05, + "loss": 0.0687, + "step": 11700 + }, + { + "epoch": 107.35, + "learning_rate": 5.290519877675841e-05, + "loss": 0.1222, + "step": 11701 + }, + { + "epoch": 107.36, + "learning_rate": 5.286697247706422e-05, + "loss": 0.0862, + "step": 11702 + }, + { + "epoch": 107.37, + "learning_rate": 5.282874617737003e-05, + "loss": 0.0944, + "step": 11703 + }, + { + "epoch": 107.38, + "learning_rate": 5.279051987767584e-05, + "loss": 0.053, + "step": 11704 + }, + { + "epoch": 107.39, + "learning_rate": 5.275229357798165e-05, + "loss": 0.0615, + "step": 11705 + }, + { + "epoch": 107.39, + "learning_rate": 5.271406727828746e-05, + "loss": 0.1179, + "step": 11706 + }, + { + "epoch": 107.4, + "learning_rate": 5.267584097859327e-05, + "loss": 0.1071, + "step": 11707 + }, + { + "epoch": 107.41, + "learning_rate": 5.2637614678899086e-05, + "loss": 0.1362, + "step": 11708 + }, + { + "epoch": 107.42, + "learning_rate": 5.25993883792049e-05, + "loss": 0.0738, + "step": 11709 + }, + { + "epoch": 107.43, + "learning_rate": 5.256116207951071e-05, + "loss": 0.0996, + "step": 11710 + }, + { + "epoch": 107.44, + "learning_rate": 5.252293577981652e-05, + "loss": 0.0708, + "step": 11711 + }, + { + "epoch": 107.45, + "learning_rate": 5.2484709480122324e-05, + "loss": 0.1078, + "step": 11712 + }, + { + "epoch": 107.46, + "learning_rate": 5.2446483180428135e-05, + "loss": 0.0393, + "step": 11713 + }, + { + "epoch": 107.47, + "learning_rate": 5.2408256880733946e-05, + "loss": 0.1037, + "step": 11714 + }, + { + "epoch": 107.48, + "learning_rate": 5.237003058103976e-05, + "loss": 0.1404, + "step": 11715 + }, + { + "epoch": 107.49, + "learning_rate": 5.233180428134557e-05, + "loss": 0.1171, + "step": 11716 + }, + { + "epoch": 107.5, + "learning_rate": 5.229357798165138e-05, + "loss": 0.0624, + "step": 11717 + }, + { + "epoch": 107.5, + "learning_rate": 5.2255351681957184e-05, + "loss": 0.1997, + "step": 11718 + }, + { + "epoch": 107.51, + "learning_rate": 5.2217125382262995e-05, + "loss": 0.1714, + "step": 11719 + }, + { + "epoch": 107.52, + "learning_rate": 5.2178899082568806e-05, + "loss": 0.1157, + "step": 11720 + }, + { + "epoch": 107.53, + "learning_rate": 5.214067278287462e-05, + "loss": 0.1192, + "step": 11721 + }, + { + "epoch": 107.54, + "learning_rate": 5.210244648318043e-05, + "loss": 0.0757, + "step": 11722 + }, + { + "epoch": 107.55, + "learning_rate": 5.2064220183486246e-05, + "loss": 0.1128, + "step": 11723 + }, + { + "epoch": 107.56, + "learning_rate": 5.202599388379205e-05, + "loss": 0.0894, + "step": 11724 + }, + { + "epoch": 107.57, + "learning_rate": 5.198776758409786e-05, + "loss": 0.2193, + "step": 11725 + }, + { + "epoch": 107.58, + "learning_rate": 5.194954128440367e-05, + "loss": 0.1408, + "step": 11726 + }, + { + "epoch": 107.59, + "learning_rate": 5.191131498470948e-05, + "loss": 0.1915, + "step": 11727 + }, + { + "epoch": 107.6, + "learning_rate": 5.1873088685015294e-05, + "loss": 0.1049, + "step": 11728 + }, + { + "epoch": 107.61, + "learning_rate": 5.1834862385321105e-05, + "loss": 0.0807, + "step": 11729 + }, + { + "epoch": 107.61, + "learning_rate": 5.179663608562691e-05, + "loss": 0.1106, + "step": 11730 + }, + { + "epoch": 107.62, + "learning_rate": 5.175840978593272e-05, + "loss": 0.1258, + "step": 11731 + }, + { + "epoch": 107.63, + "learning_rate": 5.172018348623853e-05, + "loss": 0.101, + "step": 11732 + }, + { + "epoch": 107.64, + "learning_rate": 5.168195718654434e-05, + "loss": 0.0994, + "step": 11733 + }, + { + "epoch": 107.65, + "learning_rate": 5.1643730886850154e-05, + "loss": 0.1093, + "step": 11734 + }, + { + "epoch": 107.66, + "learning_rate": 5.1605504587155965e-05, + "loss": 0.0916, + "step": 11735 + }, + { + "epoch": 107.67, + "learning_rate": 5.156727828746177e-05, + "loss": 0.105, + "step": 11736 + }, + { + "epoch": 107.68, + "learning_rate": 5.152905198776758e-05, + "loss": 0.0835, + "step": 11737 + }, + { + "epoch": 107.69, + "learning_rate": 5.14908256880734e-05, + "loss": 0.0863, + "step": 11738 + }, + { + "epoch": 107.7, + "learning_rate": 5.145259938837921e-05, + "loss": 0.1619, + "step": 11739 + }, + { + "epoch": 107.71, + "learning_rate": 5.141437308868502e-05, + "loss": 0.1197, + "step": 11740 + }, + { + "epoch": 107.72, + "learning_rate": 5.137614678899083e-05, + "loss": 0.0641, + "step": 11741 + }, + { + "epoch": 107.72, + "learning_rate": 5.1337920489296635e-05, + "loss": 0.164, + "step": 11742 + }, + { + "epoch": 107.73, + "learning_rate": 5.1299694189602447e-05, + "loss": 0.0825, + "step": 11743 + }, + { + "epoch": 107.74, + "learning_rate": 5.126146788990826e-05, + "loss": 0.0959, + "step": 11744 + }, + { + "epoch": 107.75, + "learning_rate": 5.122324159021407e-05, + "loss": 0.1271, + "step": 11745 + }, + { + "epoch": 107.76, + "learning_rate": 5.118501529051988e-05, + "loss": 0.1417, + "step": 11746 + }, + { + "epoch": 107.77, + "learning_rate": 5.114678899082569e-05, + "loss": 0.1477, + "step": 11747 + }, + { + "epoch": 107.78, + "learning_rate": 5.1108562691131495e-05, + "loss": 0.0807, + "step": 11748 + }, + { + "epoch": 107.79, + "learning_rate": 5.1070336391437306e-05, + "loss": 0.1875, + "step": 11749 + }, + { + "epoch": 107.8, + "learning_rate": 5.103211009174312e-05, + "loss": 0.1407, + "step": 11750 + }, + { + "epoch": 107.81, + "learning_rate": 5.099388379204893e-05, + "loss": 0.1304, + "step": 11751 + }, + { + "epoch": 107.82, + "learning_rate": 5.0955657492354746e-05, + "loss": 0.0641, + "step": 11752 + }, + { + "epoch": 107.83, + "learning_rate": 5.091743119266056e-05, + "loss": 0.08, + "step": 11753 + }, + { + "epoch": 107.83, + "learning_rate": 5.087920489296636e-05, + "loss": 0.1035, + "step": 11754 + }, + { + "epoch": 107.84, + "learning_rate": 5.084097859327217e-05, + "loss": 0.1463, + "step": 11755 + }, + { + "epoch": 107.85, + "learning_rate": 5.0802752293577984e-05, + "loss": 0.0854, + "step": 11756 + }, + { + "epoch": 107.86, + "learning_rate": 5.0764525993883795e-05, + "loss": 0.0784, + "step": 11757 + }, + { + "epoch": 107.87, + "learning_rate": 5.0726299694189606e-05, + "loss": 0.1003, + "step": 11758 + }, + { + "epoch": 107.88, + "learning_rate": 5.068807339449542e-05, + "loss": 0.0643, + "step": 11759 + }, + { + "epoch": 107.89, + "learning_rate": 5.064984709480122e-05, + "loss": 0.1341, + "step": 11760 + }, + { + "epoch": 107.9, + "learning_rate": 5.061162079510703e-05, + "loss": 0.1387, + "step": 11761 + }, + { + "epoch": 107.91, + "learning_rate": 5.057339449541284e-05, + "loss": 0.0687, + "step": 11762 + }, + { + "epoch": 107.92, + "learning_rate": 5.0535168195718654e-05, + "loss": 0.135, + "step": 11763 + }, + { + "epoch": 107.93, + "learning_rate": 5.0496941896024465e-05, + "loss": 0.0926, + "step": 11764 + }, + { + "epoch": 107.94, + "learning_rate": 5.0458715596330276e-05, + "loss": 0.066, + "step": 11765 + }, + { + "epoch": 107.94, + "learning_rate": 5.042048929663608e-05, + "loss": 0.1003, + "step": 11766 + }, + { + "epoch": 107.95, + "learning_rate": 5.03822629969419e-05, + "loss": 0.1264, + "step": 11767 + }, + { + "epoch": 107.96, + "learning_rate": 5.034403669724771e-05, + "loss": 0.034, + "step": 11768 + }, + { + "epoch": 107.97, + "learning_rate": 5.030581039755352e-05, + "loss": 0.0775, + "step": 11769 + }, + { + "epoch": 107.98, + "learning_rate": 5.026758409785933e-05, + "loss": 0.1243, + "step": 11770 + }, + { + "epoch": 107.99, + "learning_rate": 5.022935779816514e-05, + "loss": 0.0525, + "step": 11771 + }, + { + "epoch": 108.0, + "learning_rate": 5.019113149847095e-05, + "loss": 0.1321, + "step": 11772 + }, + { + "epoch": 108.01, + "learning_rate": 5.015290519877676e-05, + "loss": 0.1469, + "step": 11773 + }, + { + "epoch": 108.02, + "learning_rate": 5.011467889908257e-05, + "loss": 0.1507, + "step": 11774 + }, + { + "epoch": 108.03, + "learning_rate": 5.007645259938838e-05, + "loss": 0.1157, + "step": 11775 + }, + { + "epoch": 108.04, + "learning_rate": 5.003822629969419e-05, + "loss": 0.1836, + "step": 11776 + }, + { + "epoch": 108.05, + "learning_rate": 5e-05, + "loss": 0.1412, + "step": 11777 + }, + { + "epoch": 108.06, + "learning_rate": 4.996177370030581e-05, + "loss": 0.1072, + "step": 11778 + }, + { + "epoch": 108.06, + "learning_rate": 4.992354740061162e-05, + "loss": 0.0615, + "step": 11779 + }, + { + "epoch": 108.07, + "learning_rate": 4.988532110091743e-05, + "loss": 0.0912, + "step": 11780 + }, + { + "epoch": 108.08, + "learning_rate": 4.984709480122324e-05, + "loss": 0.056, + "step": 11781 + }, + { + "epoch": 108.09, + "learning_rate": 4.980886850152906e-05, + "loss": 0.1506, + "step": 11782 + }, + { + "epoch": 108.1, + "learning_rate": 4.977064220183487e-05, + "loss": 0.1406, + "step": 11783 + }, + { + "epoch": 108.11, + "learning_rate": 4.973241590214067e-05, + "loss": 0.1281, + "step": 11784 + }, + { + "epoch": 108.12, + "learning_rate": 4.9694189602446484e-05, + "loss": 0.0919, + "step": 11785 + }, + { + "epoch": 108.13, + "learning_rate": 4.9655963302752295e-05, + "loss": 0.1333, + "step": 11786 + }, + { + "epoch": 108.14, + "learning_rate": 4.9617737003058106e-05, + "loss": 0.0999, + "step": 11787 + }, + { + "epoch": 108.15, + "learning_rate": 4.957951070336392e-05, + "loss": 0.0972, + "step": 11788 + }, + { + "epoch": 108.16, + "learning_rate": 4.954128440366973e-05, + "loss": 0.0635, + "step": 11789 + }, + { + "epoch": 108.17, + "learning_rate": 4.950305810397553e-05, + "loss": 0.1252, + "step": 11790 + }, + { + "epoch": 108.17, + "learning_rate": 4.9464831804281344e-05, + "loss": 0.0921, + "step": 11791 + }, + { + "epoch": 108.18, + "learning_rate": 4.9426605504587155e-05, + "loss": 0.102, + "step": 11792 + }, + { + "epoch": 108.19, + "learning_rate": 4.9388379204892966e-05, + "loss": 0.0686, + "step": 11793 + }, + { + "epoch": 108.2, + "learning_rate": 4.935015290519878e-05, + "loss": 0.0879, + "step": 11794 + }, + { + "epoch": 108.21, + "learning_rate": 4.931192660550459e-05, + "loss": 0.0858, + "step": 11795 + }, + { + "epoch": 108.22, + "learning_rate": 4.927370030581039e-05, + "loss": 0.1057, + "step": 11796 + }, + { + "epoch": 108.23, + "learning_rate": 4.923547400611621e-05, + "loss": 0.1558, + "step": 11797 + }, + { + "epoch": 108.24, + "learning_rate": 4.919724770642202e-05, + "loss": 0.0503, + "step": 11798 + }, + { + "epoch": 108.25, + "learning_rate": 4.915902140672783e-05, + "loss": 0.0569, + "step": 11799 + }, + { + "epoch": 108.26, + "learning_rate": 4.912079510703364e-05, + "loss": 0.1542, + "step": 11800 + }, + { + "epoch": 108.27, + "learning_rate": 4.9082568807339454e-05, + "loss": 0.1403, + "step": 11801 + }, + { + "epoch": 108.28, + "learning_rate": 4.904434250764526e-05, + "loss": 0.1593, + "step": 11802 + }, + { + "epoch": 108.28, + "learning_rate": 4.900611620795107e-05, + "loss": 0.1755, + "step": 11803 + }, + { + "epoch": 108.29, + "learning_rate": 4.896788990825688e-05, + "loss": 0.1152, + "step": 11804 + }, + { + "epoch": 108.3, + "learning_rate": 4.892966360856269e-05, + "loss": 0.0923, + "step": 11805 + }, + { + "epoch": 108.31, + "learning_rate": 4.88914373088685e-05, + "loss": 0.1175, + "step": 11806 + }, + { + "epoch": 108.32, + "learning_rate": 4.8853211009174314e-05, + "loss": 0.085, + "step": 11807 + }, + { + "epoch": 108.33, + "learning_rate": 4.881498470948012e-05, + "loss": 0.0799, + "step": 11808 + }, + { + "epoch": 108.34, + "learning_rate": 4.877675840978593e-05, + "loss": 0.1805, + "step": 11809 + }, + { + "epoch": 108.35, + "learning_rate": 4.873853211009174e-05, + "loss": 0.112, + "step": 11810 + }, + { + "epoch": 108.36, + "learning_rate": 4.870030581039756e-05, + "loss": 0.1682, + "step": 11811 + }, + { + "epoch": 108.37, + "learning_rate": 4.866207951070337e-05, + "loss": 0.1274, + "step": 11812 + }, + { + "epoch": 108.38, + "learning_rate": 4.862385321100918e-05, + "loss": 0.1297, + "step": 11813 + }, + { + "epoch": 108.39, + "learning_rate": 4.8585626911314985e-05, + "loss": 0.1578, + "step": 11814 + }, + { + "epoch": 108.39, + "learning_rate": 4.8547400611620796e-05, + "loss": 0.1241, + "step": 11815 + }, + { + "epoch": 108.4, + "learning_rate": 4.850917431192661e-05, + "loss": 0.0552, + "step": 11816 + }, + { + "epoch": 108.41, + "learning_rate": 4.847094801223242e-05, + "loss": 0.086, + "step": 11817 + }, + { + "epoch": 108.42, + "learning_rate": 4.843272171253823e-05, + "loss": 0.06, + "step": 11818 + }, + { + "epoch": 108.43, + "learning_rate": 4.839449541284404e-05, + "loss": 0.0909, + "step": 11819 + }, + { + "epoch": 108.44, + "learning_rate": 4.8356269113149844e-05, + "loss": 0.0681, + "step": 11820 + }, + { + "epoch": 108.45, + "learning_rate": 4.8318042813455655e-05, + "loss": 0.0932, + "step": 11821 + }, + { + "epoch": 108.46, + "learning_rate": 4.8279816513761466e-05, + "loss": 0.0535, + "step": 11822 + }, + { + "epoch": 108.47, + "learning_rate": 4.824159021406728e-05, + "loss": 0.1123, + "step": 11823 + }, + { + "epoch": 108.48, + "learning_rate": 4.820336391437309e-05, + "loss": 0.0653, + "step": 11824 + }, + { + "epoch": 108.49, + "learning_rate": 4.81651376146789e-05, + "loss": 0.1182, + "step": 11825 + }, + { + "epoch": 108.5, + "learning_rate": 4.812691131498471e-05, + "loss": 0.0623, + "step": 11826 + }, + { + "epoch": 108.5, + "learning_rate": 4.808868501529052e-05, + "loss": 0.145, + "step": 11827 + }, + { + "epoch": 108.51, + "learning_rate": 4.805045871559633e-05, + "loss": 0.1242, + "step": 11828 + }, + { + "epoch": 108.52, + "learning_rate": 4.8012232415902144e-05, + "loss": 0.1009, + "step": 11829 + }, + { + "epoch": 108.53, + "learning_rate": 4.7974006116207955e-05, + "loss": 0.0781, + "step": 11830 + }, + { + "epoch": 108.54, + "learning_rate": 4.7935779816513766e-05, + "loss": 0.1108, + "step": 11831 + }, + { + "epoch": 108.55, + "learning_rate": 4.789755351681957e-05, + "loss": 0.1157, + "step": 11832 + }, + { + "epoch": 108.56, + "learning_rate": 4.785932721712538e-05, + "loss": 0.073, + "step": 11833 + }, + { + "epoch": 108.57, + "learning_rate": 4.782110091743119e-05, + "loss": 0.1351, + "step": 11834 + }, + { + "epoch": 108.58, + "learning_rate": 4.7782874617737003e-05, + "loss": 0.0899, + "step": 11835 + }, + { + "epoch": 108.59, + "learning_rate": 4.7744648318042815e-05, + "loss": 0.1105, + "step": 11836 + }, + { + "epoch": 108.6, + "learning_rate": 4.7706422018348626e-05, + "loss": 0.0915, + "step": 11837 + }, + { + "epoch": 108.61, + "learning_rate": 4.766819571865443e-05, + "loss": 0.0895, + "step": 11838 + }, + { + "epoch": 108.61, + "learning_rate": 4.762996941896024e-05, + "loss": 0.0877, + "step": 11839 + }, + { + "epoch": 108.62, + "learning_rate": 4.759174311926605e-05, + "loss": 0.1515, + "step": 11840 + }, + { + "epoch": 108.63, + "learning_rate": 4.755351681957187e-05, + "loss": 0.1047, + "step": 11841 + }, + { + "epoch": 108.64, + "learning_rate": 4.751529051987768e-05, + "loss": 0.0695, + "step": 11842 + }, + { + "epoch": 108.65, + "learning_rate": 4.747706422018349e-05, + "loss": 0.1372, + "step": 11843 + }, + { + "epoch": 108.66, + "learning_rate": 4.7438837920489296e-05, + "loss": 0.1035, + "step": 11844 + }, + { + "epoch": 108.67, + "learning_rate": 4.740061162079511e-05, + "loss": 0.0471, + "step": 11845 + }, + { + "epoch": 108.68, + "learning_rate": 4.736238532110092e-05, + "loss": 0.1052, + "step": 11846 + }, + { + "epoch": 108.69, + "learning_rate": 4.732415902140673e-05, + "loss": 0.0895, + "step": 11847 + }, + { + "epoch": 108.7, + "learning_rate": 4.728593272171254e-05, + "loss": 0.0953, + "step": 11848 + }, + { + "epoch": 108.71, + "learning_rate": 4.724770642201835e-05, + "loss": 0.0429, + "step": 11849 + }, + { + "epoch": 108.72, + "learning_rate": 4.7209480122324156e-05, + "loss": 0.0504, + "step": 11850 + }, + { + "epoch": 108.72, + "learning_rate": 4.717125382262997e-05, + "loss": 0.0735, + "step": 11851 + }, + { + "epoch": 108.73, + "learning_rate": 4.713302752293578e-05, + "loss": 0.0891, + "step": 11852 + }, + { + "epoch": 108.74, + "learning_rate": 4.709480122324159e-05, + "loss": 0.0201, + "step": 11853 + }, + { + "epoch": 108.75, + "learning_rate": 4.70565749235474e-05, + "loss": 0.1237, + "step": 11854 + }, + { + "epoch": 108.76, + "learning_rate": 4.701834862385322e-05, + "loss": 0.1933, + "step": 11855 + }, + { + "epoch": 108.77, + "learning_rate": 4.698012232415902e-05, + "loss": 0.1287, + "step": 11856 + }, + { + "epoch": 108.78, + "learning_rate": 4.694189602446483e-05, + "loss": 0.0688, + "step": 11857 + }, + { + "epoch": 108.79, + "learning_rate": 4.6903669724770644e-05, + "loss": 0.1254, + "step": 11858 + }, + { + "epoch": 108.8, + "learning_rate": 4.6865443425076455e-05, + "loss": 0.1274, + "step": 11859 + }, + { + "epoch": 108.81, + "learning_rate": 4.6827217125382266e-05, + "loss": 0.1463, + "step": 11860 + }, + { + "epoch": 108.82, + "learning_rate": 4.678899082568808e-05, + "loss": 0.0692, + "step": 11861 + }, + { + "epoch": 108.83, + "learning_rate": 4.675076452599388e-05, + "loss": 0.1179, + "step": 11862 + }, + { + "epoch": 108.83, + "learning_rate": 4.671253822629969e-05, + "loss": 0.1101, + "step": 11863 + }, + { + "epoch": 108.84, + "learning_rate": 4.6674311926605504e-05, + "loss": 0.0844, + "step": 11864 + }, + { + "epoch": 108.85, + "learning_rate": 4.6636085626911315e-05, + "loss": 0.1352, + "step": 11865 + }, + { + "epoch": 108.86, + "learning_rate": 4.6597859327217126e-05, + "loss": 0.091, + "step": 11866 + }, + { + "epoch": 108.87, + "learning_rate": 4.655963302752294e-05, + "loss": 0.1393, + "step": 11867 + }, + { + "epoch": 108.88, + "learning_rate": 4.652140672782874e-05, + "loss": 0.092, + "step": 11868 + }, + { + "epoch": 108.89, + "learning_rate": 4.648318042813455e-05, + "loss": 0.1015, + "step": 11869 + }, + { + "epoch": 108.9, + "learning_rate": 4.644495412844037e-05, + "loss": 0.1395, + "step": 11870 + }, + { + "epoch": 108.91, + "learning_rate": 4.640672782874618e-05, + "loss": 0.097, + "step": 11871 + }, + { + "epoch": 108.92, + "learning_rate": 4.636850152905199e-05, + "loss": 0.0681, + "step": 11872 + }, + { + "epoch": 108.93, + "learning_rate": 4.6330275229357804e-05, + "loss": 0.1476, + "step": 11873 + }, + { + "epoch": 108.94, + "learning_rate": 4.629204892966361e-05, + "loss": 0.0618, + "step": 11874 + }, + { + "epoch": 108.94, + "learning_rate": 4.625382262996942e-05, + "loss": 0.1395, + "step": 11875 + }, + { + "epoch": 108.95, + "learning_rate": 4.621559633027523e-05, + "loss": 0.0913, + "step": 11876 + }, + { + "epoch": 108.96, + "learning_rate": 4.617737003058104e-05, + "loss": 0.1167, + "step": 11877 + }, + { + "epoch": 108.97, + "learning_rate": 4.613914373088685e-05, + "loss": 0.0848, + "step": 11878 + }, + { + "epoch": 108.98, + "learning_rate": 4.610091743119266e-05, + "loss": 0.0731, + "step": 11879 + }, + { + "epoch": 108.99, + "learning_rate": 4.606269113149847e-05, + "loss": 0.0943, + "step": 11880 + }, + { + "epoch": 109.0, + "learning_rate": 4.602446483180428e-05, + "loss": 0.065, + "step": 11881 + }, + { + "epoch": 109.01, + "learning_rate": 4.598623853211009e-05, + "loss": 0.1668, + "step": 11882 + }, + { + "epoch": 109.02, + "learning_rate": 4.59480122324159e-05, + "loss": 0.1344, + "step": 11883 + }, + { + "epoch": 109.03, + "learning_rate": 4.590978593272171e-05, + "loss": 0.103, + "step": 11884 + }, + { + "epoch": 109.04, + "learning_rate": 4.587155963302753e-05, + "loss": 0.0984, + "step": 11885 + }, + { + "epoch": 109.05, + "learning_rate": 4.5833333333333334e-05, + "loss": 0.094, + "step": 11886 + }, + { + "epoch": 109.06, + "learning_rate": 4.5795107033639145e-05, + "loss": 0.0916, + "step": 11887 + }, + { + "epoch": 109.06, + "learning_rate": 4.5756880733944956e-05, + "loss": 0.1134, + "step": 11888 + }, + { + "epoch": 109.07, + "learning_rate": 4.571865443425077e-05, + "loss": 0.1099, + "step": 11889 + }, + { + "epoch": 109.08, + "learning_rate": 4.568042813455658e-05, + "loss": 0.0773, + "step": 11890 + }, + { + "epoch": 109.09, + "learning_rate": 4.564220183486239e-05, + "loss": 0.0631, + "step": 11891 + }, + { + "epoch": 109.1, + "learning_rate": 4.5603975535168193e-05, + "loss": 0.1466, + "step": 11892 + }, + { + "epoch": 109.11, + "learning_rate": 4.5565749235474005e-05, + "loss": 0.1454, + "step": 11893 + }, + { + "epoch": 109.12, + "learning_rate": 4.5527522935779816e-05, + "loss": 0.0999, + "step": 11894 + }, + { + "epoch": 109.13, + "learning_rate": 4.548929663608563e-05, + "loss": 0.1116, + "step": 11895 + }, + { + "epoch": 109.14, + "learning_rate": 4.545107033639144e-05, + "loss": 0.0946, + "step": 11896 + }, + { + "epoch": 109.15, + "learning_rate": 4.541284403669725e-05, + "loss": 0.0894, + "step": 11897 + }, + { + "epoch": 109.16, + "learning_rate": 4.537461773700305e-05, + "loss": 0.0626, + "step": 11898 + }, + { + "epoch": 109.17, + "learning_rate": 4.5336391437308864e-05, + "loss": 0.0754, + "step": 11899 + }, + { + "epoch": 109.17, + "learning_rate": 4.529816513761468e-05, + "loss": 0.0901, + "step": 11900 + }, + { + "epoch": 109.18, + "learning_rate": 4.525993883792049e-05, + "loss": 0.0833, + "step": 11901 + }, + { + "epoch": 109.19, + "learning_rate": 4.5221712538226304e-05, + "loss": 0.0764, + "step": 11902 + }, + { + "epoch": 109.2, + "learning_rate": 4.5183486238532115e-05, + "loss": 0.0723, + "step": 11903 + }, + { + "epoch": 109.21, + "learning_rate": 4.514525993883792e-05, + "loss": 0.1117, + "step": 11904 + }, + { + "epoch": 109.22, + "learning_rate": 4.510703363914373e-05, + "loss": 0.0594, + "step": 11905 + }, + { + "epoch": 109.23, + "learning_rate": 4.506880733944954e-05, + "loss": 0.0856, + "step": 11906 + }, + { + "epoch": 109.24, + "learning_rate": 4.503058103975535e-05, + "loss": 0.0778, + "step": 11907 + }, + { + "epoch": 109.25, + "learning_rate": 4.4992354740061164e-05, + "loss": 0.0451, + "step": 11908 + }, + { + "epoch": 109.26, + "learning_rate": 4.4954128440366975e-05, + "loss": 0.1649, + "step": 11909 + }, + { + "epoch": 109.27, + "learning_rate": 4.491590214067278e-05, + "loss": 0.1433, + "step": 11910 + }, + { + "epoch": 109.28, + "learning_rate": 4.487767584097859e-05, + "loss": 0.1255, + "step": 11911 + }, + { + "epoch": 109.28, + "learning_rate": 4.48394495412844e-05, + "loss": 0.1621, + "step": 11912 + }, + { + "epoch": 109.29, + "learning_rate": 4.480122324159021e-05, + "loss": 0.0954, + "step": 11913 + }, + { + "epoch": 109.3, + "learning_rate": 4.476299694189603e-05, + "loss": 0.0942, + "step": 11914 + }, + { + "epoch": 109.31, + "learning_rate": 4.472477064220184e-05, + "loss": 0.1078, + "step": 11915 + }, + { + "epoch": 109.32, + "learning_rate": 4.4686544342507645e-05, + "loss": 0.0974, + "step": 11916 + }, + { + "epoch": 109.33, + "learning_rate": 4.4648318042813456e-05, + "loss": 0.1129, + "step": 11917 + }, + { + "epoch": 109.34, + "learning_rate": 4.461009174311927e-05, + "loss": 0.0888, + "step": 11918 + }, + { + "epoch": 109.35, + "learning_rate": 4.457186544342508e-05, + "loss": 0.1114, + "step": 11919 + }, + { + "epoch": 109.36, + "learning_rate": 4.453363914373089e-05, + "loss": 0.0956, + "step": 11920 + }, + { + "epoch": 109.37, + "learning_rate": 4.44954128440367e-05, + "loss": 0.0699, + "step": 11921 + }, + { + "epoch": 109.38, + "learning_rate": 4.4457186544342505e-05, + "loss": 0.1285, + "step": 11922 + }, + { + "epoch": 109.39, + "learning_rate": 4.4418960244648316e-05, + "loss": 0.1211, + "step": 11923 + }, + { + "epoch": 109.39, + "learning_rate": 4.438073394495413e-05, + "loss": 0.1342, + "step": 11924 + }, + { + "epoch": 109.4, + "learning_rate": 4.434250764525994e-05, + "loss": 0.1013, + "step": 11925 + }, + { + "epoch": 109.41, + "learning_rate": 4.430428134556575e-05, + "loss": 0.0559, + "step": 11926 + }, + { + "epoch": 109.42, + "learning_rate": 4.426605504587156e-05, + "loss": 0.1485, + "step": 11927 + }, + { + "epoch": 109.43, + "learning_rate": 4.4227828746177365e-05, + "loss": 0.1046, + "step": 11928 + }, + { + "epoch": 109.44, + "learning_rate": 4.418960244648318e-05, + "loss": 0.0808, + "step": 11929 + }, + { + "epoch": 109.45, + "learning_rate": 4.4151376146788994e-05, + "loss": 0.0942, + "step": 11930 + }, + { + "epoch": 109.46, + "learning_rate": 4.4113149847094805e-05, + "loss": 0.1078, + "step": 11931 + }, + { + "epoch": 109.47, + "learning_rate": 4.4074923547400616e-05, + "loss": 0.0812, + "step": 11932 + }, + { + "epoch": 109.48, + "learning_rate": 4.403669724770643e-05, + "loss": 0.1018, + "step": 11933 + }, + { + "epoch": 109.49, + "learning_rate": 4.399847094801223e-05, + "loss": 0.1407, + "step": 11934 + }, + { + "epoch": 109.5, + "learning_rate": 4.396024464831804e-05, + "loss": 0.028, + "step": 11935 + }, + { + "epoch": 109.5, + "learning_rate": 4.392201834862385e-05, + "loss": 0.1415, + "step": 11936 + }, + { + "epoch": 109.51, + "learning_rate": 4.3883792048929664e-05, + "loss": 0.0698, + "step": 11937 + }, + { + "epoch": 109.52, + "learning_rate": 4.3845565749235475e-05, + "loss": 0.1368, + "step": 11938 + }, + { + "epoch": 109.53, + "learning_rate": 4.3807339449541286e-05, + "loss": 0.0893, + "step": 11939 + }, + { + "epoch": 109.54, + "learning_rate": 4.376911314984709e-05, + "loss": 0.1735, + "step": 11940 + }, + { + "epoch": 109.55, + "learning_rate": 4.37308868501529e-05, + "loss": 0.1256, + "step": 11941 + }, + { + "epoch": 109.56, + "learning_rate": 4.369266055045871e-05, + "loss": 0.1012, + "step": 11942 + }, + { + "epoch": 109.57, + "learning_rate": 4.3654434250764524e-05, + "loss": 0.1288, + "step": 11943 + }, + { + "epoch": 109.58, + "learning_rate": 4.361620795107034e-05, + "loss": 0.0989, + "step": 11944 + }, + { + "epoch": 109.59, + "learning_rate": 4.357798165137615e-05, + "loss": 0.1543, + "step": 11945 + }, + { + "epoch": 109.6, + "learning_rate": 4.353975535168196e-05, + "loss": 0.1148, + "step": 11946 + }, + { + "epoch": 109.61, + "learning_rate": 4.350152905198777e-05, + "loss": 0.1275, + "step": 11947 + }, + { + "epoch": 109.61, + "learning_rate": 4.346330275229358e-05, + "loss": 0.1021, + "step": 11948 + }, + { + "epoch": 109.62, + "learning_rate": 4.342507645259939e-05, + "loss": 0.1416, + "step": 11949 + }, + { + "epoch": 109.63, + "learning_rate": 4.33868501529052e-05, + "loss": 0.0743, + "step": 11950 + }, + { + "epoch": 109.64, + "learning_rate": 4.334862385321101e-05, + "loss": 0.1471, + "step": 11951 + }, + { + "epoch": 109.65, + "learning_rate": 4.331039755351682e-05, + "loss": 0.0726, + "step": 11952 + }, + { + "epoch": 109.66, + "learning_rate": 4.327217125382263e-05, + "loss": 0.1197, + "step": 11953 + }, + { + "epoch": 109.67, + "learning_rate": 4.323394495412844e-05, + "loss": 0.0686, + "step": 11954 + }, + { + "epoch": 109.68, + "learning_rate": 4.319571865443425e-05, + "loss": 0.074, + "step": 11955 + }, + { + "epoch": 109.69, + "learning_rate": 4.315749235474006e-05, + "loss": 0.1093, + "step": 11956 + }, + { + "epoch": 109.7, + "learning_rate": 4.311926605504587e-05, + "loss": 0.0747, + "step": 11957 + }, + { + "epoch": 109.71, + "learning_rate": 4.3081039755351676e-05, + "loss": 0.0691, + "step": 11958 + }, + { + "epoch": 109.72, + "learning_rate": 4.3042813455657494e-05, + "loss": 0.0473, + "step": 11959 + }, + { + "epoch": 109.72, + "learning_rate": 4.3004587155963305e-05, + "loss": 0.0604, + "step": 11960 + }, + { + "epoch": 109.73, + "learning_rate": 4.2966360856269116e-05, + "loss": 0.117, + "step": 11961 + }, + { + "epoch": 109.74, + "learning_rate": 4.292813455657493e-05, + "loss": 0.0368, + "step": 11962 + }, + { + "epoch": 109.75, + "learning_rate": 4.288990825688074e-05, + "loss": 0.1685, + "step": 11963 + }, + { + "epoch": 109.76, + "learning_rate": 4.285168195718654e-05, + "loss": 0.133, + "step": 11964 + }, + { + "epoch": 109.77, + "learning_rate": 4.2813455657492354e-05, + "loss": 0.1143, + "step": 11965 + }, + { + "epoch": 109.78, + "learning_rate": 4.2775229357798165e-05, + "loss": 0.1234, + "step": 11966 + }, + { + "epoch": 109.79, + "learning_rate": 4.2737003058103976e-05, + "loss": 0.1344, + "step": 11967 + }, + { + "epoch": 109.8, + "learning_rate": 4.269877675840979e-05, + "loss": 0.0817, + "step": 11968 + }, + { + "epoch": 109.81, + "learning_rate": 4.26605504587156e-05, + "loss": 0.1299, + "step": 11969 + }, + { + "epoch": 109.82, + "learning_rate": 4.26223241590214e-05, + "loss": 0.0817, + "step": 11970 + }, + { + "epoch": 109.83, + "learning_rate": 4.258409785932721e-05, + "loss": 0.0609, + "step": 11971 + }, + { + "epoch": 109.83, + "learning_rate": 4.2545871559633024e-05, + "loss": 0.0976, + "step": 11972 + }, + { + "epoch": 109.84, + "learning_rate": 4.250764525993884e-05, + "loss": 0.086, + "step": 11973 + }, + { + "epoch": 109.85, + "learning_rate": 4.246941896024465e-05, + "loss": 0.0446, + "step": 11974 + }, + { + "epoch": 109.86, + "learning_rate": 4.2431192660550464e-05, + "loss": 0.1066, + "step": 11975 + }, + { + "epoch": 109.87, + "learning_rate": 4.239296636085627e-05, + "loss": 0.1287, + "step": 11976 + }, + { + "epoch": 109.88, + "learning_rate": 4.235474006116208e-05, + "loss": 0.1215, + "step": 11977 + }, + { + "epoch": 109.89, + "learning_rate": 4.231651376146789e-05, + "loss": 0.0453, + "step": 11978 + }, + { + "epoch": 109.9, + "learning_rate": 4.22782874617737e-05, + "loss": 0.1052, + "step": 11979 + }, + { + "epoch": 109.91, + "learning_rate": 4.224006116207951e-05, + "loss": 0.0634, + "step": 11980 + }, + { + "epoch": 109.92, + "learning_rate": 4.2201834862385324e-05, + "loss": 0.1202, + "step": 11981 + }, + { + "epoch": 109.93, + "learning_rate": 4.216360856269113e-05, + "loss": 0.0915, + "step": 11982 + }, + { + "epoch": 109.94, + "learning_rate": 4.212538226299694e-05, + "loss": 0.0472, + "step": 11983 + }, + { + "epoch": 109.94, + "learning_rate": 4.208715596330275e-05, + "loss": 0.0651, + "step": 11984 + }, + { + "epoch": 109.95, + "learning_rate": 4.204892966360856e-05, + "loss": 0.0771, + "step": 11985 + }, + { + "epoch": 109.96, + "learning_rate": 4.201070336391437e-05, + "loss": 0.0904, + "step": 11986 + }, + { + "epoch": 109.97, + "learning_rate": 4.1972477064220184e-05, + "loss": 0.0958, + "step": 11987 + }, + { + "epoch": 109.98, + "learning_rate": 4.1934250764525995e-05, + "loss": 0.0862, + "step": 11988 + }, + { + "epoch": 109.99, + "learning_rate": 4.1896024464831806e-05, + "loss": 0.0167, + "step": 11989 + }, + { + "epoch": 110.0, + "learning_rate": 4.185779816513762e-05, + "loss": 0.071, + "step": 11990 + }, + { + "epoch": 110.01, + "learning_rate": 4.181957186544343e-05, + "loss": 0.1376, + "step": 11991 + }, + { + "epoch": 110.02, + "learning_rate": 4.178134556574924e-05, + "loss": 0.1091, + "step": 11992 + }, + { + "epoch": 110.03, + "learning_rate": 4.174311926605505e-05, + "loss": 0.0853, + "step": 11993 + }, + { + "epoch": 110.04, + "learning_rate": 4.1704892966360854e-05, + "loss": 0.095, + "step": 11994 + }, + { + "epoch": 110.05, + "learning_rate": 4.1666666666666665e-05, + "loss": 0.118, + "step": 11995 + }, + { + "epoch": 110.06, + "learning_rate": 4.1628440366972476e-05, + "loss": 0.0885, + "step": 11996 + }, + { + "epoch": 110.06, + "learning_rate": 4.159021406727829e-05, + "loss": 0.0897, + "step": 11997 + }, + { + "epoch": 110.07, + "learning_rate": 4.15519877675841e-05, + "loss": 0.0928, + "step": 11998 + }, + { + "epoch": 110.08, + "learning_rate": 4.151376146788991e-05, + "loss": 0.1192, + "step": 11999 + }, + { + "epoch": 110.09, + "learning_rate": 4.147553516819572e-05, + "loss": 0.0963, + "step": 12000 + }, + { + "epoch": 110.09, + "eval_cer": 0.13617895320769427, + "eval_loss": 0.7642955183982849, + "eval_runtime": 87.0494, + "eval_samples_per_second": 18.92, + "eval_steps_per_second": 2.366, + "eval_wer": 0.47594729853947504, + "step": 12000 + }, + { + "epoch": 110.1, + "learning_rate": 4.1437308868501525e-05, + "loss": 0.1475, + "step": 12001 + }, + { + "epoch": 110.11, + "learning_rate": 4.1399082568807336e-05, + "loss": 0.1015, + "step": 12002 + }, + { + "epoch": 110.12, + "learning_rate": 4.1360856269113154e-05, + "loss": 0.0755, + "step": 12003 + }, + { + "epoch": 110.13, + "learning_rate": 4.1322629969418965e-05, + "loss": 0.0998, + "step": 12004 + }, + { + "epoch": 110.14, + "learning_rate": 4.1284403669724776e-05, + "loss": 0.0921, + "step": 12005 + }, + { + "epoch": 110.15, + "learning_rate": 4.124617737003059e-05, + "loss": 0.1406, + "step": 12006 + }, + { + "epoch": 110.16, + "learning_rate": 4.120795107033639e-05, + "loss": 0.0397, + "step": 12007 + }, + { + "epoch": 110.17, + "learning_rate": 4.11697247706422e-05, + "loss": 0.1278, + "step": 12008 + }, + { + "epoch": 110.17, + "learning_rate": 4.1131498470948013e-05, + "loss": 0.058, + "step": 12009 + }, + { + "epoch": 110.18, + "learning_rate": 4.1093272171253824e-05, + "loss": 0.0964, + "step": 12010 + }, + { + "epoch": 110.19, + "learning_rate": 4.1055045871559636e-05, + "loss": 0.0539, + "step": 12011 + }, + { + "epoch": 110.2, + "learning_rate": 4.1016819571865447e-05, + "loss": 0.0817, + "step": 12012 + }, + { + "epoch": 110.21, + "learning_rate": 4.097859327217125e-05, + "loss": 0.1097, + "step": 12013 + }, + { + "epoch": 110.22, + "learning_rate": 4.094036697247706e-05, + "loss": 0.0749, + "step": 12014 + }, + { + "epoch": 110.23, + "learning_rate": 4.090214067278287e-05, + "loss": 0.0943, + "step": 12015 + }, + { + "epoch": 110.24, + "learning_rate": 4.0863914373088684e-05, + "loss": 0.0617, + "step": 12016 + }, + { + "epoch": 110.25, + "learning_rate": 4.08256880733945e-05, + "loss": 0.0748, + "step": 12017 + }, + { + "epoch": 110.26, + "learning_rate": 4.078746177370031e-05, + "loss": 0.1626, + "step": 12018 + }, + { + "epoch": 110.27, + "learning_rate": 4.074923547400612e-05, + "loss": 0.1561, + "step": 12019 + }, + { + "epoch": 110.28, + "learning_rate": 4.071100917431193e-05, + "loss": 0.0964, + "step": 12020 + }, + { + "epoch": 110.28, + "learning_rate": 4.067278287461774e-05, + "loss": 0.1187, + "step": 12021 + }, + { + "epoch": 110.29, + "learning_rate": 4.063455657492355e-05, + "loss": 0.1173, + "step": 12022 + }, + { + "epoch": 110.3, + "learning_rate": 4.059633027522936e-05, + "loss": 0.1462, + "step": 12023 + }, + { + "epoch": 110.31, + "learning_rate": 4.055810397553517e-05, + "loss": 0.1629, + "step": 12024 + }, + { + "epoch": 110.32, + "learning_rate": 4.051987767584098e-05, + "loss": 0.0879, + "step": 12025 + }, + { + "epoch": 110.33, + "learning_rate": 4.048165137614679e-05, + "loss": 0.0833, + "step": 12026 + }, + { + "epoch": 110.34, + "learning_rate": 4.04434250764526e-05, + "loss": 0.0976, + "step": 12027 + }, + { + "epoch": 110.35, + "learning_rate": 4.040519877675841e-05, + "loss": 0.1179, + "step": 12028 + }, + { + "epoch": 110.36, + "learning_rate": 4.036697247706422e-05, + "loss": 0.1248, + "step": 12029 + }, + { + "epoch": 110.37, + "learning_rate": 4.032874617737003e-05, + "loss": 0.0732, + "step": 12030 + }, + { + "epoch": 110.38, + "learning_rate": 4.0290519877675836e-05, + "loss": 0.099, + "step": 12031 + }, + { + "epoch": 110.39, + "learning_rate": 4.0252293577981654e-05, + "loss": 0.0831, + "step": 12032 + }, + { + "epoch": 110.39, + "learning_rate": 4.0214067278287465e-05, + "loss": 0.0919, + "step": 12033 + }, + { + "epoch": 110.4, + "learning_rate": 4.0175840978593276e-05, + "loss": 0.0738, + "step": 12034 + }, + { + "epoch": 110.41, + "learning_rate": 4.013761467889909e-05, + "loss": 0.0434, + "step": 12035 + }, + { + "epoch": 110.42, + "learning_rate": 4.00993883792049e-05, + "loss": 0.0969, + "step": 12036 + }, + { + "epoch": 110.43, + "learning_rate": 4.00611620795107e-05, + "loss": 0.0819, + "step": 12037 + }, + { + "epoch": 110.44, + "learning_rate": 4.0022935779816514e-05, + "loss": 0.116, + "step": 12038 + }, + { + "epoch": 110.45, + "learning_rate": 3.9984709480122325e-05, + "loss": 0.0745, + "step": 12039 + }, + { + "epoch": 110.46, + "learning_rate": 3.9946483180428136e-05, + "loss": 0.1263, + "step": 12040 + }, + { + "epoch": 110.47, + "learning_rate": 3.990825688073395e-05, + "loss": 0.0864, + "step": 12041 + }, + { + "epoch": 110.48, + "learning_rate": 3.987003058103976e-05, + "loss": 0.0272, + "step": 12042 + }, + { + "epoch": 110.49, + "learning_rate": 3.983180428134556e-05, + "loss": 0.1005, + "step": 12043 + }, + { + "epoch": 110.5, + "learning_rate": 3.9793577981651374e-05, + "loss": 0.0653, + "step": 12044 + }, + { + "epoch": 110.5, + "learning_rate": 3.9755351681957185e-05, + "loss": 0.142, + "step": 12045 + }, + { + "epoch": 110.51, + "learning_rate": 3.9717125382262996e-05, + "loss": 0.1016, + "step": 12046 + }, + { + "epoch": 110.52, + "learning_rate": 3.9678899082568813e-05, + "loss": 0.1055, + "step": 12047 + }, + { + "epoch": 110.53, + "learning_rate": 3.9640672782874625e-05, + "loss": 0.0697, + "step": 12048 + }, + { + "epoch": 110.54, + "learning_rate": 3.960244648318043e-05, + "loss": 0.107, + "step": 12049 + }, + { + "epoch": 110.55, + "learning_rate": 3.956422018348624e-05, + "loss": 0.1254, + "step": 12050 + }, + { + "epoch": 110.56, + "learning_rate": 3.952599388379205e-05, + "loss": 0.1093, + "step": 12051 + }, + { + "epoch": 110.57, + "learning_rate": 3.948776758409786e-05, + "loss": 0.1121, + "step": 12052 + }, + { + "epoch": 110.58, + "learning_rate": 3.944954128440367e-05, + "loss": 0.1047, + "step": 12053 + }, + { + "epoch": 110.59, + "learning_rate": 3.9411314984709484e-05, + "loss": 0.1453, + "step": 12054 + }, + { + "epoch": 110.6, + "learning_rate": 3.937308868501529e-05, + "loss": 0.1033, + "step": 12055 + }, + { + "epoch": 110.61, + "learning_rate": 3.93348623853211e-05, + "loss": 0.0806, + "step": 12056 + }, + { + "epoch": 110.61, + "learning_rate": 3.929663608562691e-05, + "loss": 0.1124, + "step": 12057 + }, + { + "epoch": 110.62, + "learning_rate": 3.925840978593272e-05, + "loss": 0.1152, + "step": 12058 + }, + { + "epoch": 110.63, + "learning_rate": 3.922018348623853e-05, + "loss": 0.1285, + "step": 12059 + }, + { + "epoch": 110.64, + "learning_rate": 3.9181957186544344e-05, + "loss": 0.0902, + "step": 12060 + }, + { + "epoch": 110.65, + "learning_rate": 3.914373088685015e-05, + "loss": 0.0497, + "step": 12061 + }, + { + "epoch": 110.66, + "learning_rate": 3.9105504587155966e-05, + "loss": 0.0456, + "step": 12062 + }, + { + "epoch": 110.67, + "learning_rate": 3.906727828746178e-05, + "loss": 0.067, + "step": 12063 + }, + { + "epoch": 110.68, + "learning_rate": 3.902905198776759e-05, + "loss": 0.0746, + "step": 12064 + }, + { + "epoch": 110.69, + "learning_rate": 3.89908256880734e-05, + "loss": 0.1191, + "step": 12065 + }, + { + "epoch": 110.7, + "learning_rate": 3.895259938837921e-05, + "loss": 0.0754, + "step": 12066 + }, + { + "epoch": 110.71, + "learning_rate": 3.8914373088685014e-05, + "loss": 0.0825, + "step": 12067 + }, + { + "epoch": 110.72, + "learning_rate": 3.8876146788990826e-05, + "loss": 0.1, + "step": 12068 + }, + { + "epoch": 110.72, + "learning_rate": 3.8837920489296637e-05, + "loss": 0.1143, + "step": 12069 + }, + { + "epoch": 110.73, + "learning_rate": 3.879969418960245e-05, + "loss": 0.082, + "step": 12070 + }, + { + "epoch": 110.74, + "learning_rate": 3.876146788990826e-05, + "loss": 0.0589, + "step": 12071 + }, + { + "epoch": 110.75, + "learning_rate": 3.872324159021407e-05, + "loss": 0.1245, + "step": 12072 + }, + { + "epoch": 110.76, + "learning_rate": 3.8685015290519874e-05, + "loss": 0.1354, + "step": 12073 + }, + { + "epoch": 110.77, + "learning_rate": 3.8646788990825685e-05, + "loss": 0.1358, + "step": 12074 + }, + { + "epoch": 110.78, + "learning_rate": 3.8608562691131496e-05, + "loss": 0.152, + "step": 12075 + }, + { + "epoch": 110.79, + "learning_rate": 3.8570336391437314e-05, + "loss": 0.1039, + "step": 12076 + }, + { + "epoch": 110.8, + "learning_rate": 3.8532110091743125e-05, + "loss": 0.1001, + "step": 12077 + }, + { + "epoch": 110.81, + "learning_rate": 3.8493883792048936e-05, + "loss": 0.1283, + "step": 12078 + }, + { + "epoch": 110.82, + "learning_rate": 3.845565749235474e-05, + "loss": 0.0713, + "step": 12079 + }, + { + "epoch": 110.83, + "learning_rate": 3.841743119266055e-05, + "loss": 0.0436, + "step": 12080 + }, + { + "epoch": 110.83, + "learning_rate": 3.837920489296636e-05, + "loss": 0.0598, + "step": 12081 + }, + { + "epoch": 110.84, + "learning_rate": 3.8340978593272174e-05, + "loss": 0.0776, + "step": 12082 + }, + { + "epoch": 110.85, + "learning_rate": 3.8302752293577985e-05, + "loss": 0.0788, + "step": 12083 + }, + { + "epoch": 110.86, + "learning_rate": 3.8264525993883796e-05, + "loss": 0.1121, + "step": 12084 + }, + { + "epoch": 110.87, + "learning_rate": 3.82262996941896e-05, + "loss": 0.0999, + "step": 12085 + }, + { + "epoch": 110.88, + "learning_rate": 3.818807339449541e-05, + "loss": 0.055, + "step": 12086 + }, + { + "epoch": 110.89, + "learning_rate": 3.814984709480122e-05, + "loss": 0.1133, + "step": 12087 + }, + { + "epoch": 110.9, + "learning_rate": 3.811162079510703e-05, + "loss": 0.1086, + "step": 12088 + }, + { + "epoch": 110.91, + "learning_rate": 3.8073394495412844e-05, + "loss": 0.0739, + "step": 12089 + }, + { + "epoch": 110.92, + "learning_rate": 3.8035168195718655e-05, + "loss": 0.1251, + "step": 12090 + }, + { + "epoch": 110.93, + "learning_rate": 3.7996941896024466e-05, + "loss": 0.0458, + "step": 12091 + }, + { + "epoch": 110.94, + "learning_rate": 3.795871559633028e-05, + "loss": 0.1108, + "step": 12092 + }, + { + "epoch": 110.94, + "learning_rate": 3.792048929663609e-05, + "loss": 0.0746, + "step": 12093 + }, + { + "epoch": 110.95, + "learning_rate": 3.78822629969419e-05, + "loss": 0.0573, + "step": 12094 + }, + { + "epoch": 110.96, + "learning_rate": 3.784403669724771e-05, + "loss": 0.0794, + "step": 12095 + }, + { + "epoch": 110.97, + "learning_rate": 3.780581039755352e-05, + "loss": 0.0221, + "step": 12096 + }, + { + "epoch": 110.98, + "learning_rate": 3.7767584097859326e-05, + "loss": 0.0386, + "step": 12097 + }, + { + "epoch": 110.99, + "learning_rate": 3.772935779816514e-05, + "loss": 0.0231, + "step": 12098 + }, + { + "epoch": 111.0, + "learning_rate": 3.769113149847095e-05, + "loss": 0.0608, + "step": 12099 + }, + { + "epoch": 111.01, + "learning_rate": 3.765290519877676e-05, + "loss": 0.1555, + "step": 12100 + }, + { + "epoch": 111.02, + "learning_rate": 3.761467889908257e-05, + "loss": 0.1343, + "step": 12101 + }, + { + "epoch": 111.03, + "learning_rate": 3.757645259938838e-05, + "loss": 0.0593, + "step": 12102 + }, + { + "epoch": 111.04, + "learning_rate": 3.7538226299694186e-05, + "loss": 0.1083, + "step": 12103 + }, + { + "epoch": 111.05, + "learning_rate": 3.75e-05, + "loss": 0.117, + "step": 12104 + }, + { + "epoch": 111.06, + "learning_rate": 3.746177370030581e-05, + "loss": 0.1, + "step": 12105 + }, + { + "epoch": 111.06, + "learning_rate": 3.7423547400611626e-05, + "loss": 0.1475, + "step": 12106 + }, + { + "epoch": 111.07, + "learning_rate": 3.738532110091744e-05, + "loss": 0.0927, + "step": 12107 + }, + { + "epoch": 111.08, + "learning_rate": 3.734709480122325e-05, + "loss": 0.0752, + "step": 12108 + }, + { + "epoch": 111.09, + "learning_rate": 3.730886850152905e-05, + "loss": 0.1107, + "step": 12109 + }, + { + "epoch": 111.1, + "learning_rate": 3.727064220183486e-05, + "loss": 0.0886, + "step": 12110 + }, + { + "epoch": 111.11, + "learning_rate": 3.7232415902140674e-05, + "loss": 0.0738, + "step": 12111 + }, + { + "epoch": 111.12, + "learning_rate": 3.7194189602446485e-05, + "loss": 0.0865, + "step": 12112 + }, + { + "epoch": 111.13, + "learning_rate": 3.7155963302752296e-05, + "loss": 0.1331, + "step": 12113 + }, + { + "epoch": 111.14, + "learning_rate": 3.711773700305811e-05, + "loss": 0.1049, + "step": 12114 + }, + { + "epoch": 111.15, + "learning_rate": 3.707951070336391e-05, + "loss": 0.107, + "step": 12115 + }, + { + "epoch": 111.16, + "learning_rate": 3.704128440366972e-05, + "loss": 0.0817, + "step": 12116 + }, + { + "epoch": 111.17, + "learning_rate": 3.7003058103975534e-05, + "loss": 0.0754, + "step": 12117 + }, + { + "epoch": 111.17, + "learning_rate": 3.6964831804281345e-05, + "loss": 0.088, + "step": 12118 + }, + { + "epoch": 111.18, + "learning_rate": 3.6926605504587156e-05, + "loss": 0.1143, + "step": 12119 + }, + { + "epoch": 111.19, + "learning_rate": 3.688837920489297e-05, + "loss": 0.0458, + "step": 12120 + }, + { + "epoch": 111.2, + "learning_rate": 3.685015290519878e-05, + "loss": 0.1107, + "step": 12121 + }, + { + "epoch": 111.21, + "learning_rate": 3.681192660550459e-05, + "loss": 0.0843, + "step": 12122 + }, + { + "epoch": 111.22, + "learning_rate": 3.67737003058104e-05, + "loss": 0.1382, + "step": 12123 + }, + { + "epoch": 111.23, + "learning_rate": 3.673547400611621e-05, + "loss": 0.0658, + "step": 12124 + }, + { + "epoch": 111.24, + "learning_rate": 3.669724770642202e-05, + "loss": 0.0487, + "step": 12125 + }, + { + "epoch": 111.25, + "learning_rate": 3.665902140672783e-05, + "loss": 0.0644, + "step": 12126 + }, + { + "epoch": 111.26, + "learning_rate": 3.662079510703364e-05, + "loss": 0.1312, + "step": 12127 + }, + { + "epoch": 111.27, + "learning_rate": 3.658256880733945e-05, + "loss": 0.1026, + "step": 12128 + }, + { + "epoch": 111.28, + "learning_rate": 3.654434250764526e-05, + "loss": 0.0774, + "step": 12129 + }, + { + "epoch": 111.28, + "learning_rate": 3.650611620795107e-05, + "loss": 0.1157, + "step": 12130 + }, + { + "epoch": 111.29, + "learning_rate": 3.646788990825688e-05, + "loss": 0.0978, + "step": 12131 + }, + { + "epoch": 111.3, + "learning_rate": 3.642966360856269e-05, + "loss": 0.0947, + "step": 12132 + }, + { + "epoch": 111.31, + "learning_rate": 3.63914373088685e-05, + "loss": 0.0743, + "step": 12133 + }, + { + "epoch": 111.32, + "learning_rate": 3.635321100917431e-05, + "loss": 0.0908, + "step": 12134 + }, + { + "epoch": 111.33, + "learning_rate": 3.6314984709480126e-05, + "loss": 0.0603, + "step": 12135 + }, + { + "epoch": 111.34, + "learning_rate": 3.627675840978594e-05, + "loss": 0.0887, + "step": 12136 + }, + { + "epoch": 111.35, + "learning_rate": 3.623853211009175e-05, + "loss": 0.065, + "step": 12137 + }, + { + "epoch": 111.36, + "learning_rate": 3.620030581039756e-05, + "loss": 0.1628, + "step": 12138 + }, + { + "epoch": 111.37, + "learning_rate": 3.6162079510703364e-05, + "loss": 0.0864, + "step": 12139 + }, + { + "epoch": 111.38, + "learning_rate": 3.6123853211009175e-05, + "loss": 0.0522, + "step": 12140 + }, + { + "epoch": 111.39, + "learning_rate": 3.6085626911314986e-05, + "loss": 0.0431, + "step": 12141 + }, + { + "epoch": 111.39, + "learning_rate": 3.60474006116208e-05, + "loss": 0.0998, + "step": 12142 + }, + { + "epoch": 111.4, + "learning_rate": 3.600917431192661e-05, + "loss": 0.0878, + "step": 12143 + }, + { + "epoch": 111.41, + "learning_rate": 3.597094801223242e-05, + "loss": 0.06, + "step": 12144 + }, + { + "epoch": 111.42, + "learning_rate": 3.593272171253822e-05, + "loss": 0.1133, + "step": 12145 + }, + { + "epoch": 111.43, + "learning_rate": 3.5894495412844034e-05, + "loss": 0.0823, + "step": 12146 + }, + { + "epoch": 111.44, + "learning_rate": 3.5856269113149845e-05, + "loss": 0.0693, + "step": 12147 + }, + { + "epoch": 111.45, + "learning_rate": 3.5818042813455656e-05, + "loss": 0.064, + "step": 12148 + }, + { + "epoch": 111.46, + "learning_rate": 3.577981651376147e-05, + "loss": 0.0762, + "step": 12149 + }, + { + "epoch": 111.47, + "learning_rate": 3.5741590214067285e-05, + "loss": 0.0828, + "step": 12150 + }, + { + "epoch": 111.48, + "learning_rate": 3.570336391437309e-05, + "loss": 0.1443, + "step": 12151 + }, + { + "epoch": 111.49, + "learning_rate": 3.56651376146789e-05, + "loss": 0.0388, + "step": 12152 + }, + { + "epoch": 111.5, + "learning_rate": 3.562691131498471e-05, + "loss": 0.0785, + "step": 12153 + }, + { + "epoch": 111.5, + "learning_rate": 3.558868501529052e-05, + "loss": 0.1751, + "step": 12154 + }, + { + "epoch": 111.51, + "learning_rate": 3.5550458715596334e-05, + "loss": 0.1111, + "step": 12155 + }, + { + "epoch": 111.52, + "learning_rate": 3.5512232415902145e-05, + "loss": 0.0921, + "step": 12156 + }, + { + "epoch": 111.53, + "learning_rate": 3.547400611620795e-05, + "loss": 0.1223, + "step": 12157 + }, + { + "epoch": 111.54, + "learning_rate": 3.543577981651376e-05, + "loss": 0.1623, + "step": 12158 + }, + { + "epoch": 111.55, + "learning_rate": 3.539755351681957e-05, + "loss": 0.0684, + "step": 12159 + }, + { + "epoch": 111.56, + "learning_rate": 3.535932721712538e-05, + "loss": 0.0786, + "step": 12160 + }, + { + "epoch": 111.57, + "learning_rate": 3.5321100917431193e-05, + "loss": 0.0863, + "step": 12161 + }, + { + "epoch": 111.58, + "learning_rate": 3.5282874617737005e-05, + "loss": 0.0535, + "step": 12162 + }, + { + "epoch": 111.59, + "learning_rate": 3.524464831804281e-05, + "loss": 0.0771, + "step": 12163 + }, + { + "epoch": 111.6, + "learning_rate": 3.520642201834862e-05, + "loss": 0.0629, + "step": 12164 + }, + { + "epoch": 111.61, + "learning_rate": 3.516819571865444e-05, + "loss": 0.0816, + "step": 12165 + }, + { + "epoch": 111.61, + "learning_rate": 3.512996941896025e-05, + "loss": 0.0892, + "step": 12166 + }, + { + "epoch": 111.62, + "learning_rate": 3.509174311926606e-05, + "loss": 0.1235, + "step": 12167 + }, + { + "epoch": 111.63, + "learning_rate": 3.505351681957187e-05, + "loss": 0.0863, + "step": 12168 + }, + { + "epoch": 111.64, + "learning_rate": 3.5015290519877675e-05, + "loss": 0.1336, + "step": 12169 + }, + { + "epoch": 111.65, + "learning_rate": 3.4977064220183486e-05, + "loss": 0.0907, + "step": 12170 + }, + { + "epoch": 111.66, + "learning_rate": 3.49388379204893e-05, + "loss": 0.0506, + "step": 12171 + }, + { + "epoch": 111.67, + "learning_rate": 3.490061162079511e-05, + "loss": 0.0655, + "step": 12172 + }, + { + "epoch": 111.68, + "learning_rate": 3.486238532110092e-05, + "loss": 0.0769, + "step": 12173 + }, + { + "epoch": 111.69, + "learning_rate": 3.482415902140673e-05, + "loss": 0.066, + "step": 12174 + }, + { + "epoch": 111.7, + "learning_rate": 3.4785932721712535e-05, + "loss": 0.0903, + "step": 12175 + }, + { + "epoch": 111.71, + "learning_rate": 3.4747706422018346e-05, + "loss": 0.0643, + "step": 12176 + }, + { + "epoch": 111.72, + "learning_rate": 3.470948012232416e-05, + "loss": 0.0213, + "step": 12177 + }, + { + "epoch": 111.72, + "learning_rate": 3.467125382262997e-05, + "loss": 0.1499, + "step": 12178 + }, + { + "epoch": 111.73, + "learning_rate": 3.463302752293578e-05, + "loss": 0.0437, + "step": 12179 + }, + { + "epoch": 111.74, + "learning_rate": 3.45948012232416e-05, + "loss": 0.042, + "step": 12180 + }, + { + "epoch": 111.75, + "learning_rate": 3.45565749235474e-05, + "loss": 0.1468, + "step": 12181 + }, + { + "epoch": 111.76, + "learning_rate": 3.451834862385321e-05, + "loss": 0.1008, + "step": 12182 + }, + { + "epoch": 111.77, + "learning_rate": 3.448012232415902e-05, + "loss": 0.1077, + "step": 12183 + }, + { + "epoch": 111.78, + "learning_rate": 3.4441896024464834e-05, + "loss": 0.0872, + "step": 12184 + }, + { + "epoch": 111.79, + "learning_rate": 3.4403669724770645e-05, + "loss": 0.1167, + "step": 12185 + }, + { + "epoch": 111.8, + "learning_rate": 3.4365443425076457e-05, + "loss": 0.0881, + "step": 12186 + }, + { + "epoch": 111.81, + "learning_rate": 3.432721712538226e-05, + "loss": 0.1279, + "step": 12187 + }, + { + "epoch": 111.82, + "learning_rate": 3.428899082568807e-05, + "loss": 0.0598, + "step": 12188 + }, + { + "epoch": 111.83, + "learning_rate": 3.425076452599388e-05, + "loss": 0.0881, + "step": 12189 + }, + { + "epoch": 111.83, + "learning_rate": 3.4212538226299694e-05, + "loss": 0.0727, + "step": 12190 + }, + { + "epoch": 111.84, + "learning_rate": 3.4174311926605505e-05, + "loss": 0.1043, + "step": 12191 + }, + { + "epoch": 111.85, + "learning_rate": 3.4136085626911316e-05, + "loss": 0.1179, + "step": 12192 + }, + { + "epoch": 111.86, + "learning_rate": 3.409785932721712e-05, + "loss": 0.0735, + "step": 12193 + }, + { + "epoch": 111.87, + "learning_rate": 3.405963302752294e-05, + "loss": 0.073, + "step": 12194 + }, + { + "epoch": 111.88, + "learning_rate": 3.402140672782875e-05, + "loss": 0.0572, + "step": 12195 + }, + { + "epoch": 111.89, + "learning_rate": 3.398318042813456e-05, + "loss": 0.1428, + "step": 12196 + }, + { + "epoch": 111.9, + "learning_rate": 3.394495412844037e-05, + "loss": 0.097, + "step": 12197 + }, + { + "epoch": 111.91, + "learning_rate": 3.390672782874618e-05, + "loss": 0.1078, + "step": 12198 + }, + { + "epoch": 111.92, + "learning_rate": 3.386850152905199e-05, + "loss": 0.0786, + "step": 12199 + }, + { + "epoch": 111.93, + "learning_rate": 3.38302752293578e-05, + "loss": 0.0793, + "step": 12200 + }, + { + "epoch": 111.94, + "learning_rate": 3.379204892966361e-05, + "loss": 0.0857, + "step": 12201 + }, + { + "epoch": 111.94, + "learning_rate": 3.375382262996942e-05, + "loss": 0.1348, + "step": 12202 + }, + { + "epoch": 111.95, + "learning_rate": 3.371559633027523e-05, + "loss": 0.0849, + "step": 12203 + }, + { + "epoch": 111.96, + "learning_rate": 3.367737003058104e-05, + "loss": 0.0795, + "step": 12204 + }, + { + "epoch": 111.97, + "learning_rate": 3.3639143730886846e-05, + "loss": 0.0871, + "step": 12205 + }, + { + "epoch": 111.98, + "learning_rate": 3.360091743119266e-05, + "loss": 0.0565, + "step": 12206 + }, + { + "epoch": 111.99, + "learning_rate": 3.356269113149847e-05, + "loss": 0.0347, + "step": 12207 + }, + { + "epoch": 112.0, + "learning_rate": 3.352446483180428e-05, + "loss": 0.0844, + "step": 12208 + }, + { + "epoch": 112.01, + "learning_rate": 3.34862385321101e-05, + "loss": 0.1115, + "step": 12209 + }, + { + "epoch": 112.02, + "learning_rate": 3.344801223241591e-05, + "loss": 0.1375, + "step": 12210 + }, + { + "epoch": 112.03, + "learning_rate": 3.340978593272171e-05, + "loss": 0.1042, + "step": 12211 + }, + { + "epoch": 112.04, + "learning_rate": 3.3371559633027524e-05, + "loss": 0.0851, + "step": 12212 + }, + { + "epoch": 112.05, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.11, + "step": 12213 + }, + { + "epoch": 112.06, + "learning_rate": 3.3295107033639146e-05, + "loss": 0.1592, + "step": 12214 + }, + { + "epoch": 112.06, + "learning_rate": 3.325688073394496e-05, + "loss": 0.1028, + "step": 12215 + }, + { + "epoch": 112.07, + "learning_rate": 3.321865443425077e-05, + "loss": 0.0711, + "step": 12216 + }, + { + "epoch": 112.08, + "learning_rate": 3.318042813455657e-05, + "loss": 0.0831, + "step": 12217 + }, + { + "epoch": 112.09, + "learning_rate": 3.3142201834862383e-05, + "loss": 0.1188, + "step": 12218 + }, + { + "epoch": 112.1, + "learning_rate": 3.3103975535168195e-05, + "loss": 0.1118, + "step": 12219 + }, + { + "epoch": 112.11, + "learning_rate": 3.3065749235474006e-05, + "loss": 0.0692, + "step": 12220 + }, + { + "epoch": 112.12, + "learning_rate": 3.302752293577982e-05, + "loss": 0.0755, + "step": 12221 + }, + { + "epoch": 112.13, + "learning_rate": 3.298929663608563e-05, + "loss": 0.0919, + "step": 12222 + }, + { + "epoch": 112.14, + "learning_rate": 3.295107033639143e-05, + "loss": 0.0452, + "step": 12223 + }, + { + "epoch": 112.15, + "learning_rate": 3.291284403669725e-05, + "loss": 0.0979, + "step": 12224 + }, + { + "epoch": 112.16, + "learning_rate": 3.287461773700306e-05, + "loss": 0.0866, + "step": 12225 + }, + { + "epoch": 112.17, + "learning_rate": 3.283639143730887e-05, + "loss": 0.1058, + "step": 12226 + }, + { + "epoch": 112.17, + "learning_rate": 3.279816513761468e-05, + "loss": 0.0839, + "step": 12227 + }, + { + "epoch": 112.18, + "learning_rate": 3.2759938837920494e-05, + "loss": 0.0861, + "step": 12228 + }, + { + "epoch": 112.19, + "learning_rate": 3.27217125382263e-05, + "loss": 0.1004, + "step": 12229 + }, + { + "epoch": 112.2, + "learning_rate": 3.268348623853211e-05, + "loss": 0.0918, + "step": 12230 + }, + { + "epoch": 112.21, + "learning_rate": 3.264525993883792e-05, + "loss": 0.0465, + "step": 12231 + }, + { + "epoch": 112.22, + "learning_rate": 3.260703363914373e-05, + "loss": 0.0755, + "step": 12232 + }, + { + "epoch": 112.23, + "learning_rate": 3.256880733944954e-05, + "loss": 0.0354, + "step": 12233 + }, + { + "epoch": 112.24, + "learning_rate": 3.2530581039755354e-05, + "loss": 0.0567, + "step": 12234 + }, + { + "epoch": 112.25, + "learning_rate": 3.249235474006116e-05, + "loss": 0.0868, + "step": 12235 + }, + { + "epoch": 112.26, + "learning_rate": 3.245412844036697e-05, + "loss": 0.1814, + "step": 12236 + }, + { + "epoch": 112.27, + "learning_rate": 3.241590214067278e-05, + "loss": 0.1334, + "step": 12237 + }, + { + "epoch": 112.28, + "learning_rate": 3.237767584097859e-05, + "loss": 0.1533, + "step": 12238 + }, + { + "epoch": 112.28, + "learning_rate": 3.233944954128441e-05, + "loss": 0.1187, + "step": 12239 + }, + { + "epoch": 112.29, + "learning_rate": 3.230122324159022e-05, + "loss": 0.1086, + "step": 12240 + }, + { + "epoch": 112.3, + "learning_rate": 3.2262996941896024e-05, + "loss": 0.1248, + "step": 12241 + }, + { + "epoch": 112.31, + "learning_rate": 3.2224770642201835e-05, + "loss": 0.1143, + "step": 12242 + }, + { + "epoch": 112.32, + "learning_rate": 3.2186544342507647e-05, + "loss": 0.061, + "step": 12243 + }, + { + "epoch": 112.33, + "learning_rate": 3.214831804281346e-05, + "loss": 0.1367, + "step": 12244 + }, + { + "epoch": 112.34, + "learning_rate": 3.211009174311927e-05, + "loss": 0.0668, + "step": 12245 + }, + { + "epoch": 112.35, + "learning_rate": 3.207186544342508e-05, + "loss": 0.0869, + "step": 12246 + }, + { + "epoch": 112.36, + "learning_rate": 3.2033639143730884e-05, + "loss": 0.1234, + "step": 12247 + }, + { + "epoch": 112.37, + "learning_rate": 3.1995412844036695e-05, + "loss": 0.0902, + "step": 12248 + }, + { + "epoch": 112.38, + "learning_rate": 3.1957186544342506e-05, + "loss": 0.1014, + "step": 12249 + }, + { + "epoch": 112.39, + "learning_rate": 3.191896024464832e-05, + "loss": 0.0855, + "step": 12250 + }, + { + "epoch": 112.39, + "learning_rate": 3.188073394495413e-05, + "loss": 0.0454, + "step": 12251 + }, + { + "epoch": 112.4, + "learning_rate": 3.184250764525994e-05, + "loss": 0.0491, + "step": 12252 + }, + { + "epoch": 112.41, + "learning_rate": 3.180428134556575e-05, + "loss": 0.0874, + "step": 12253 + }, + { + "epoch": 112.42, + "learning_rate": 3.176605504587156e-05, + "loss": 0.0994, + "step": 12254 + }, + { + "epoch": 112.43, + "learning_rate": 3.172782874617737e-05, + "loss": 0.0227, + "step": 12255 + }, + { + "epoch": 112.44, + "learning_rate": 3.1689602446483184e-05, + "loss": 0.1269, + "step": 12256 + }, + { + "epoch": 112.45, + "learning_rate": 3.1651376146788995e-05, + "loss": 0.1329, + "step": 12257 + }, + { + "epoch": 112.46, + "learning_rate": 3.1613149847094806e-05, + "loss": 0.0819, + "step": 12258 + }, + { + "epoch": 112.47, + "learning_rate": 3.157492354740061e-05, + "loss": 0.0303, + "step": 12259 + }, + { + "epoch": 112.48, + "learning_rate": 3.153669724770642e-05, + "loss": 0.0774, + "step": 12260 + }, + { + "epoch": 112.49, + "learning_rate": 3.149847094801223e-05, + "loss": 0.0581, + "step": 12261 + }, + { + "epoch": 112.5, + "learning_rate": 3.146024464831804e-05, + "loss": 0.0145, + "step": 12262 + }, + { + "epoch": 112.5, + "learning_rate": 3.1422018348623854e-05, + "loss": 0.1462, + "step": 12263 + }, + { + "epoch": 112.51, + "learning_rate": 3.1383792048929665e-05, + "loss": 0.146, + "step": 12264 + }, + { + "epoch": 112.52, + "learning_rate": 3.134556574923547e-05, + "loss": 0.1195, + "step": 12265 + }, + { + "epoch": 112.53, + "learning_rate": 3.130733944954128e-05, + "loss": 0.1135, + "step": 12266 + }, + { + "epoch": 112.54, + "learning_rate": 3.126911314984709e-05, + "loss": 0.0902, + "step": 12267 + }, + { + "epoch": 112.55, + "learning_rate": 3.123088685015291e-05, + "loss": 0.0874, + "step": 12268 + }, + { + "epoch": 112.56, + "learning_rate": 3.119266055045872e-05, + "loss": 0.1366, + "step": 12269 + }, + { + "epoch": 112.57, + "learning_rate": 3.1154434250764525e-05, + "loss": 0.095, + "step": 12270 + }, + { + "epoch": 112.58, + "learning_rate": 3.1116207951070336e-05, + "loss": 0.1011, + "step": 12271 + }, + { + "epoch": 112.59, + "learning_rate": 3.107798165137615e-05, + "loss": 0.0484, + "step": 12272 + }, + { + "epoch": 112.6, + "learning_rate": 3.103975535168196e-05, + "loss": 0.1127, + "step": 12273 + }, + { + "epoch": 112.61, + "learning_rate": 3.100152905198777e-05, + "loss": 0.0875, + "step": 12274 + }, + { + "epoch": 112.61, + "learning_rate": 3.096330275229358e-05, + "loss": 0.0886, + "step": 12275 + }, + { + "epoch": 112.62, + "learning_rate": 3.092507645259939e-05, + "loss": 0.1165, + "step": 12276 + }, + { + "epoch": 112.63, + "learning_rate": 3.08868501529052e-05, + "loss": 0.0635, + "step": 12277 + }, + { + "epoch": 112.64, + "learning_rate": 3.0848623853211013e-05, + "loss": 0.0998, + "step": 12278 + }, + { + "epoch": 112.65, + "learning_rate": 3.081039755351682e-05, + "loss": 0.05, + "step": 12279 + }, + { + "epoch": 112.66, + "learning_rate": 3.077217125382263e-05, + "loss": 0.0469, + "step": 12280 + }, + { + "epoch": 112.67, + "learning_rate": 3.073394495412844e-05, + "loss": 0.0676, + "step": 12281 + }, + { + "epoch": 112.68, + "learning_rate": 3.069571865443425e-05, + "loss": 0.1004, + "step": 12282 + }, + { + "epoch": 112.69, + "learning_rate": 3.065749235474006e-05, + "loss": 0.0953, + "step": 12283 + }, + { + "epoch": 112.7, + "learning_rate": 3.061926605504587e-05, + "loss": 0.0945, + "step": 12284 + }, + { + "epoch": 112.71, + "learning_rate": 3.0581039755351684e-05, + "loss": 0.055, + "step": 12285 + }, + { + "epoch": 112.72, + "learning_rate": 3.0542813455657495e-05, + "loss": 0.1105, + "step": 12286 + }, + { + "epoch": 112.72, + "learning_rate": 3.0504587155963306e-05, + "loss": 0.1198, + "step": 12287 + }, + { + "epoch": 112.73, + "learning_rate": 3.0466360856269114e-05, + "loss": 0.1223, + "step": 12288 + }, + { + "epoch": 112.74, + "learning_rate": 3.0428134556574925e-05, + "loss": 0.1014, + "step": 12289 + }, + { + "epoch": 112.75, + "learning_rate": 3.0389908256880736e-05, + "loss": 0.118, + "step": 12290 + }, + { + "epoch": 112.76, + "learning_rate": 3.0351681957186544e-05, + "loss": 0.1285, + "step": 12291 + }, + { + "epoch": 112.77, + "learning_rate": 3.0313455657492355e-05, + "loss": 0.0933, + "step": 12292 + }, + { + "epoch": 112.78, + "learning_rate": 3.0275229357798166e-05, + "loss": 0.1402, + "step": 12293 + }, + { + "epoch": 112.79, + "learning_rate": 3.0237003058103977e-05, + "loss": 0.1542, + "step": 12294 + }, + { + "epoch": 112.8, + "learning_rate": 3.0198776758409788e-05, + "loss": 0.0928, + "step": 12295 + }, + { + "epoch": 112.81, + "learning_rate": 3.01605504587156e-05, + "loss": 0.1165, + "step": 12296 + }, + { + "epoch": 112.82, + "learning_rate": 3.0122324159021407e-05, + "loss": 0.0688, + "step": 12297 + }, + { + "epoch": 112.83, + "learning_rate": 3.0084097859327218e-05, + "loss": 0.0966, + "step": 12298 + }, + { + "epoch": 112.83, + "learning_rate": 3.004587155963303e-05, + "loss": 0.1053, + "step": 12299 + }, + { + "epoch": 112.84, + "learning_rate": 3.0007645259938837e-05, + "loss": 0.1018, + "step": 12300 + }, + { + "epoch": 112.85, + "learning_rate": 2.996941896024465e-05, + "loss": 0.1175, + "step": 12301 + }, + { + "epoch": 112.86, + "learning_rate": 2.9931192660550462e-05, + "loss": 0.0706, + "step": 12302 + }, + { + "epoch": 112.87, + "learning_rate": 2.989296636085627e-05, + "loss": 0.0987, + "step": 12303 + }, + { + "epoch": 112.88, + "learning_rate": 2.985474006116208e-05, + "loss": 0.1407, + "step": 12304 + }, + { + "epoch": 112.89, + "learning_rate": 2.9816513761467892e-05, + "loss": 0.0907, + "step": 12305 + }, + { + "epoch": 112.9, + "learning_rate": 2.97782874617737e-05, + "loss": 0.0806, + "step": 12306 + }, + { + "epoch": 112.91, + "learning_rate": 2.974006116207951e-05, + "loss": 0.0692, + "step": 12307 + }, + { + "epoch": 112.92, + "learning_rate": 2.9701834862385325e-05, + "loss": 0.1123, + "step": 12308 + }, + { + "epoch": 112.93, + "learning_rate": 2.9663608562691133e-05, + "loss": 0.068, + "step": 12309 + }, + { + "epoch": 112.94, + "learning_rate": 2.9625382262996944e-05, + "loss": 0.0642, + "step": 12310 + }, + { + "epoch": 112.94, + "learning_rate": 2.9587155963302755e-05, + "loss": 0.1191, + "step": 12311 + }, + { + "epoch": 112.95, + "learning_rate": 2.9548929663608563e-05, + "loss": 0.0596, + "step": 12312 + }, + { + "epoch": 112.96, + "learning_rate": 2.9510703363914374e-05, + "loss": 0.0572, + "step": 12313 + }, + { + "epoch": 112.97, + "learning_rate": 2.9472477064220185e-05, + "loss": 0.0741, + "step": 12314 + }, + { + "epoch": 112.98, + "learning_rate": 2.9434250764525992e-05, + "loss": 0.0696, + "step": 12315 + }, + { + "epoch": 112.99, + "learning_rate": 2.9396024464831807e-05, + "loss": 0.0574, + "step": 12316 + }, + { + "epoch": 113.0, + "learning_rate": 2.9357798165137618e-05, + "loss": 0.1027, + "step": 12317 + }, + { + "epoch": 113.01, + "learning_rate": 2.9319571865443426e-05, + "loss": 0.1564, + "step": 12318 + }, + { + "epoch": 113.02, + "learning_rate": 2.9281345565749237e-05, + "loss": 0.1126, + "step": 12319 + }, + { + "epoch": 113.03, + "learning_rate": 2.9243119266055048e-05, + "loss": 0.1184, + "step": 12320 + }, + { + "epoch": 113.04, + "learning_rate": 2.9204892966360855e-05, + "loss": 0.0933, + "step": 12321 + }, + { + "epoch": 113.05, + "learning_rate": 2.9166666666666666e-05, + "loss": 0.1026, + "step": 12322 + }, + { + "epoch": 113.06, + "learning_rate": 2.912844036697248e-05, + "loss": 0.0951, + "step": 12323 + }, + { + "epoch": 113.06, + "learning_rate": 2.909021406727829e-05, + "loss": 0.115, + "step": 12324 + }, + { + "epoch": 113.07, + "learning_rate": 2.90519877675841e-05, + "loss": 0.0984, + "step": 12325 + }, + { + "epoch": 113.08, + "learning_rate": 2.901376146788991e-05, + "loss": 0.0809, + "step": 12326 + }, + { + "epoch": 113.09, + "learning_rate": 2.8975535168195718e-05, + "loss": 0.06, + "step": 12327 + }, + { + "epoch": 113.1, + "learning_rate": 2.893730886850153e-05, + "loss": 0.0292, + "step": 12328 + }, + { + "epoch": 113.11, + "learning_rate": 2.889908256880734e-05, + "loss": 0.0737, + "step": 12329 + }, + { + "epoch": 113.12, + "learning_rate": 2.8860856269113148e-05, + "loss": 0.0383, + "step": 12330 + }, + { + "epoch": 113.13, + "learning_rate": 2.8822629969418963e-05, + "loss": 0.0873, + "step": 12331 + }, + { + "epoch": 113.14, + "learning_rate": 2.8784403669724774e-05, + "loss": 0.0812, + "step": 12332 + }, + { + "epoch": 113.15, + "learning_rate": 2.874617737003058e-05, + "loss": 0.0912, + "step": 12333 + }, + { + "epoch": 113.16, + "learning_rate": 2.8707951070336392e-05, + "loss": 0.1045, + "step": 12334 + }, + { + "epoch": 113.17, + "learning_rate": 2.8669724770642203e-05, + "loss": 0.0829, + "step": 12335 + }, + { + "epoch": 113.17, + "learning_rate": 2.863149847094801e-05, + "loss": 0.1101, + "step": 12336 + }, + { + "epoch": 113.18, + "learning_rate": 2.8593272171253822e-05, + "loss": 0.0754, + "step": 12337 + }, + { + "epoch": 113.19, + "learning_rate": 2.8555045871559637e-05, + "loss": 0.0382, + "step": 12338 + }, + { + "epoch": 113.2, + "learning_rate": 2.8516819571865444e-05, + "loss": 0.1004, + "step": 12339 + }, + { + "epoch": 113.21, + "learning_rate": 2.8478593272171255e-05, + "loss": 0.1214, + "step": 12340 + }, + { + "epoch": 113.22, + "learning_rate": 2.8440366972477066e-05, + "loss": 0.0773, + "step": 12341 + }, + { + "epoch": 113.23, + "learning_rate": 2.8402140672782874e-05, + "loss": 0.0966, + "step": 12342 + }, + { + "epoch": 113.24, + "learning_rate": 2.8363914373088685e-05, + "loss": 0.0602, + "step": 12343 + }, + { + "epoch": 113.25, + "learning_rate": 2.8325688073394496e-05, + "loss": 0.0553, + "step": 12344 + }, + { + "epoch": 113.26, + "learning_rate": 2.8287461773700307e-05, + "loss": 0.1936, + "step": 12345 + }, + { + "epoch": 113.27, + "learning_rate": 2.824923547400612e-05, + "loss": 0.1227, + "step": 12346 + }, + { + "epoch": 113.28, + "learning_rate": 2.821100917431193e-05, + "loss": 0.0971, + "step": 12347 + }, + { + "epoch": 113.28, + "learning_rate": 2.8172782874617737e-05, + "loss": 0.1105, + "step": 12348 + }, + { + "epoch": 113.29, + "learning_rate": 2.8134556574923548e-05, + "loss": 0.0821, + "step": 12349 + }, + { + "epoch": 113.3, + "learning_rate": 2.809633027522936e-05, + "loss": 0.1016, + "step": 12350 + }, + { + "epoch": 113.31, + "learning_rate": 2.8058103975535167e-05, + "loss": 0.046, + "step": 12351 + }, + { + "epoch": 113.32, + "learning_rate": 2.8019877675840978e-05, + "loss": 0.0928, + "step": 12352 + }, + { + "epoch": 113.33, + "learning_rate": 2.7981651376146792e-05, + "loss": 0.0771, + "step": 12353 + }, + { + "epoch": 113.34, + "learning_rate": 2.79434250764526e-05, + "loss": 0.0686, + "step": 12354 + }, + { + "epoch": 113.35, + "learning_rate": 2.790519877675841e-05, + "loss": 0.0626, + "step": 12355 + }, + { + "epoch": 113.36, + "learning_rate": 2.7866972477064222e-05, + "loss": 0.1199, + "step": 12356 + }, + { + "epoch": 113.37, + "learning_rate": 2.782874617737003e-05, + "loss": 0.1065, + "step": 12357 + }, + { + "epoch": 113.38, + "learning_rate": 2.779051987767584e-05, + "loss": 0.1277, + "step": 12358 + }, + { + "epoch": 113.39, + "learning_rate": 2.7752293577981652e-05, + "loss": 0.1286, + "step": 12359 + }, + { + "epoch": 113.39, + "learning_rate": 2.7714067278287463e-05, + "loss": 0.0681, + "step": 12360 + }, + { + "epoch": 113.4, + "learning_rate": 2.7675840978593274e-05, + "loss": 0.0562, + "step": 12361 + }, + { + "epoch": 113.41, + "learning_rate": 2.7637614678899085e-05, + "loss": 0.0906, + "step": 12362 + }, + { + "epoch": 113.42, + "learning_rate": 2.7599388379204893e-05, + "loss": 0.0795, + "step": 12363 + }, + { + "epoch": 113.43, + "learning_rate": 2.7561162079510704e-05, + "loss": 0.0977, + "step": 12364 + }, + { + "epoch": 113.44, + "learning_rate": 2.7522935779816515e-05, + "loss": 0.0821, + "step": 12365 + }, + { + "epoch": 113.45, + "learning_rate": 2.7484709480122323e-05, + "loss": 0.1382, + "step": 12366 + }, + { + "epoch": 113.46, + "learning_rate": 2.7446483180428137e-05, + "loss": 0.0782, + "step": 12367 + }, + { + "epoch": 113.47, + "learning_rate": 2.7408256880733948e-05, + "loss": 0.0487, + "step": 12368 + }, + { + "epoch": 113.48, + "learning_rate": 2.7370030581039756e-05, + "loss": 0.1235, + "step": 12369 + }, + { + "epoch": 113.49, + "learning_rate": 2.7331804281345567e-05, + "loss": 0.0954, + "step": 12370 + }, + { + "epoch": 113.5, + "learning_rate": 2.7293577981651378e-05, + "loss": 0.0476, + "step": 12371 + }, + { + "epoch": 113.5, + "learning_rate": 2.7255351681957186e-05, + "loss": 0.161, + "step": 12372 + }, + { + "epoch": 113.51, + "learning_rate": 2.7217125382262997e-05, + "loss": 0.1715, + "step": 12373 + }, + { + "epoch": 113.52, + "learning_rate": 2.7178899082568808e-05, + "loss": 0.0871, + "step": 12374 + }, + { + "epoch": 113.53, + "learning_rate": 2.714067278287462e-05, + "loss": 0.0891, + "step": 12375 + }, + { + "epoch": 113.54, + "learning_rate": 2.710244648318043e-05, + "loss": 0.1089, + "step": 12376 + }, + { + "epoch": 113.55, + "learning_rate": 2.706422018348624e-05, + "loss": 0.1032, + "step": 12377 + }, + { + "epoch": 113.56, + "learning_rate": 2.702599388379205e-05, + "loss": 0.0999, + "step": 12378 + }, + { + "epoch": 113.57, + "learning_rate": 2.698776758409786e-05, + "loss": 0.0757, + "step": 12379 + }, + { + "epoch": 113.58, + "learning_rate": 2.694954128440367e-05, + "loss": 0.077, + "step": 12380 + }, + { + "epoch": 113.59, + "learning_rate": 2.691131498470948e-05, + "loss": 0.1035, + "step": 12381 + }, + { + "epoch": 113.6, + "learning_rate": 2.6873088685015293e-05, + "loss": 0.1509, + "step": 12382 + }, + { + "epoch": 113.61, + "learning_rate": 2.6834862385321104e-05, + "loss": 0.0677, + "step": 12383 + }, + { + "epoch": 113.61, + "learning_rate": 2.679663608562691e-05, + "loss": 0.0636, + "step": 12384 + }, + { + "epoch": 113.62, + "learning_rate": 2.6758409785932723e-05, + "loss": 0.0783, + "step": 12385 + }, + { + "epoch": 113.63, + "learning_rate": 2.6720183486238534e-05, + "loss": 0.0626, + "step": 12386 + }, + { + "epoch": 113.64, + "learning_rate": 2.668195718654434e-05, + "loss": 0.0303, + "step": 12387 + }, + { + "epoch": 113.65, + "learning_rate": 2.6643730886850153e-05, + "loss": 0.1088, + "step": 12388 + }, + { + "epoch": 113.66, + "learning_rate": 2.6605504587155967e-05, + "loss": 0.0665, + "step": 12389 + }, + { + "epoch": 113.67, + "learning_rate": 2.6567278287461775e-05, + "loss": 0.0997, + "step": 12390 + }, + { + "epoch": 113.68, + "learning_rate": 2.6529051987767586e-05, + "loss": 0.0731, + "step": 12391 + }, + { + "epoch": 113.69, + "learning_rate": 2.6490825688073397e-05, + "loss": 0.0757, + "step": 12392 + }, + { + "epoch": 113.7, + "learning_rate": 2.6452599388379204e-05, + "loss": 0.0259, + "step": 12393 + }, + { + "epoch": 113.71, + "learning_rate": 2.6414373088685016e-05, + "loss": 0.1105, + "step": 12394 + }, + { + "epoch": 113.72, + "learning_rate": 2.6376146788990827e-05, + "loss": 0.0811, + "step": 12395 + }, + { + "epoch": 113.72, + "learning_rate": 2.6337920489296634e-05, + "loss": 0.056, + "step": 12396 + }, + { + "epoch": 113.73, + "learning_rate": 2.629969418960245e-05, + "loss": 0.0149, + "step": 12397 + }, + { + "epoch": 113.74, + "learning_rate": 2.626146788990826e-05, + "loss": 0.0135, + "step": 12398 + }, + { + "epoch": 113.75, + "learning_rate": 2.6223241590214067e-05, + "loss": 0.196, + "step": 12399 + }, + { + "epoch": 113.76, + "learning_rate": 2.618501529051988e-05, + "loss": 0.1218, + "step": 12400 + }, + { + "epoch": 113.77, + "learning_rate": 2.614678899082569e-05, + "loss": 0.0653, + "step": 12401 + }, + { + "epoch": 113.78, + "learning_rate": 2.6108562691131497e-05, + "loss": 0.0884, + "step": 12402 + }, + { + "epoch": 113.79, + "learning_rate": 2.607033639143731e-05, + "loss": 0.0659, + "step": 12403 + }, + { + "epoch": 113.8, + "learning_rate": 2.6032110091743123e-05, + "loss": 0.0912, + "step": 12404 + }, + { + "epoch": 113.81, + "learning_rate": 2.599388379204893e-05, + "loss": 0.1029, + "step": 12405 + }, + { + "epoch": 113.82, + "learning_rate": 2.595565749235474e-05, + "loss": 0.0884, + "step": 12406 + }, + { + "epoch": 113.83, + "learning_rate": 2.5917431192660553e-05, + "loss": 0.1036, + "step": 12407 + }, + { + "epoch": 113.83, + "learning_rate": 2.587920489296636e-05, + "loss": 0.1305, + "step": 12408 + }, + { + "epoch": 113.84, + "learning_rate": 2.584097859327217e-05, + "loss": 0.0581, + "step": 12409 + }, + { + "epoch": 113.85, + "learning_rate": 2.5802752293577982e-05, + "loss": 0.1085, + "step": 12410 + }, + { + "epoch": 113.86, + "learning_rate": 2.576452599388379e-05, + "loss": 0.0594, + "step": 12411 + }, + { + "epoch": 113.87, + "learning_rate": 2.5726299694189605e-05, + "loss": 0.0594, + "step": 12412 + }, + { + "epoch": 113.88, + "learning_rate": 2.5688073394495416e-05, + "loss": 0.115, + "step": 12413 + }, + { + "epoch": 113.89, + "learning_rate": 2.5649847094801223e-05, + "loss": 0.0596, + "step": 12414 + }, + { + "epoch": 113.9, + "learning_rate": 2.5611620795107034e-05, + "loss": 0.0754, + "step": 12415 + }, + { + "epoch": 113.91, + "learning_rate": 2.5573394495412845e-05, + "loss": 0.1323, + "step": 12416 + }, + { + "epoch": 113.92, + "learning_rate": 2.5535168195718653e-05, + "loss": 0.1084, + "step": 12417 + }, + { + "epoch": 113.93, + "learning_rate": 2.5496941896024464e-05, + "loss": 0.0765, + "step": 12418 + }, + { + "epoch": 113.94, + "learning_rate": 2.545871559633028e-05, + "loss": 0.0452, + "step": 12419 + }, + { + "epoch": 113.94, + "learning_rate": 2.5420489296636086e-05, + "loss": 0.0905, + "step": 12420 + }, + { + "epoch": 113.95, + "learning_rate": 2.5382262996941897e-05, + "loss": 0.0749, + "step": 12421 + }, + { + "epoch": 113.96, + "learning_rate": 2.534403669724771e-05, + "loss": 0.061, + "step": 12422 + }, + { + "epoch": 113.97, + "learning_rate": 2.5305810397553516e-05, + "loss": 0.0616, + "step": 12423 + }, + { + "epoch": 113.98, + "learning_rate": 2.5267584097859327e-05, + "loss": 0.0942, + "step": 12424 + }, + { + "epoch": 113.99, + "learning_rate": 2.5229357798165138e-05, + "loss": 0.0364, + "step": 12425 + }, + { + "epoch": 114.0, + "learning_rate": 2.519113149847095e-05, + "loss": 0.0729, + "step": 12426 + }, + { + "epoch": 114.01, + "learning_rate": 2.515290519877676e-05, + "loss": 0.1266, + "step": 12427 + }, + { + "epoch": 114.02, + "learning_rate": 2.511467889908257e-05, + "loss": 0.0778, + "step": 12428 + }, + { + "epoch": 114.03, + "learning_rate": 2.507645259938838e-05, + "loss": 0.1095, + "step": 12429 + }, + { + "epoch": 114.04, + "learning_rate": 2.503822629969419e-05, + "loss": 0.1187, + "step": 12430 + }, + { + "epoch": 114.05, + "learning_rate": 2.5e-05, + "loss": 0.0845, + "step": 12431 + }, + { + "epoch": 114.06, + "learning_rate": 2.496177370030581e-05, + "loss": 0.0905, + "step": 12432 + }, + { + "epoch": 114.06, + "learning_rate": 2.492354740061162e-05, + "loss": 0.098, + "step": 12433 + }, + { + "epoch": 114.07, + "learning_rate": 2.4885321100917434e-05, + "loss": 0.0753, + "step": 12434 + }, + { + "epoch": 114.08, + "learning_rate": 2.4847094801223242e-05, + "loss": 0.0588, + "step": 12435 + }, + { + "epoch": 114.09, + "learning_rate": 2.4808868501529053e-05, + "loss": 0.0529, + "step": 12436 + }, + { + "epoch": 114.1, + "learning_rate": 2.4770642201834864e-05, + "loss": 0.1098, + "step": 12437 + }, + { + "epoch": 114.11, + "learning_rate": 2.4732415902140672e-05, + "loss": 0.1287, + "step": 12438 + }, + { + "epoch": 114.12, + "learning_rate": 2.4694189602446483e-05, + "loss": 0.0953, + "step": 12439 + }, + { + "epoch": 114.13, + "learning_rate": 2.4655963302752294e-05, + "loss": 0.0831, + "step": 12440 + }, + { + "epoch": 114.14, + "learning_rate": 2.4617737003058105e-05, + "loss": 0.1038, + "step": 12441 + }, + { + "epoch": 114.15, + "learning_rate": 2.4579510703363916e-05, + "loss": 0.0418, + "step": 12442 + }, + { + "epoch": 114.16, + "learning_rate": 2.4541284403669727e-05, + "loss": 0.063, + "step": 12443 + }, + { + "epoch": 114.17, + "learning_rate": 2.4503058103975535e-05, + "loss": 0.064, + "step": 12444 + }, + { + "epoch": 114.17, + "learning_rate": 2.4464831804281346e-05, + "loss": 0.0747, + "step": 12445 + }, + { + "epoch": 114.18, + "learning_rate": 2.4426605504587157e-05, + "loss": 0.0493, + "step": 12446 + }, + { + "epoch": 114.19, + "learning_rate": 2.4388379204892965e-05, + "loss": 0.0934, + "step": 12447 + }, + { + "epoch": 114.2, + "learning_rate": 2.435015290519878e-05, + "loss": 0.0801, + "step": 12448 + }, + { + "epoch": 114.21, + "learning_rate": 2.431192660550459e-05, + "loss": 0.0798, + "step": 12449 + }, + { + "epoch": 114.22, + "learning_rate": 2.4273700305810398e-05, + "loss": 0.0473, + "step": 12450 + }, + { + "epoch": 114.23, + "learning_rate": 2.423547400611621e-05, + "loss": 0.0214, + "step": 12451 + }, + { + "epoch": 114.24, + "learning_rate": 2.419724770642202e-05, + "loss": 0.0475, + "step": 12452 + }, + { + "epoch": 114.25, + "learning_rate": 2.4159021406727828e-05, + "loss": 0.0397, + "step": 12453 + }, + { + "epoch": 114.26, + "learning_rate": 2.412079510703364e-05, + "loss": 0.1523, + "step": 12454 + }, + { + "epoch": 114.27, + "learning_rate": 2.408256880733945e-05, + "loss": 0.1307, + "step": 12455 + }, + { + "epoch": 114.28, + "learning_rate": 2.404434250764526e-05, + "loss": 0.0995, + "step": 12456 + }, + { + "epoch": 114.28, + "learning_rate": 2.4006116207951072e-05, + "loss": 0.1434, + "step": 12457 + }, + { + "epoch": 114.29, + "learning_rate": 2.3967889908256883e-05, + "loss": 0.1048, + "step": 12458 + }, + { + "epoch": 114.3, + "learning_rate": 2.392966360856269e-05, + "loss": 0.1312, + "step": 12459 + }, + { + "epoch": 114.31, + "learning_rate": 2.3891437308868502e-05, + "loss": 0.0742, + "step": 12460 + }, + { + "epoch": 114.32, + "learning_rate": 2.3853211009174313e-05, + "loss": 0.0955, + "step": 12461 + }, + { + "epoch": 114.33, + "learning_rate": 2.381498470948012e-05, + "loss": 0.0398, + "step": 12462 + }, + { + "epoch": 114.34, + "learning_rate": 2.3776758409785935e-05, + "loss": 0.0763, + "step": 12463 + }, + { + "epoch": 114.35, + "learning_rate": 2.3738532110091746e-05, + "loss": 0.1178, + "step": 12464 + }, + { + "epoch": 114.36, + "learning_rate": 2.3700305810397554e-05, + "loss": 0.0552, + "step": 12465 + }, + { + "epoch": 114.37, + "learning_rate": 2.3662079510703365e-05, + "loss": 0.102, + "step": 12466 + }, + { + "epoch": 114.38, + "learning_rate": 2.3623853211009176e-05, + "loss": 0.051, + "step": 12467 + }, + { + "epoch": 114.39, + "learning_rate": 2.3585626911314983e-05, + "loss": 0.0482, + "step": 12468 + }, + { + "epoch": 114.39, + "learning_rate": 2.3547400611620795e-05, + "loss": 0.0626, + "step": 12469 + }, + { + "epoch": 114.4, + "learning_rate": 2.350917431192661e-05, + "loss": 0.1343, + "step": 12470 + }, + { + "epoch": 114.41, + "learning_rate": 2.3470948012232417e-05, + "loss": 0.0656, + "step": 12471 + }, + { + "epoch": 114.42, + "learning_rate": 2.3432721712538228e-05, + "loss": 0.0696, + "step": 12472 + }, + { + "epoch": 114.43, + "learning_rate": 2.339449541284404e-05, + "loss": 0.1073, + "step": 12473 + }, + { + "epoch": 114.44, + "learning_rate": 2.3356269113149846e-05, + "loss": 0.0883, + "step": 12474 + }, + { + "epoch": 114.45, + "learning_rate": 2.3318042813455658e-05, + "loss": 0.0981, + "step": 12475 + }, + { + "epoch": 114.46, + "learning_rate": 2.327981651376147e-05, + "loss": 0.0443, + "step": 12476 + }, + { + "epoch": 114.47, + "learning_rate": 2.3241590214067276e-05, + "loss": 0.082, + "step": 12477 + }, + { + "epoch": 114.48, + "learning_rate": 2.320336391437309e-05, + "loss": 0.0667, + "step": 12478 + }, + { + "epoch": 114.49, + "learning_rate": 2.3165137614678902e-05, + "loss": 0.0471, + "step": 12479 + }, + { + "epoch": 114.5, + "learning_rate": 2.312691131498471e-05, + "loss": 0.0323, + "step": 12480 + }, + { + "epoch": 114.5, + "learning_rate": 2.308868501529052e-05, + "loss": 0.1452, + "step": 12481 + }, + { + "epoch": 114.51, + "learning_rate": 2.305045871559633e-05, + "loss": 0.1673, + "step": 12482 + }, + { + "epoch": 114.52, + "learning_rate": 2.301223241590214e-05, + "loss": 0.0848, + "step": 12483 + }, + { + "epoch": 114.53, + "learning_rate": 2.297400611620795e-05, + "loss": 0.0895, + "step": 12484 + }, + { + "epoch": 114.54, + "learning_rate": 2.2935779816513765e-05, + "loss": 0.1248, + "step": 12485 + }, + { + "epoch": 114.55, + "learning_rate": 2.2897553516819572e-05, + "loss": 0.1021, + "step": 12486 + }, + { + "epoch": 114.56, + "learning_rate": 2.2859327217125384e-05, + "loss": 0.1396, + "step": 12487 + }, + { + "epoch": 114.57, + "learning_rate": 2.2821100917431195e-05, + "loss": 0.0838, + "step": 12488 + }, + { + "epoch": 114.58, + "learning_rate": 2.2782874617737002e-05, + "loss": 0.0883, + "step": 12489 + }, + { + "epoch": 114.59, + "learning_rate": 2.2744648318042813e-05, + "loss": 0.0941, + "step": 12490 + }, + { + "epoch": 114.6, + "learning_rate": 2.2706422018348624e-05, + "loss": 0.0997, + "step": 12491 + }, + { + "epoch": 114.61, + "learning_rate": 2.2668195718654432e-05, + "loss": 0.0938, + "step": 12492 + }, + { + "epoch": 114.61, + "learning_rate": 2.2629969418960247e-05, + "loss": 0.0726, + "step": 12493 + }, + { + "epoch": 114.62, + "learning_rate": 2.2591743119266058e-05, + "loss": 0.0486, + "step": 12494 + }, + { + "epoch": 114.63, + "learning_rate": 2.2553516819571865e-05, + "loss": 0.0797, + "step": 12495 + }, + { + "epoch": 114.64, + "learning_rate": 2.2515290519877676e-05, + "loss": 0.0813, + "step": 12496 + }, + { + "epoch": 114.65, + "learning_rate": 2.2477064220183487e-05, + "loss": 0.1147, + "step": 12497 + }, + { + "epoch": 114.66, + "learning_rate": 2.2438837920489295e-05, + "loss": 0.0797, + "step": 12498 + }, + { + "epoch": 114.67, + "learning_rate": 2.2400611620795106e-05, + "loss": 0.1145, + "step": 12499 + }, + { + "epoch": 114.68, + "learning_rate": 2.236238532110092e-05, + "loss": 0.0437, + "step": 12500 + }, + { + "epoch": 114.68, + "eval_cer": 0.13283557474624808, + "eval_loss": 0.7524892091751099, + "eval_runtime": 87.7282, + "eval_samples_per_second": 18.774, + "eval_steps_per_second": 2.348, + "eval_wer": 0.4640996833826984, + "step": 12500 + }, + { + "epoch": 114.69, + "learning_rate": 2.2324159021406728e-05, + "loss": 0.0644, + "step": 12501 + }, + { + "epoch": 114.7, + "learning_rate": 2.228593272171254e-05, + "loss": 0.0688, + "step": 12502 + }, + { + "epoch": 114.71, + "learning_rate": 2.224770642201835e-05, + "loss": 0.0649, + "step": 12503 + }, + { + "epoch": 114.72, + "learning_rate": 2.2209480122324158e-05, + "loss": 0.0559, + "step": 12504 + }, + { + "epoch": 114.72, + "learning_rate": 2.217125382262997e-05, + "loss": 0.1261, + "step": 12505 + }, + { + "epoch": 114.73, + "learning_rate": 2.213302752293578e-05, + "loss": 0.0548, + "step": 12506 + }, + { + "epoch": 114.74, + "learning_rate": 2.209480122324159e-05, + "loss": 0.0299, + "step": 12507 + }, + { + "epoch": 114.75, + "learning_rate": 2.2056574923547402e-05, + "loss": 0.111, + "step": 12508 + }, + { + "epoch": 114.76, + "learning_rate": 2.2018348623853213e-05, + "loss": 0.0901, + "step": 12509 + }, + { + "epoch": 114.77, + "learning_rate": 2.198012232415902e-05, + "loss": 0.1016, + "step": 12510 + }, + { + "epoch": 114.78, + "learning_rate": 2.1941896024464832e-05, + "loss": 0.1067, + "step": 12511 + }, + { + "epoch": 114.79, + "learning_rate": 2.1903669724770643e-05, + "loss": 0.1232, + "step": 12512 + }, + { + "epoch": 114.8, + "learning_rate": 2.186544342507645e-05, + "loss": 0.1065, + "step": 12513 + }, + { + "epoch": 114.81, + "learning_rate": 2.1827217125382262e-05, + "loss": 0.1063, + "step": 12514 + }, + { + "epoch": 114.82, + "learning_rate": 2.1788990825688076e-05, + "loss": 0.1113, + "step": 12515 + }, + { + "epoch": 114.83, + "learning_rate": 2.1750764525993884e-05, + "loss": 0.1007, + "step": 12516 + }, + { + "epoch": 114.83, + "learning_rate": 2.1712538226299695e-05, + "loss": 0.0892, + "step": 12517 + }, + { + "epoch": 114.84, + "learning_rate": 2.1674311926605506e-05, + "loss": 0.1287, + "step": 12518 + }, + { + "epoch": 114.85, + "learning_rate": 2.1636085626911314e-05, + "loss": 0.0821, + "step": 12519 + }, + { + "epoch": 114.86, + "learning_rate": 2.1597859327217125e-05, + "loss": 0.0936, + "step": 12520 + }, + { + "epoch": 114.87, + "learning_rate": 2.1559633027522936e-05, + "loss": 0.0819, + "step": 12521 + }, + { + "epoch": 114.88, + "learning_rate": 2.1521406727828747e-05, + "loss": 0.1081, + "step": 12522 + }, + { + "epoch": 114.89, + "learning_rate": 2.1483180428134558e-05, + "loss": 0.139, + "step": 12523 + }, + { + "epoch": 114.9, + "learning_rate": 2.144495412844037e-05, + "loss": 0.078, + "step": 12524 + }, + { + "epoch": 114.91, + "learning_rate": 2.1406727828746177e-05, + "loss": 0.0901, + "step": 12525 + }, + { + "epoch": 114.92, + "learning_rate": 2.1368501529051988e-05, + "loss": 0.1087, + "step": 12526 + }, + { + "epoch": 114.93, + "learning_rate": 2.13302752293578e-05, + "loss": 0.088, + "step": 12527 + }, + { + "epoch": 114.94, + "learning_rate": 2.1292048929663607e-05, + "loss": 0.0555, + "step": 12528 + }, + { + "epoch": 114.94, + "learning_rate": 2.125382262996942e-05, + "loss": 0.0617, + "step": 12529 + }, + { + "epoch": 114.95, + "learning_rate": 2.1215596330275232e-05, + "loss": 0.0484, + "step": 12530 + }, + { + "epoch": 114.96, + "learning_rate": 2.117737003058104e-05, + "loss": 0.046, + "step": 12531 + }, + { + "epoch": 114.97, + "learning_rate": 2.113914373088685e-05, + "loss": 0.0822, + "step": 12532 + }, + { + "epoch": 114.98, + "learning_rate": 2.1100917431192662e-05, + "loss": 0.1153, + "step": 12533 + }, + { + "epoch": 114.99, + "learning_rate": 2.106269113149847e-05, + "loss": 0.0747, + "step": 12534 + }, + { + "epoch": 115.0, + "learning_rate": 2.102446483180428e-05, + "loss": 0.1246, + "step": 12535 + }, + { + "epoch": 115.01, + "learning_rate": 2.0986238532110092e-05, + "loss": 0.0917, + "step": 12536 + }, + { + "epoch": 115.02, + "learning_rate": 2.0948012232415903e-05, + "loss": 0.0765, + "step": 12537 + }, + { + "epoch": 115.03, + "learning_rate": 2.0909785932721714e-05, + "loss": 0.0678, + "step": 12538 + }, + { + "epoch": 115.04, + "learning_rate": 2.0871559633027525e-05, + "loss": 0.1246, + "step": 12539 + }, + { + "epoch": 115.05, + "learning_rate": 2.0833333333333333e-05, + "loss": 0.0906, + "step": 12540 + }, + { + "epoch": 115.06, + "learning_rate": 2.0795107033639144e-05, + "loss": 0.0879, + "step": 12541 + }, + { + "epoch": 115.06, + "learning_rate": 2.0756880733944955e-05, + "loss": 0.1332, + "step": 12542 + }, + { + "epoch": 115.07, + "learning_rate": 2.0718654434250762e-05, + "loss": 0.0963, + "step": 12543 + }, + { + "epoch": 115.08, + "learning_rate": 2.0680428134556577e-05, + "loss": 0.101, + "step": 12544 + }, + { + "epoch": 115.09, + "learning_rate": 2.0642201834862388e-05, + "loss": 0.106, + "step": 12545 + }, + { + "epoch": 115.1, + "learning_rate": 2.0603975535168196e-05, + "loss": 0.0959, + "step": 12546 + }, + { + "epoch": 115.11, + "learning_rate": 2.0565749235474007e-05, + "loss": 0.1304, + "step": 12547 + }, + { + "epoch": 115.12, + "learning_rate": 2.0527522935779818e-05, + "loss": 0.1163, + "step": 12548 + }, + { + "epoch": 115.13, + "learning_rate": 2.0489296636085625e-05, + "loss": 0.0799, + "step": 12549 + }, + { + "epoch": 115.14, + "learning_rate": 2.0451070336391437e-05, + "loss": 0.0839, + "step": 12550 + }, + { + "epoch": 115.15, + "learning_rate": 2.041284403669725e-05, + "loss": 0.062, + "step": 12551 + }, + { + "epoch": 115.16, + "learning_rate": 2.037461773700306e-05, + "loss": 0.0704, + "step": 12552 + }, + { + "epoch": 115.17, + "learning_rate": 2.033639143730887e-05, + "loss": 0.0729, + "step": 12553 + }, + { + "epoch": 115.17, + "learning_rate": 2.029816513761468e-05, + "loss": 0.1296, + "step": 12554 + }, + { + "epoch": 115.18, + "learning_rate": 2.025993883792049e-05, + "loss": 0.126, + "step": 12555 + }, + { + "epoch": 115.19, + "learning_rate": 2.02217125382263e-05, + "loss": 0.0763, + "step": 12556 + }, + { + "epoch": 115.2, + "learning_rate": 2.018348623853211e-05, + "loss": 0.0657, + "step": 12557 + }, + { + "epoch": 115.21, + "learning_rate": 2.0145259938837918e-05, + "loss": 0.089, + "step": 12558 + }, + { + "epoch": 115.22, + "learning_rate": 2.0107033639143733e-05, + "loss": 0.1113, + "step": 12559 + }, + { + "epoch": 115.23, + "learning_rate": 2.0068807339449544e-05, + "loss": 0.055, + "step": 12560 + }, + { + "epoch": 115.24, + "learning_rate": 2.003058103975535e-05, + "loss": 0.0417, + "step": 12561 + }, + { + "epoch": 115.25, + "learning_rate": 1.9992354740061162e-05, + "loss": 0.0574, + "step": 12562 + }, + { + "epoch": 115.26, + "learning_rate": 1.9954128440366974e-05, + "loss": 0.0943, + "step": 12563 + }, + { + "epoch": 115.27, + "learning_rate": 1.991590214067278e-05, + "loss": 0.1018, + "step": 12564 + }, + { + "epoch": 115.28, + "learning_rate": 1.9877675840978592e-05, + "loss": 0.1116, + "step": 12565 + }, + { + "epoch": 115.28, + "learning_rate": 1.9839449541284407e-05, + "loss": 0.1213, + "step": 12566 + }, + { + "epoch": 115.29, + "learning_rate": 1.9801223241590214e-05, + "loss": 0.0716, + "step": 12567 + }, + { + "epoch": 115.3, + "learning_rate": 1.9762996941896025e-05, + "loss": 0.1072, + "step": 12568 + }, + { + "epoch": 115.31, + "learning_rate": 1.9724770642201837e-05, + "loss": 0.0542, + "step": 12569 + }, + { + "epoch": 115.32, + "learning_rate": 1.9686544342507644e-05, + "loss": 0.0572, + "step": 12570 + }, + { + "epoch": 115.33, + "learning_rate": 1.9648318042813455e-05, + "loss": 0.056, + "step": 12571 + }, + { + "epoch": 115.34, + "learning_rate": 1.9610091743119266e-05, + "loss": 0.0851, + "step": 12572 + }, + { + "epoch": 115.35, + "learning_rate": 1.9571865443425074e-05, + "loss": 0.0635, + "step": 12573 + }, + { + "epoch": 115.36, + "learning_rate": 1.953363914373089e-05, + "loss": 0.0697, + "step": 12574 + }, + { + "epoch": 115.37, + "learning_rate": 1.94954128440367e-05, + "loss": 0.074, + "step": 12575 + }, + { + "epoch": 115.38, + "learning_rate": 1.9457186544342507e-05, + "loss": 0.089, + "step": 12576 + }, + { + "epoch": 115.39, + "learning_rate": 1.9418960244648318e-05, + "loss": 0.0751, + "step": 12577 + }, + { + "epoch": 115.39, + "learning_rate": 1.938073394495413e-05, + "loss": 0.0815, + "step": 12578 + }, + { + "epoch": 115.4, + "learning_rate": 1.9342507645259937e-05, + "loss": 0.0921, + "step": 12579 + }, + { + "epoch": 115.41, + "learning_rate": 1.9304281345565748e-05, + "loss": 0.0616, + "step": 12580 + }, + { + "epoch": 115.42, + "learning_rate": 1.9266055045871563e-05, + "loss": 0.0563, + "step": 12581 + }, + { + "epoch": 115.43, + "learning_rate": 1.922782874617737e-05, + "loss": 0.0775, + "step": 12582 + }, + { + "epoch": 115.44, + "learning_rate": 1.918960244648318e-05, + "loss": 0.0689, + "step": 12583 + }, + { + "epoch": 115.45, + "learning_rate": 1.9151376146788992e-05, + "loss": 0.0449, + "step": 12584 + }, + { + "epoch": 115.46, + "learning_rate": 1.91131498470948e-05, + "loss": 0.0656, + "step": 12585 + }, + { + "epoch": 115.47, + "learning_rate": 1.907492354740061e-05, + "loss": 0.0447, + "step": 12586 + }, + { + "epoch": 115.48, + "learning_rate": 1.9036697247706422e-05, + "loss": 0.0708, + "step": 12587 + }, + { + "epoch": 115.49, + "learning_rate": 1.8998470948012233e-05, + "loss": 0.0867, + "step": 12588 + }, + { + "epoch": 115.5, + "learning_rate": 1.8960244648318044e-05, + "loss": 0.026, + "step": 12589 + }, + { + "epoch": 115.5, + "learning_rate": 1.8922018348623855e-05, + "loss": 0.1489, + "step": 12590 + }, + { + "epoch": 115.51, + "learning_rate": 1.8883792048929663e-05, + "loss": 0.0868, + "step": 12591 + }, + { + "epoch": 115.52, + "learning_rate": 1.8845565749235474e-05, + "loss": 0.0904, + "step": 12592 + }, + { + "epoch": 115.53, + "learning_rate": 1.8807339449541285e-05, + "loss": 0.0689, + "step": 12593 + }, + { + "epoch": 115.54, + "learning_rate": 1.8769113149847093e-05, + "loss": 0.109, + "step": 12594 + }, + { + "epoch": 115.55, + "learning_rate": 1.8730886850152904e-05, + "loss": 0.1216, + "step": 12595 + }, + { + "epoch": 115.56, + "learning_rate": 1.869266055045872e-05, + "loss": 0.0716, + "step": 12596 + }, + { + "epoch": 115.57, + "learning_rate": 1.8654434250764526e-05, + "loss": 0.1438, + "step": 12597 + }, + { + "epoch": 115.58, + "learning_rate": 1.8616207951070337e-05, + "loss": 0.0997, + "step": 12598 + }, + { + "epoch": 115.59, + "learning_rate": 1.8577981651376148e-05, + "loss": 0.1491, + "step": 12599 + }, + { + "epoch": 115.6, + "learning_rate": 1.8539755351681956e-05, + "loss": 0.089, + "step": 12600 + }, + { + "epoch": 115.61, + "learning_rate": 1.8501529051987767e-05, + "loss": 0.0648, + "step": 12601 + }, + { + "epoch": 115.61, + "learning_rate": 1.8463302752293578e-05, + "loss": 0.0739, + "step": 12602 + }, + { + "epoch": 115.62, + "learning_rate": 1.842507645259939e-05, + "loss": 0.0934, + "step": 12603 + }, + { + "epoch": 115.63, + "learning_rate": 1.83868501529052e-05, + "loss": 0.0991, + "step": 12604 + }, + { + "epoch": 115.64, + "learning_rate": 1.834862385321101e-05, + "loss": 0.1118, + "step": 12605 + }, + { + "epoch": 115.65, + "learning_rate": 1.831039755351682e-05, + "loss": 0.0655, + "step": 12606 + }, + { + "epoch": 115.66, + "learning_rate": 1.827217125382263e-05, + "loss": 0.1537, + "step": 12607 + }, + { + "epoch": 115.67, + "learning_rate": 1.823394495412844e-05, + "loss": 0.0897, + "step": 12608 + }, + { + "epoch": 115.68, + "learning_rate": 1.819571865443425e-05, + "loss": 0.0516, + "step": 12609 + }, + { + "epoch": 115.69, + "learning_rate": 1.8157492354740063e-05, + "loss": 0.0767, + "step": 12610 + }, + { + "epoch": 115.7, + "learning_rate": 1.8119266055045874e-05, + "loss": 0.0908, + "step": 12611 + }, + { + "epoch": 115.71, + "learning_rate": 1.8081039755351682e-05, + "loss": 0.0929, + "step": 12612 + }, + { + "epoch": 115.72, + "learning_rate": 1.8042813455657493e-05, + "loss": 0.0285, + "step": 12613 + }, + { + "epoch": 115.72, + "learning_rate": 1.8004587155963304e-05, + "loss": 0.0591, + "step": 12614 + }, + { + "epoch": 115.73, + "learning_rate": 1.796636085626911e-05, + "loss": 0.0719, + "step": 12615 + }, + { + "epoch": 115.74, + "learning_rate": 1.7928134556574923e-05, + "loss": 0.0289, + "step": 12616 + }, + { + "epoch": 115.75, + "learning_rate": 1.7889908256880734e-05, + "loss": 0.1091, + "step": 12617 + }, + { + "epoch": 115.76, + "learning_rate": 1.7851681957186545e-05, + "loss": 0.1162, + "step": 12618 + }, + { + "epoch": 115.77, + "learning_rate": 1.7813455657492356e-05, + "loss": 0.1099, + "step": 12619 + }, + { + "epoch": 115.78, + "learning_rate": 1.7775229357798167e-05, + "loss": 0.0478, + "step": 12620 + }, + { + "epoch": 115.79, + "learning_rate": 1.7737003058103975e-05, + "loss": 0.1074, + "step": 12621 + }, + { + "epoch": 115.8, + "learning_rate": 1.7698776758409786e-05, + "loss": 0.0839, + "step": 12622 + }, + { + "epoch": 115.81, + "learning_rate": 1.7660550458715597e-05, + "loss": 0.0604, + "step": 12623 + }, + { + "epoch": 115.82, + "learning_rate": 1.7622324159021404e-05, + "loss": 0.0761, + "step": 12624 + }, + { + "epoch": 115.83, + "learning_rate": 1.758409785932722e-05, + "loss": 0.0541, + "step": 12625 + }, + { + "epoch": 115.83, + "learning_rate": 1.754587155963303e-05, + "loss": 0.1221, + "step": 12626 + }, + { + "epoch": 115.84, + "learning_rate": 1.7507645259938838e-05, + "loss": 0.099, + "step": 12627 + }, + { + "epoch": 115.85, + "learning_rate": 1.746941896024465e-05, + "loss": 0.0431, + "step": 12628 + }, + { + "epoch": 115.86, + "learning_rate": 1.743119266055046e-05, + "loss": 0.0609, + "step": 12629 + }, + { + "epoch": 115.87, + "learning_rate": 1.7392966360856267e-05, + "loss": 0.0464, + "step": 12630 + }, + { + "epoch": 115.88, + "learning_rate": 1.735474006116208e-05, + "loss": 0.1192, + "step": 12631 + }, + { + "epoch": 115.89, + "learning_rate": 1.731651376146789e-05, + "loss": 0.0818, + "step": 12632 + }, + { + "epoch": 115.9, + "learning_rate": 1.72782874617737e-05, + "loss": 0.0886, + "step": 12633 + }, + { + "epoch": 115.91, + "learning_rate": 1.724006116207951e-05, + "loss": 0.099, + "step": 12634 + }, + { + "epoch": 115.92, + "learning_rate": 1.7201834862385323e-05, + "loss": 0.0556, + "step": 12635 + }, + { + "epoch": 115.93, + "learning_rate": 1.716360856269113e-05, + "loss": 0.0476, + "step": 12636 + }, + { + "epoch": 115.94, + "learning_rate": 1.712538226299694e-05, + "loss": 0.0718, + "step": 12637 + }, + { + "epoch": 115.94, + "learning_rate": 1.7087155963302753e-05, + "loss": 0.1165, + "step": 12638 + }, + { + "epoch": 115.95, + "learning_rate": 1.704892966360856e-05, + "loss": 0.074, + "step": 12639 + }, + { + "epoch": 115.96, + "learning_rate": 1.7010703363914375e-05, + "loss": 0.1273, + "step": 12640 + }, + { + "epoch": 115.97, + "learning_rate": 1.6972477064220186e-05, + "loss": 0.1123, + "step": 12641 + }, + { + "epoch": 115.98, + "learning_rate": 1.6934250764525993e-05, + "loss": 0.1016, + "step": 12642 + }, + { + "epoch": 115.99, + "learning_rate": 1.6896024464831804e-05, + "loss": 0.0672, + "step": 12643 + }, + { + "epoch": 116.0, + "learning_rate": 1.6857798165137616e-05, + "loss": 0.0922, + "step": 12644 + }, + { + "epoch": 116.01, + "learning_rate": 1.6819571865443423e-05, + "loss": 0.136, + "step": 12645 + }, + { + "epoch": 116.02, + "learning_rate": 1.6781345565749234e-05, + "loss": 0.1233, + "step": 12646 + }, + { + "epoch": 116.03, + "learning_rate": 1.674311926605505e-05, + "loss": 0.0797, + "step": 12647 + }, + { + "epoch": 116.04, + "learning_rate": 1.6704892966360856e-05, + "loss": 0.1553, + "step": 12648 + }, + { + "epoch": 116.05, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.0748, + "step": 12649 + }, + { + "epoch": 116.06, + "learning_rate": 1.662844036697248e-05, + "loss": 0.0715, + "step": 12650 + }, + { + "epoch": 116.06, + "learning_rate": 1.6590214067278286e-05, + "loss": 0.0724, + "step": 12651 + }, + { + "epoch": 116.07, + "learning_rate": 1.6551987767584097e-05, + "loss": 0.0711, + "step": 12652 + }, + { + "epoch": 116.08, + "learning_rate": 1.651376146788991e-05, + "loss": 0.0787, + "step": 12653 + }, + { + "epoch": 116.09, + "learning_rate": 1.6475535168195716e-05, + "loss": 0.0587, + "step": 12654 + }, + { + "epoch": 116.1, + "learning_rate": 1.643730886850153e-05, + "loss": 0.0704, + "step": 12655 + }, + { + "epoch": 116.11, + "learning_rate": 1.639908256880734e-05, + "loss": 0.0534, + "step": 12656 + }, + { + "epoch": 116.12, + "learning_rate": 1.636085626911315e-05, + "loss": 0.1022, + "step": 12657 + }, + { + "epoch": 116.13, + "learning_rate": 1.632262996941896e-05, + "loss": 0.1064, + "step": 12658 + }, + { + "epoch": 116.14, + "learning_rate": 1.628440366972477e-05, + "loss": 0.0746, + "step": 12659 + }, + { + "epoch": 116.15, + "learning_rate": 1.624617737003058e-05, + "loss": 0.0802, + "step": 12660 + }, + { + "epoch": 116.16, + "learning_rate": 1.620795107033639e-05, + "loss": 0.0972, + "step": 12661 + }, + { + "epoch": 116.17, + "learning_rate": 1.6169724770642205e-05, + "loss": 0.0671, + "step": 12662 + }, + { + "epoch": 116.17, + "learning_rate": 1.6131498470948012e-05, + "loss": 0.0851, + "step": 12663 + }, + { + "epoch": 116.18, + "learning_rate": 1.6093272171253823e-05, + "loss": 0.0964, + "step": 12664 + }, + { + "epoch": 116.19, + "learning_rate": 1.6055045871559634e-05, + "loss": 0.0691, + "step": 12665 + }, + { + "epoch": 116.2, + "learning_rate": 1.6016819571865442e-05, + "loss": 0.1287, + "step": 12666 + }, + { + "epoch": 116.21, + "learning_rate": 1.5978593272171253e-05, + "loss": 0.0968, + "step": 12667 + }, + { + "epoch": 116.22, + "learning_rate": 1.5940366972477064e-05, + "loss": 0.1021, + "step": 12668 + }, + { + "epoch": 116.23, + "learning_rate": 1.5902140672782875e-05, + "loss": 0.0877, + "step": 12669 + }, + { + "epoch": 116.24, + "learning_rate": 1.5863914373088686e-05, + "loss": 0.0385, + "step": 12670 + }, + { + "epoch": 116.25, + "learning_rate": 1.5825688073394497e-05, + "loss": 0.0425, + "step": 12671 + }, + { + "epoch": 116.26, + "learning_rate": 1.5787461773700305e-05, + "loss": 0.1641, + "step": 12672 + }, + { + "epoch": 116.27, + "learning_rate": 1.5749235474006116e-05, + "loss": 0.115, + "step": 12673 + }, + { + "epoch": 116.28, + "learning_rate": 1.5711009174311927e-05, + "loss": 0.0985, + "step": 12674 + }, + { + "epoch": 116.28, + "learning_rate": 1.5672782874617735e-05, + "loss": 0.1278, + "step": 12675 + }, + { + "epoch": 116.29, + "learning_rate": 1.5634556574923546e-05, + "loss": 0.0604, + "step": 12676 + }, + { + "epoch": 116.3, + "learning_rate": 1.559633027522936e-05, + "loss": 0.1152, + "step": 12677 + }, + { + "epoch": 116.31, + "learning_rate": 1.5558103975535168e-05, + "loss": 0.1321, + "step": 12678 + }, + { + "epoch": 116.32, + "learning_rate": 1.551987767584098e-05, + "loss": 0.0907, + "step": 12679 + }, + { + "epoch": 116.33, + "learning_rate": 1.548165137614679e-05, + "loss": 0.0623, + "step": 12680 + }, + { + "epoch": 116.34, + "learning_rate": 1.54434250764526e-05, + "loss": 0.0786, + "step": 12681 + }, + { + "epoch": 116.35, + "learning_rate": 1.540519877675841e-05, + "loss": 0.0644, + "step": 12682 + }, + { + "epoch": 116.36, + "learning_rate": 1.536697247706422e-05, + "loss": 0.0655, + "step": 12683 + }, + { + "epoch": 116.37, + "learning_rate": 1.532874617737003e-05, + "loss": 0.0964, + "step": 12684 + }, + { + "epoch": 116.38, + "learning_rate": 1.5290519877675842e-05, + "loss": 0.0788, + "step": 12685 + }, + { + "epoch": 116.39, + "learning_rate": 1.5252293577981653e-05, + "loss": 0.13, + "step": 12686 + }, + { + "epoch": 116.39, + "learning_rate": 1.5214067278287462e-05, + "loss": 0.083, + "step": 12687 + }, + { + "epoch": 116.4, + "learning_rate": 1.5175840978593272e-05, + "loss": 0.0699, + "step": 12688 + }, + { + "epoch": 116.41, + "learning_rate": 1.5137614678899083e-05, + "loss": 0.1139, + "step": 12689 + }, + { + "epoch": 116.42, + "learning_rate": 1.5099388379204894e-05, + "loss": 0.0565, + "step": 12690 + }, + { + "epoch": 116.43, + "learning_rate": 1.5061162079510703e-05, + "loss": 0.0785, + "step": 12691 + }, + { + "epoch": 116.44, + "learning_rate": 1.5022935779816514e-05, + "loss": 0.0577, + "step": 12692 + }, + { + "epoch": 116.45, + "learning_rate": 1.4984709480122325e-05, + "loss": 0.0782, + "step": 12693 + }, + { + "epoch": 116.46, + "learning_rate": 1.4946483180428135e-05, + "loss": 0.1213, + "step": 12694 + }, + { + "epoch": 116.47, + "learning_rate": 1.4908256880733946e-05, + "loss": 0.0608, + "step": 12695 + }, + { + "epoch": 116.48, + "learning_rate": 1.4870030581039755e-05, + "loss": 0.0515, + "step": 12696 + }, + { + "epoch": 116.49, + "learning_rate": 1.4831804281345566e-05, + "loss": 0.0571, + "step": 12697 + }, + { + "epoch": 116.5, + "learning_rate": 1.4793577981651377e-05, + "loss": 0.0421, + "step": 12698 + }, + { + "epoch": 116.5, + "learning_rate": 1.4755351681957187e-05, + "loss": 0.109, + "step": 12699 + }, + { + "epoch": 116.51, + "learning_rate": 1.4717125382262996e-05, + "loss": 0.138, + "step": 12700 + }, + { + "epoch": 116.52, + "learning_rate": 1.4678899082568809e-05, + "loss": 0.0833, + "step": 12701 + }, + { + "epoch": 116.53, + "learning_rate": 1.4640672782874618e-05, + "loss": 0.0685, + "step": 12702 + }, + { + "epoch": 116.54, + "learning_rate": 1.4602446483180428e-05, + "loss": 0.1376, + "step": 12703 + }, + { + "epoch": 116.55, + "learning_rate": 1.456422018348624e-05, + "loss": 0.1004, + "step": 12704 + }, + { + "epoch": 116.56, + "learning_rate": 1.452599388379205e-05, + "loss": 0.0915, + "step": 12705 + }, + { + "epoch": 116.57, + "learning_rate": 1.4487767584097859e-05, + "loss": 0.0668, + "step": 12706 + }, + { + "epoch": 116.58, + "learning_rate": 1.444954128440367e-05, + "loss": 0.1055, + "step": 12707 + }, + { + "epoch": 116.59, + "learning_rate": 1.4411314984709481e-05, + "loss": 0.0677, + "step": 12708 + }, + { + "epoch": 116.6, + "learning_rate": 1.437308868501529e-05, + "loss": 0.0992, + "step": 12709 + }, + { + "epoch": 116.61, + "learning_rate": 1.4334862385321102e-05, + "loss": 0.0618, + "step": 12710 + }, + { + "epoch": 116.61, + "learning_rate": 1.4296636085626911e-05, + "loss": 0.0965, + "step": 12711 + }, + { + "epoch": 116.62, + "learning_rate": 1.4258409785932722e-05, + "loss": 0.0212, + "step": 12712 + }, + { + "epoch": 116.63, + "learning_rate": 1.4220183486238533e-05, + "loss": 0.1372, + "step": 12713 + }, + { + "epoch": 116.64, + "learning_rate": 1.4181957186544343e-05, + "loss": 0.0365, + "step": 12714 + }, + { + "epoch": 116.65, + "learning_rate": 1.4143730886850154e-05, + "loss": 0.0879, + "step": 12715 + }, + { + "epoch": 116.66, + "learning_rate": 1.4105504587155965e-05, + "loss": 0.096, + "step": 12716 + }, + { + "epoch": 116.67, + "learning_rate": 1.4067278287461774e-05, + "loss": 0.1085, + "step": 12717 + }, + { + "epoch": 116.68, + "learning_rate": 1.4029051987767583e-05, + "loss": 0.0803, + "step": 12718 + }, + { + "epoch": 116.69, + "learning_rate": 1.3990825688073396e-05, + "loss": 0.0967, + "step": 12719 + }, + { + "epoch": 116.7, + "learning_rate": 1.3952599388379206e-05, + "loss": 0.0919, + "step": 12720 + }, + { + "epoch": 116.71, + "learning_rate": 1.3914373088685015e-05, + "loss": 0.0623, + "step": 12721 + }, + { + "epoch": 116.72, + "learning_rate": 1.3876146788990826e-05, + "loss": 0.0482, + "step": 12722 + }, + { + "epoch": 116.72, + "learning_rate": 1.3837920489296637e-05, + "loss": 0.0524, + "step": 12723 + }, + { + "epoch": 116.73, + "learning_rate": 1.3799694189602446e-05, + "loss": 0.1076, + "step": 12724 + }, + { + "epoch": 116.74, + "learning_rate": 1.3761467889908258e-05, + "loss": 0.0682, + "step": 12725 + }, + { + "epoch": 116.75, + "learning_rate": 1.3723241590214069e-05, + "loss": 0.1372, + "step": 12726 + }, + { + "epoch": 116.76, + "learning_rate": 1.3685015290519878e-05, + "loss": 0.1018, + "step": 12727 + }, + { + "epoch": 116.77, + "learning_rate": 1.3646788990825689e-05, + "loss": 0.1399, + "step": 12728 + }, + { + "epoch": 116.78, + "learning_rate": 1.3608562691131498e-05, + "loss": 0.0652, + "step": 12729 + }, + { + "epoch": 116.79, + "learning_rate": 1.357033639143731e-05, + "loss": 0.0827, + "step": 12730 + }, + { + "epoch": 116.8, + "learning_rate": 1.353211009174312e-05, + "loss": 0.0919, + "step": 12731 + }, + { + "epoch": 116.81, + "learning_rate": 1.349388379204893e-05, + "loss": 0.0833, + "step": 12732 + }, + { + "epoch": 116.82, + "learning_rate": 1.345565749235474e-05, + "loss": 0.0818, + "step": 12733 + }, + { + "epoch": 116.83, + "learning_rate": 1.3417431192660552e-05, + "loss": 0.1084, + "step": 12734 + }, + { + "epoch": 116.83, + "learning_rate": 1.3379204892966361e-05, + "loss": 0.0712, + "step": 12735 + }, + { + "epoch": 116.84, + "learning_rate": 1.334097859327217e-05, + "loss": 0.0977, + "step": 12736 + }, + { + "epoch": 116.85, + "learning_rate": 1.3302752293577984e-05, + "loss": 0.0766, + "step": 12737 + }, + { + "epoch": 116.86, + "learning_rate": 1.3264525993883793e-05, + "loss": 0.0824, + "step": 12738 + }, + { + "epoch": 116.87, + "learning_rate": 1.3226299694189602e-05, + "loss": 0.0724, + "step": 12739 + }, + { + "epoch": 116.88, + "learning_rate": 1.3188073394495413e-05, + "loss": 0.0758, + "step": 12740 + }, + { + "epoch": 116.89, + "learning_rate": 1.3149847094801224e-05, + "loss": 0.0559, + "step": 12741 + }, + { + "epoch": 116.9, + "learning_rate": 1.3111620795107034e-05, + "loss": 0.038, + "step": 12742 + }, + { + "epoch": 116.91, + "learning_rate": 1.3073394495412845e-05, + "loss": 0.0525, + "step": 12743 + }, + { + "epoch": 116.92, + "learning_rate": 1.3035168195718654e-05, + "loss": 0.1185, + "step": 12744 + }, + { + "epoch": 116.93, + "learning_rate": 1.2996941896024465e-05, + "loss": 0.1011, + "step": 12745 + }, + { + "epoch": 116.94, + "learning_rate": 1.2958715596330276e-05, + "loss": 0.045, + "step": 12746 + }, + { + "epoch": 116.94, + "learning_rate": 1.2920489296636086e-05, + "loss": 0.0686, + "step": 12747 + }, + { + "epoch": 116.95, + "learning_rate": 1.2882262996941895e-05, + "loss": 0.0831, + "step": 12748 + }, + { + "epoch": 116.96, + "learning_rate": 1.2844036697247708e-05, + "loss": 0.0999, + "step": 12749 + }, + { + "epoch": 116.97, + "learning_rate": 1.2805810397553517e-05, + "loss": 0.0561, + "step": 12750 + }, + { + "epoch": 116.98, + "learning_rate": 1.2767584097859327e-05, + "loss": 0.0541, + "step": 12751 + }, + { + "epoch": 116.99, + "learning_rate": 1.272935779816514e-05, + "loss": 0.0299, + "step": 12752 + }, + { + "epoch": 117.0, + "learning_rate": 1.2691131498470949e-05, + "loss": 0.0805, + "step": 12753 + }, + { + "epoch": 117.01, + "learning_rate": 1.2652905198776758e-05, + "loss": 0.1076, + "step": 12754 + }, + { + "epoch": 117.02, + "learning_rate": 1.2614678899082569e-05, + "loss": 0.0879, + "step": 12755 + }, + { + "epoch": 117.03, + "learning_rate": 1.257645259938838e-05, + "loss": 0.0783, + "step": 12756 + }, + { + "epoch": 117.04, + "learning_rate": 1.253822629969419e-05, + "loss": 0.0625, + "step": 12757 + }, + { + "epoch": 117.05, + "learning_rate": 1.25e-05, + "loss": 0.0858, + "step": 12758 + }, + { + "epoch": 117.06, + "learning_rate": 1.246177370030581e-05, + "loss": 0.1333, + "step": 12759 + }, + { + "epoch": 117.06, + "learning_rate": 1.2423547400611621e-05, + "loss": 0.0948, + "step": 12760 + }, + { + "epoch": 117.07, + "learning_rate": 1.2385321100917432e-05, + "loss": 0.0759, + "step": 12761 + }, + { + "epoch": 117.08, + "learning_rate": 1.2347094801223241e-05, + "loss": 0.0978, + "step": 12762 + }, + { + "epoch": 117.09, + "learning_rate": 1.2308868501529053e-05, + "loss": 0.1331, + "step": 12763 + }, + { + "epoch": 117.1, + "learning_rate": 1.2270642201834864e-05, + "loss": 0.0754, + "step": 12764 + }, + { + "epoch": 117.11, + "learning_rate": 1.2232415902140673e-05, + "loss": 0.0706, + "step": 12765 + }, + { + "epoch": 117.12, + "learning_rate": 1.2194189602446482e-05, + "loss": 0.1087, + "step": 12766 + }, + { + "epoch": 117.13, + "learning_rate": 1.2155963302752295e-05, + "loss": 0.0646, + "step": 12767 + }, + { + "epoch": 117.14, + "learning_rate": 1.2117737003058104e-05, + "loss": 0.0838, + "step": 12768 + }, + { + "epoch": 117.15, + "learning_rate": 1.2079510703363914e-05, + "loss": 0.1438, + "step": 12769 + }, + { + "epoch": 117.16, + "learning_rate": 1.2041284403669725e-05, + "loss": 0.0651, + "step": 12770 + }, + { + "epoch": 117.17, + "learning_rate": 1.2003058103975536e-05, + "loss": 0.1033, + "step": 12771 + }, + { + "epoch": 117.17, + "learning_rate": 1.1964831804281345e-05, + "loss": 0.1231, + "step": 12772 + }, + { + "epoch": 117.18, + "learning_rate": 1.1926605504587156e-05, + "loss": 0.09, + "step": 12773 + }, + { + "epoch": 117.19, + "learning_rate": 1.1888379204892967e-05, + "loss": 0.0588, + "step": 12774 + }, + { + "epoch": 117.2, + "learning_rate": 1.1850152905198777e-05, + "loss": 0.1278, + "step": 12775 + }, + { + "epoch": 117.21, + "learning_rate": 1.1811926605504588e-05, + "loss": 0.0324, + "step": 12776 + }, + { + "epoch": 117.22, + "learning_rate": 1.1773700305810397e-05, + "loss": 0.0735, + "step": 12777 + }, + { + "epoch": 117.23, + "learning_rate": 1.1735474006116208e-05, + "loss": 0.13, + "step": 12778 + }, + { + "epoch": 117.24, + "learning_rate": 1.169724770642202e-05, + "loss": 0.0992, + "step": 12779 + }, + { + "epoch": 117.25, + "learning_rate": 1.1659021406727829e-05, + "loss": 0.0153, + "step": 12780 + }, + { + "epoch": 117.26, + "learning_rate": 1.1620795107033638e-05, + "loss": 0.1697, + "step": 12781 + }, + { + "epoch": 117.27, + "learning_rate": 1.1582568807339451e-05, + "loss": 0.0935, + "step": 12782 + }, + { + "epoch": 117.28, + "learning_rate": 1.154434250764526e-05, + "loss": 0.0788, + "step": 12783 + }, + { + "epoch": 117.28, + "learning_rate": 1.150611620795107e-05, + "loss": 0.1064, + "step": 12784 + }, + { + "epoch": 117.29, + "learning_rate": 1.1467889908256882e-05, + "loss": 0.1112, + "step": 12785 + }, + { + "epoch": 117.3, + "learning_rate": 1.1429663608562692e-05, + "loss": 0.1042, + "step": 12786 + }, + { + "epoch": 117.31, + "learning_rate": 1.1391437308868501e-05, + "loss": 0.0636, + "step": 12787 + }, + { + "epoch": 117.32, + "learning_rate": 1.1353211009174312e-05, + "loss": 0.0847, + "step": 12788 + }, + { + "epoch": 117.33, + "learning_rate": 1.1314984709480123e-05, + "loss": 0.1119, + "step": 12789 + }, + { + "epoch": 117.34, + "learning_rate": 1.1276758409785933e-05, + "loss": 0.0956, + "step": 12790 + }, + { + "epoch": 117.35, + "learning_rate": 1.1238532110091744e-05, + "loss": 0.0796, + "step": 12791 + }, + { + "epoch": 117.36, + "learning_rate": 1.1200305810397553e-05, + "loss": 0.0699, + "step": 12792 + }, + { + "epoch": 117.37, + "learning_rate": 1.1162079510703364e-05, + "loss": 0.0687, + "step": 12793 + }, + { + "epoch": 117.38, + "learning_rate": 1.1123853211009175e-05, + "loss": 0.0704, + "step": 12794 + }, + { + "epoch": 117.39, + "learning_rate": 1.1085626911314985e-05, + "loss": 0.0601, + "step": 12795 + }, + { + "epoch": 117.39, + "learning_rate": 1.1047400611620796e-05, + "loss": 0.0587, + "step": 12796 + }, + { + "epoch": 117.4, + "learning_rate": 1.1009174311926607e-05, + "loss": 0.0502, + "step": 12797 + }, + { + "epoch": 117.41, + "learning_rate": 1.0970948012232416e-05, + "loss": 0.0955, + "step": 12798 + }, + { + "epoch": 117.42, + "learning_rate": 1.0932721712538225e-05, + "loss": 0.1074, + "step": 12799 + }, + { + "epoch": 117.43, + "learning_rate": 1.0894495412844038e-05, + "loss": 0.0912, + "step": 12800 + }, + { + "epoch": 117.44, + "learning_rate": 1.0856269113149848e-05, + "loss": 0.0716, + "step": 12801 + }, + { + "epoch": 117.45, + "learning_rate": 1.0818042813455657e-05, + "loss": 0.0292, + "step": 12802 + }, + { + "epoch": 117.46, + "learning_rate": 1.0779816513761468e-05, + "loss": 0.0392, + "step": 12803 + }, + { + "epoch": 117.47, + "learning_rate": 1.0741590214067279e-05, + "loss": 0.0661, + "step": 12804 + }, + { + "epoch": 117.48, + "learning_rate": 1.0703363914373088e-05, + "loss": 0.0421, + "step": 12805 + }, + { + "epoch": 117.49, + "learning_rate": 1.06651376146789e-05, + "loss": 0.0701, + "step": 12806 + }, + { + "epoch": 117.5, + "learning_rate": 1.062691131498471e-05, + "loss": 0.0272, + "step": 12807 + }, + { + "epoch": 117.5, + "learning_rate": 1.058868501529052e-05, + "loss": 0.1286, + "step": 12808 + }, + { + "epoch": 117.51, + "learning_rate": 1.0550458715596331e-05, + "loss": 0.0922, + "step": 12809 + }, + { + "epoch": 117.52, + "learning_rate": 1.051223241590214e-05, + "loss": 0.1187, + "step": 12810 + }, + { + "epoch": 117.53, + "learning_rate": 1.0474006116207951e-05, + "loss": 0.1281, + "step": 12811 + }, + { + "epoch": 117.54, + "learning_rate": 1.0435779816513762e-05, + "loss": 0.1594, + "step": 12812 + }, + { + "epoch": 117.55, + "learning_rate": 1.0397553516819572e-05, + "loss": 0.093, + "step": 12813 + }, + { + "epoch": 117.56, + "learning_rate": 1.0359327217125381e-05, + "loss": 0.1401, + "step": 12814 + }, + { + "epoch": 117.57, + "learning_rate": 1.0321100917431194e-05, + "loss": 0.074, + "step": 12815 + }, + { + "epoch": 117.58, + "learning_rate": 1.0282874617737003e-05, + "loss": 0.0885, + "step": 12816 + }, + { + "epoch": 117.59, + "learning_rate": 1.0244648318042813e-05, + "loss": 0.0818, + "step": 12817 + }, + { + "epoch": 117.6, + "learning_rate": 1.0206422018348625e-05, + "loss": 0.0798, + "step": 12818 + }, + { + "epoch": 117.61, + "learning_rate": 1.0168195718654435e-05, + "loss": 0.0377, + "step": 12819 + }, + { + "epoch": 117.61, + "learning_rate": 1.0129969418960244e-05, + "loss": 0.0575, + "step": 12820 + }, + { + "epoch": 117.62, + "learning_rate": 1.0091743119266055e-05, + "loss": 0.0954, + "step": 12821 + }, + { + "epoch": 117.63, + "learning_rate": 1.0053516819571866e-05, + "loss": 0.0845, + "step": 12822 + }, + { + "epoch": 117.64, + "learning_rate": 1.0015290519877676e-05, + "loss": 0.0616, + "step": 12823 + }, + { + "epoch": 117.65, + "learning_rate": 9.977064220183487e-06, + "loss": 0.0917, + "step": 12824 + }, + { + "epoch": 117.66, + "learning_rate": 9.938837920489296e-06, + "loss": 0.0772, + "step": 12825 + }, + { + "epoch": 117.67, + "learning_rate": 9.900611620795107e-06, + "loss": 0.0693, + "step": 12826 + }, + { + "epoch": 117.68, + "learning_rate": 9.862385321100918e-06, + "loss": 0.0445, + "step": 12827 + }, + { + "epoch": 117.69, + "learning_rate": 9.824159021406728e-06, + "loss": 0.0548, + "step": 12828 + }, + { + "epoch": 117.7, + "learning_rate": 9.785932721712537e-06, + "loss": 0.097, + "step": 12829 + }, + { + "epoch": 117.71, + "learning_rate": 9.74770642201835e-06, + "loss": 0.0884, + "step": 12830 + }, + { + "epoch": 117.72, + "learning_rate": 9.709480122324159e-06, + "loss": 0.0395, + "step": 12831 + }, + { + "epoch": 117.72, + "learning_rate": 9.671253822629969e-06, + "loss": 0.0365, + "step": 12832 + }, + { + "epoch": 117.73, + "learning_rate": 9.633027522935781e-06, + "loss": 0.0208, + "step": 12833 + }, + { + "epoch": 117.74, + "learning_rate": 9.59480122324159e-06, + "loss": 0.0822, + "step": 12834 + }, + { + "epoch": 117.75, + "learning_rate": 9.5565749235474e-06, + "loss": 0.1197, + "step": 12835 + }, + { + "epoch": 117.76, + "learning_rate": 9.518348623853211e-06, + "loss": 0.108, + "step": 12836 + }, + { + "epoch": 117.77, + "learning_rate": 9.480122324159022e-06, + "loss": 0.0714, + "step": 12837 + }, + { + "epoch": 117.78, + "learning_rate": 9.441896024464832e-06, + "loss": 0.0936, + "step": 12838 + }, + { + "epoch": 117.79, + "learning_rate": 9.403669724770643e-06, + "loss": 0.0858, + "step": 12839 + }, + { + "epoch": 117.8, + "learning_rate": 9.365443425076452e-06, + "loss": 0.0995, + "step": 12840 + }, + { + "epoch": 117.81, + "learning_rate": 9.327217125382263e-06, + "loss": 0.0994, + "step": 12841 + }, + { + "epoch": 117.82, + "learning_rate": 9.288990825688074e-06, + "loss": 0.0461, + "step": 12842 + }, + { + "epoch": 117.83, + "learning_rate": 9.250764525993883e-06, + "loss": 0.0631, + "step": 12843 + }, + { + "epoch": 117.83, + "learning_rate": 9.212538226299695e-06, + "loss": 0.0922, + "step": 12844 + }, + { + "epoch": 117.84, + "learning_rate": 9.174311926605506e-06, + "loss": 0.0548, + "step": 12845 + }, + { + "epoch": 117.85, + "learning_rate": 9.136085626911315e-06, + "loss": 0.1024, + "step": 12846 + }, + { + "epoch": 117.86, + "learning_rate": 9.097859327217124e-06, + "loss": 0.0951, + "step": 12847 + }, + { + "epoch": 117.87, + "learning_rate": 9.059633027522937e-06, + "loss": 0.0716, + "step": 12848 + }, + { + "epoch": 117.88, + "learning_rate": 9.021406727828746e-06, + "loss": 0.0778, + "step": 12849 + }, + { + "epoch": 117.89, + "learning_rate": 8.983180428134556e-06, + "loss": 0.0654, + "step": 12850 + }, + { + "epoch": 117.9, + "learning_rate": 8.944954128440367e-06, + "loss": 0.0763, + "step": 12851 + }, + { + "epoch": 117.91, + "learning_rate": 8.906727828746178e-06, + "loss": 0.0757, + "step": 12852 + }, + { + "epoch": 117.92, + "learning_rate": 8.868501529051987e-06, + "loss": 0.1096, + "step": 12853 + }, + { + "epoch": 117.93, + "learning_rate": 8.830275229357798e-06, + "loss": 0.1338, + "step": 12854 + }, + { + "epoch": 117.94, + "learning_rate": 8.79204892966361e-06, + "loss": 0.1204, + "step": 12855 + }, + { + "epoch": 117.94, + "learning_rate": 8.753822629969419e-06, + "loss": 0.0541, + "step": 12856 + }, + { + "epoch": 117.95, + "learning_rate": 8.71559633027523e-06, + "loss": 0.0732, + "step": 12857 + }, + { + "epoch": 117.96, + "learning_rate": 8.67737003058104e-06, + "loss": 0.0873, + "step": 12858 + }, + { + "epoch": 117.97, + "learning_rate": 8.63914373088685e-06, + "loss": 0.0567, + "step": 12859 + }, + { + "epoch": 117.98, + "learning_rate": 8.600917431192661e-06, + "loss": 0.0665, + "step": 12860 + }, + { + "epoch": 117.99, + "learning_rate": 8.56269113149847e-06, + "loss": 0.0373, + "step": 12861 + }, + { + "epoch": 118.0, + "learning_rate": 8.52446483180428e-06, + "loss": 0.0514, + "step": 12862 + }, + { + "epoch": 118.01, + "learning_rate": 8.486238532110093e-06, + "loss": 0.1293, + "step": 12863 + }, + { + "epoch": 118.02, + "learning_rate": 8.448012232415902e-06, + "loss": 0.0985, + "step": 12864 + }, + { + "epoch": 118.03, + "learning_rate": 8.409785932721712e-06, + "loss": 0.0767, + "step": 12865 + }, + { + "epoch": 118.04, + "learning_rate": 8.371559633027524e-06, + "loss": 0.0528, + "step": 12866 + }, + { + "epoch": 118.05, + "learning_rate": 8.333333333333334e-06, + "loss": 0.0939, + "step": 12867 + }, + { + "epoch": 118.06, + "learning_rate": 8.295107033639143e-06, + "loss": 0.1076, + "step": 12868 + }, + { + "epoch": 118.06, + "learning_rate": 8.256880733944954e-06, + "loss": 0.0867, + "step": 12869 + }, + { + "epoch": 118.07, + "learning_rate": 8.218654434250765e-06, + "loss": 0.0967, + "step": 12870 + }, + { + "epoch": 118.08, + "learning_rate": 8.180428134556575e-06, + "loss": 0.1486, + "step": 12871 + }, + { + "epoch": 118.09, + "learning_rate": 8.142201834862386e-06, + "loss": 0.099, + "step": 12872 + }, + { + "epoch": 118.1, + "learning_rate": 8.103975535168195e-06, + "loss": 0.0876, + "step": 12873 + }, + { + "epoch": 118.11, + "learning_rate": 8.065749235474006e-06, + "loss": 0.0592, + "step": 12874 + }, + { + "epoch": 118.12, + "learning_rate": 8.027522935779817e-06, + "loss": 0.0648, + "step": 12875 + }, + { + "epoch": 118.13, + "learning_rate": 7.989296636085627e-06, + "loss": 0.0874, + "step": 12876 + }, + { + "epoch": 118.14, + "learning_rate": 7.951070336391438e-06, + "loss": 0.0443, + "step": 12877 + }, + { + "epoch": 118.15, + "learning_rate": 7.912844036697249e-06, + "loss": 0.1308, + "step": 12878 + }, + { + "epoch": 118.16, + "learning_rate": 7.874617737003058e-06, + "loss": 0.1354, + "step": 12879 + }, + { + "epoch": 118.17, + "learning_rate": 7.836391437308867e-06, + "loss": 0.0897, + "step": 12880 + }, + { + "epoch": 118.17, + "learning_rate": 7.79816513761468e-06, + "loss": 0.0802, + "step": 12881 + }, + { + "epoch": 118.18, + "learning_rate": 7.75993883792049e-06, + "loss": 0.0882, + "step": 12882 + }, + { + "epoch": 118.19, + "learning_rate": 7.7217125382263e-06, + "loss": 0.0885, + "step": 12883 + }, + { + "epoch": 118.2, + "learning_rate": 7.68348623853211e-06, + "loss": 0.0811, + "step": 12884 + }, + { + "epoch": 118.21, + "learning_rate": 7.645259938837921e-06, + "loss": 0.0725, + "step": 12885 + }, + { + "epoch": 118.22, + "learning_rate": 7.607033639143731e-06, + "loss": 0.0995, + "step": 12886 + }, + { + "epoch": 118.23, + "learning_rate": 7.5688073394495415e-06, + "loss": 0.0844, + "step": 12887 + }, + { + "epoch": 118.24, + "learning_rate": 7.530581039755352e-06, + "loss": 0.0749, + "step": 12888 + }, + { + "epoch": 118.25, + "learning_rate": 7.492354740061163e-06, + "loss": 0.0189, + "step": 12889 + }, + { + "epoch": 118.26, + "learning_rate": 7.454128440366973e-06, + "loss": 0.1206, + "step": 12890 + }, + { + "epoch": 118.27, + "learning_rate": 7.415902140672783e-06, + "loss": 0.1148, + "step": 12891 + }, + { + "epoch": 118.28, + "learning_rate": 7.377675840978593e-06, + "loss": 0.0663, + "step": 12892 + }, + { + "epoch": 118.28, + "learning_rate": 7.3394495412844045e-06, + "loss": 0.1026, + "step": 12893 + }, + { + "epoch": 118.29, + "learning_rate": 7.301223241590214e-06, + "loss": 0.1125, + "step": 12894 + }, + { + "epoch": 118.3, + "learning_rate": 7.262996941896025e-06, + "loss": 0.0863, + "step": 12895 + }, + { + "epoch": 118.31, + "learning_rate": 7.224770642201835e-06, + "loss": 0.1192, + "step": 12896 + }, + { + "epoch": 118.32, + "learning_rate": 7.186544342507645e-06, + "loss": 0.0927, + "step": 12897 + }, + { + "epoch": 118.33, + "learning_rate": 7.1483180428134555e-06, + "loss": 0.0552, + "step": 12898 + }, + { + "epoch": 118.34, + "learning_rate": 7.110091743119267e-06, + "loss": 0.074, + "step": 12899 + }, + { + "epoch": 118.35, + "learning_rate": 7.071865443425077e-06, + "loss": 0.0815, + "step": 12900 + }, + { + "epoch": 118.36, + "learning_rate": 7.033639143730887e-06, + "loss": 0.1183, + "step": 12901 + }, + { + "epoch": 118.37, + "learning_rate": 6.995412844036698e-06, + "loss": 0.0769, + "step": 12902 + }, + { + "epoch": 118.38, + "learning_rate": 6.9571865443425075e-06, + "loss": 0.0865, + "step": 12903 + }, + { + "epoch": 118.39, + "learning_rate": 6.9189602446483185e-06, + "loss": 0.104, + "step": 12904 + }, + { + "epoch": 118.39, + "learning_rate": 6.880733944954129e-06, + "loss": 0.0679, + "step": 12905 + }, + { + "epoch": 118.4, + "learning_rate": 6.842507645259939e-06, + "loss": 0.0438, + "step": 12906 + }, + { + "epoch": 118.41, + "learning_rate": 6.804281345565749e-06, + "loss": 0.0714, + "step": 12907 + }, + { + "epoch": 118.42, + "learning_rate": 6.76605504587156e-06, + "loss": 0.0561, + "step": 12908 + }, + { + "epoch": 118.43, + "learning_rate": 6.72782874617737e-06, + "loss": 0.1088, + "step": 12909 + }, + { + "epoch": 118.44, + "learning_rate": 6.689602446483181e-06, + "loss": 0.0463, + "step": 12910 + }, + { + "epoch": 118.45, + "learning_rate": 6.651376146788992e-06, + "loss": 0.045, + "step": 12911 + }, + { + "epoch": 118.46, + "learning_rate": 6.613149847094801e-06, + "loss": 0.0875, + "step": 12912 + }, + { + "epoch": 118.47, + "learning_rate": 6.574923547400612e-06, + "loss": 0.0737, + "step": 12913 + }, + { + "epoch": 118.48, + "learning_rate": 6.536697247706422e-06, + "loss": 0.0853, + "step": 12914 + }, + { + "epoch": 118.49, + "learning_rate": 6.498470948012233e-06, + "loss": 0.0612, + "step": 12915 + }, + { + "epoch": 118.5, + "learning_rate": 6.460244648318043e-06, + "loss": 0.0831, + "step": 12916 + }, + { + "epoch": 118.5, + "learning_rate": 6.422018348623854e-06, + "loss": 0.12, + "step": 12917 + }, + { + "epoch": 118.51, + "learning_rate": 6.383792048929663e-06, + "loss": 0.0835, + "step": 12918 + }, + { + "epoch": 118.52, + "learning_rate": 6.345565749235474e-06, + "loss": 0.1016, + "step": 12919 + }, + { + "epoch": 118.53, + "learning_rate": 6.3073394495412846e-06, + "loss": 0.0721, + "step": 12920 + }, + { + "epoch": 118.54, + "learning_rate": 6.269113149847095e-06, + "loss": 0.0819, + "step": 12921 + }, + { + "epoch": 118.55, + "learning_rate": 6.230886850152905e-06, + "loss": 0.0994, + "step": 12922 + }, + { + "epoch": 118.56, + "learning_rate": 6.192660550458716e-06, + "loss": 0.0755, + "step": 12923 + }, + { + "epoch": 118.57, + "learning_rate": 6.154434250764526e-06, + "loss": 0.0772, + "step": 12924 + }, + { + "epoch": 118.58, + "learning_rate": 6.1162079510703365e-06, + "loss": 0.1298, + "step": 12925 + }, + { + "epoch": 118.59, + "learning_rate": 6.0779816513761475e-06, + "loss": 0.0648, + "step": 12926 + }, + { + "epoch": 118.6, + "learning_rate": 6.039755351681957e-06, + "loss": 0.064, + "step": 12927 + }, + { + "epoch": 118.61, + "learning_rate": 6.001529051987768e-06, + "loss": 0.0649, + "step": 12928 + }, + { + "epoch": 118.61, + "learning_rate": 5.963302752293578e-06, + "loss": 0.1085, + "step": 12929 + }, + { + "epoch": 118.62, + "learning_rate": 5.925076452599388e-06, + "loss": 0.1195, + "step": 12930 + }, + { + "epoch": 118.63, + "learning_rate": 5.886850152905199e-06, + "loss": 0.109, + "step": 12931 + }, + { + "epoch": 118.64, + "learning_rate": 5.84862385321101e-06, + "loss": 0.0717, + "step": 12932 + }, + { + "epoch": 118.65, + "learning_rate": 5.810397553516819e-06, + "loss": 0.0506, + "step": 12933 + }, + { + "epoch": 118.66, + "learning_rate": 5.77217125382263e-06, + "loss": 0.0644, + "step": 12934 + }, + { + "epoch": 118.67, + "learning_rate": 5.733944954128441e-06, + "loss": 0.0892, + "step": 12935 + }, + { + "epoch": 118.68, + "learning_rate": 5.6957186544342506e-06, + "loss": 0.0866, + "step": 12936 + }, + { + "epoch": 118.69, + "learning_rate": 5.657492354740062e-06, + "loss": 0.0534, + "step": 12937 + }, + { + "epoch": 118.7, + "learning_rate": 5.619266055045872e-06, + "loss": 0.0616, + "step": 12938 + }, + { + "epoch": 118.71, + "learning_rate": 5.581039755351682e-06, + "loss": 0.0396, + "step": 12939 + }, + { + "epoch": 118.72, + "learning_rate": 5.542813455657492e-06, + "loss": 0.1091, + "step": 12940 + }, + { + "epoch": 118.72, + "learning_rate": 5.504587155963303e-06, + "loss": 0.0332, + "step": 12941 + }, + { + "epoch": 118.73, + "learning_rate": 5.466360856269113e-06, + "loss": 0.0391, + "step": 12942 + }, + { + "epoch": 118.74, + "learning_rate": 5.428134556574924e-06, + "loss": 0.0654, + "step": 12943 + }, + { + "epoch": 118.75, + "learning_rate": 5.389908256880734e-06, + "loss": 0.1624, + "step": 12944 + }, + { + "epoch": 118.76, + "learning_rate": 5.351681957186544e-06, + "loss": 0.1525, + "step": 12945 + }, + { + "epoch": 118.77, + "learning_rate": 5.313455657492355e-06, + "loss": 0.0871, + "step": 12946 + }, + { + "epoch": 118.78, + "learning_rate": 5.2752293577981655e-06, + "loss": 0.0983, + "step": 12947 + }, + { + "epoch": 118.79, + "learning_rate": 5.237003058103976e-06, + "loss": 0.1237, + "step": 12948 + }, + { + "epoch": 118.8, + "learning_rate": 5.198776758409786e-06, + "loss": 0.0913, + "step": 12949 + }, + { + "epoch": 118.81, + "learning_rate": 5.160550458715597e-06, + "loss": 0.0492, + "step": 12950 + }, + { + "epoch": 118.82, + "learning_rate": 5.122324159021406e-06, + "loss": 0.0922, + "step": 12951 + }, + { + "epoch": 118.83, + "learning_rate": 5.084097859327217e-06, + "loss": 0.0679, + "step": 12952 + }, + { + "epoch": 118.83, + "learning_rate": 5.045871559633028e-06, + "loss": 0.0952, + "step": 12953 + }, + { + "epoch": 118.84, + "learning_rate": 5.007645259938838e-06, + "loss": 0.0671, + "step": 12954 + }, + { + "epoch": 118.85, + "learning_rate": 4.969418960244648e-06, + "loss": 0.1037, + "step": 12955 + }, + { + "epoch": 118.86, + "learning_rate": 4.931192660550459e-06, + "loss": 0.0687, + "step": 12956 + }, + { + "epoch": 118.87, + "learning_rate": 4.8929663608562685e-06, + "loss": 0.0276, + "step": 12957 + }, + { + "epoch": 118.88, + "learning_rate": 4.8547400611620796e-06, + "loss": 0.1105, + "step": 12958 + }, + { + "epoch": 118.89, + "learning_rate": 4.816513761467891e-06, + "loss": 0.078, + "step": 12959 + }, + { + "epoch": 118.9, + "learning_rate": 4.7782874617737e-06, + "loss": 0.0922, + "step": 12960 + }, + { + "epoch": 118.91, + "learning_rate": 4.740061162079511e-06, + "loss": 0.0654, + "step": 12961 + }, + { + "epoch": 118.92, + "learning_rate": 4.701834862385321e-06, + "loss": 0.0897, + "step": 12962 + }, + { + "epoch": 118.93, + "learning_rate": 4.6636085626911315e-06, + "loss": 0.0623, + "step": 12963 + }, + { + "epoch": 118.94, + "learning_rate": 4.625382262996942e-06, + "loss": 0.045, + "step": 12964 + }, + { + "epoch": 118.94, + "learning_rate": 4.587155963302753e-06, + "loss": 0.087, + "step": 12965 + }, + { + "epoch": 118.95, + "learning_rate": 4.548929663608562e-06, + "loss": 0.0729, + "step": 12966 + }, + { + "epoch": 118.96, + "learning_rate": 4.510703363914373e-06, + "loss": 0.0956, + "step": 12967 + }, + { + "epoch": 118.97, + "learning_rate": 4.4724770642201834e-06, + "loss": 0.0518, + "step": 12968 + }, + { + "epoch": 118.98, + "learning_rate": 4.434250764525994e-06, + "loss": 0.0556, + "step": 12969 + }, + { + "epoch": 118.99, + "learning_rate": 4.396024464831805e-06, + "loss": 0.0475, + "step": 12970 + }, + { + "epoch": 119.0, + "learning_rate": 4.357798165137615e-06, + "loss": 0.0238, + "step": 12971 + }, + { + "epoch": 119.01, + "learning_rate": 4.319571865443425e-06, + "loss": 0.1304, + "step": 12972 + }, + { + "epoch": 119.02, + "learning_rate": 4.281345565749235e-06, + "loss": 0.0814, + "step": 12973 + }, + { + "epoch": 119.03, + "learning_rate": 4.2431192660550464e-06, + "loss": 0.1046, + "step": 12974 + }, + { + "epoch": 119.04, + "learning_rate": 4.204892966360856e-06, + "loss": 0.061, + "step": 12975 + }, + { + "epoch": 119.05, + "learning_rate": 4.166666666666667e-06, + "loss": 0.0438, + "step": 12976 + }, + { + "epoch": 119.06, + "learning_rate": 4.128440366972477e-06, + "loss": 0.0931, + "step": 12977 + }, + { + "epoch": 119.06, + "learning_rate": 4.090214067278287e-06, + "loss": 0.0853, + "step": 12978 + }, + { + "epoch": 119.07, + "learning_rate": 4.0519877675840975e-06, + "loss": 0.0333, + "step": 12979 + }, + { + "epoch": 119.08, + "learning_rate": 4.013761467889909e-06, + "loss": 0.0457, + "step": 12980 + }, + { + "epoch": 119.09, + "learning_rate": 3.975535168195719e-06, + "loss": 0.0926, + "step": 12981 + }, + { + "epoch": 119.1, + "learning_rate": 3.937308868501529e-06, + "loss": 0.125, + "step": 12982 + }, + { + "epoch": 119.11, + "learning_rate": 3.89908256880734e-06, + "loss": 0.092, + "step": 12983 + }, + { + "epoch": 119.12, + "learning_rate": 3.86085626911315e-06, + "loss": 0.0612, + "step": 12984 + }, + { + "epoch": 119.13, + "learning_rate": 3.8226299694189605e-06, + "loss": 0.1016, + "step": 12985 + }, + { + "epoch": 119.14, + "learning_rate": 3.7844036697247707e-06, + "loss": 0.1091, + "step": 12986 + }, + { + "epoch": 119.15, + "learning_rate": 3.7461773700305814e-06, + "loss": 0.0593, + "step": 12987 + }, + { + "epoch": 119.16, + "learning_rate": 3.7079510703363916e-06, + "loss": 0.1368, + "step": 12988 + }, + { + "epoch": 119.17, + "learning_rate": 3.6697247706422022e-06, + "loss": 0.0701, + "step": 12989 + }, + { + "epoch": 119.17, + "learning_rate": 3.6314984709480124e-06, + "loss": 0.0653, + "step": 12990 + }, + { + "epoch": 119.18, + "learning_rate": 3.5932721712538227e-06, + "loss": 0.0637, + "step": 12991 + }, + { + "epoch": 119.19, + "learning_rate": 3.5550458715596333e-06, + "loss": 0.1093, + "step": 12992 + }, + { + "epoch": 119.2, + "learning_rate": 3.5168195718654435e-06, + "loss": 0.0902, + "step": 12993 + }, + { + "epoch": 119.21, + "learning_rate": 3.4785932721712537e-06, + "loss": 0.0501, + "step": 12994 + }, + { + "epoch": 119.22, + "learning_rate": 3.4403669724770644e-06, + "loss": 0.0313, + "step": 12995 + }, + { + "epoch": 119.23, + "learning_rate": 3.4021406727828746e-06, + "loss": 0.0422, + "step": 12996 + }, + { + "epoch": 119.24, + "learning_rate": 3.363914373088685e-06, + "loss": 0.0541, + "step": 12997 + }, + { + "epoch": 119.25, + "learning_rate": 3.325688073394496e-06, + "loss": 0.0205, + "step": 12998 + }, + { + "epoch": 119.26, + "learning_rate": 3.287461773700306e-06, + "loss": 0.1206, + "step": 12999 + }, + { + "epoch": 119.27, + "learning_rate": 3.2492354740061163e-06, + "loss": 0.1122, + "step": 13000 + }, + { + "epoch": 119.27, + "eval_cer": 0.1316811346496949, + "eval_loss": 0.7534573078155518, + "eval_runtime": 87.8463, + "eval_samples_per_second": 18.749, + "eval_steps_per_second": 2.345, + "eval_wer": 0.46512102951690326, + "step": 13000 + }, + { + "epoch": 119.28, + "learning_rate": 3.211009174311927e-06, + "loss": 0.0854, + "step": 13001 + }, + { + "epoch": 119.28, + "learning_rate": 3.172782874617737e-06, + "loss": 0.0569, + "step": 13002 + }, + { + "epoch": 119.29, + "learning_rate": 3.1345565749235474e-06, + "loss": 0.0971, + "step": 13003 + }, + { + "epoch": 119.3, + "learning_rate": 3.096330275229358e-06, + "loss": 0.0785, + "step": 13004 + }, + { + "epoch": 119.31, + "learning_rate": 3.0581039755351682e-06, + "loss": 0.0469, + "step": 13005 + }, + { + "epoch": 119.32, + "learning_rate": 3.0198776758409785e-06, + "loss": 0.0882, + "step": 13006 + }, + { + "epoch": 119.33, + "learning_rate": 2.981651376146789e-06, + "loss": 0.0706, + "step": 13007 + }, + { + "epoch": 119.34, + "learning_rate": 2.9434250764525993e-06, + "loss": 0.0895, + "step": 13008 + }, + { + "epoch": 119.35, + "learning_rate": 2.9051987767584095e-06, + "loss": 0.062, + "step": 13009 + }, + { + "epoch": 119.36, + "learning_rate": 2.8669724770642206e-06, + "loss": 0.0517, + "step": 13010 + }, + { + "epoch": 119.37, + "learning_rate": 2.828746177370031e-06, + "loss": 0.1047, + "step": 13011 + }, + { + "epoch": 119.38, + "learning_rate": 2.790519877675841e-06, + "loss": 0.0842, + "step": 13012 + }, + { + "epoch": 119.39, + "learning_rate": 2.7522935779816517e-06, + "loss": 0.0765, + "step": 13013 + }, + { + "epoch": 119.39, + "learning_rate": 2.714067278287462e-06, + "loss": 0.0469, + "step": 13014 + }, + { + "epoch": 119.4, + "learning_rate": 2.675840978593272e-06, + "loss": 0.0997, + "step": 13015 + }, + { + "epoch": 119.41, + "learning_rate": 2.6376146788990827e-06, + "loss": 0.0983, + "step": 13016 + }, + { + "epoch": 119.42, + "learning_rate": 2.599388379204893e-06, + "loss": 0.0993, + "step": 13017 + }, + { + "epoch": 119.43, + "learning_rate": 2.561162079510703e-06, + "loss": 0.0637, + "step": 13018 + }, + { + "epoch": 119.44, + "learning_rate": 2.522935779816514e-06, + "loss": 0.0583, + "step": 13019 + }, + { + "epoch": 119.45, + "learning_rate": 2.484709480122324e-06, + "loss": 0.0322, + "step": 13020 + }, + { + "epoch": 119.46, + "learning_rate": 2.4464831804281343e-06, + "loss": 0.0873, + "step": 13021 + }, + { + "epoch": 119.47, + "learning_rate": 2.4082568807339453e-06, + "loss": 0.0464, + "step": 13022 + }, + { + "epoch": 119.48, + "learning_rate": 2.3700305810397555e-06, + "loss": 0.0486, + "step": 13023 + }, + { + "epoch": 119.49, + "learning_rate": 2.3318042813455658e-06, + "loss": 0.0486, + "step": 13024 + }, + { + "epoch": 119.5, + "learning_rate": 2.2935779816513764e-06, + "loss": 0.0183, + "step": 13025 + }, + { + "epoch": 119.5, + "learning_rate": 2.2553516819571866e-06, + "loss": 0.1524, + "step": 13026 + }, + { + "epoch": 119.51, + "learning_rate": 2.217125382262997e-06, + "loss": 0.0568, + "step": 13027 + }, + { + "epoch": 119.52, + "learning_rate": 2.1788990825688075e-06, + "loss": 0.0954, + "step": 13028 + }, + { + "epoch": 119.53, + "learning_rate": 2.1406727828746177e-06, + "loss": 0.1196, + "step": 13029 + }, + { + "epoch": 119.54, + "learning_rate": 2.102446483180428e-06, + "loss": 0.0774, + "step": 13030 + }, + { + "epoch": 119.55, + "learning_rate": 2.0642201834862385e-06, + "loss": 0.0678, + "step": 13031 + }, + { + "epoch": 119.56, + "learning_rate": 2.0259938837920488e-06, + "loss": 0.0432, + "step": 13032 + }, + { + "epoch": 119.57, + "learning_rate": 1.9877675840978594e-06, + "loss": 0.0622, + "step": 13033 + }, + { + "epoch": 119.58, + "learning_rate": 1.94954128440367e-06, + "loss": 0.0797, + "step": 13034 + }, + { + "epoch": 119.59, + "learning_rate": 1.9113149847094803e-06, + "loss": 0.1085, + "step": 13035 + }, + { + "epoch": 119.6, + "learning_rate": 1.8730886850152907e-06, + "loss": 0.1096, + "step": 13036 + }, + { + "epoch": 119.61, + "learning_rate": 1.8348623853211011e-06, + "loss": 0.1229, + "step": 13037 + }, + { + "epoch": 119.61, + "learning_rate": 1.7966360856269113e-06, + "loss": 0.087, + "step": 13038 + }, + { + "epoch": 119.62, + "learning_rate": 1.7584097859327218e-06, + "loss": 0.0716, + "step": 13039 + }, + { + "epoch": 119.63, + "learning_rate": 1.7201834862385322e-06, + "loss": 0.0942, + "step": 13040 + }, + { + "epoch": 119.64, + "learning_rate": 1.6819571865443424e-06, + "loss": 0.0686, + "step": 13041 + }, + { + "epoch": 119.65, + "learning_rate": 1.643730886850153e-06, + "loss": 0.1431, + "step": 13042 + }, + { + "epoch": 119.66, + "learning_rate": 1.6055045871559635e-06, + "loss": 0.1038, + "step": 13043 + }, + { + "epoch": 119.67, + "learning_rate": 1.5672782874617737e-06, + "loss": 0.0516, + "step": 13044 + }, + { + "epoch": 119.68, + "learning_rate": 1.5290519877675841e-06, + "loss": 0.0647, + "step": 13045 + }, + { + "epoch": 119.69, + "learning_rate": 1.4908256880733945e-06, + "loss": 0.0996, + "step": 13046 + }, + { + "epoch": 119.7, + "learning_rate": 1.4525993883792048e-06, + "loss": 0.0659, + "step": 13047 + }, + { + "epoch": 119.71, + "learning_rate": 1.4143730886850154e-06, + "loss": 0.0431, + "step": 13048 + }, + { + "epoch": 119.72, + "learning_rate": 1.3761467889908258e-06, + "loss": 0.0359, + "step": 13049 + }, + { + "epoch": 119.72, + "learning_rate": 1.337920489296636e-06, + "loss": 0.0576, + "step": 13050 + }, + { + "epoch": 119.73, + "learning_rate": 1.2996941896024465e-06, + "loss": 0.0453, + "step": 13051 + }, + { + "epoch": 119.74, + "learning_rate": 1.261467889908257e-06, + "loss": 0.0324, + "step": 13052 + }, + { + "epoch": 119.75, + "learning_rate": 1.2232415902140671e-06, + "loss": 0.1434, + "step": 13053 + }, + { + "epoch": 119.76, + "learning_rate": 1.1850152905198778e-06, + "loss": 0.1523, + "step": 13054 + }, + { + "epoch": 119.77, + "learning_rate": 1.1467889908256882e-06, + "loss": 0.0999, + "step": 13055 + }, + { + "epoch": 119.78, + "learning_rate": 1.1085626911314984e-06, + "loss": 0.0943, + "step": 13056 + }, + { + "epoch": 119.79, + "learning_rate": 1.0703363914373088e-06, + "loss": 0.0894, + "step": 13057 + }, + { + "epoch": 119.8, + "learning_rate": 1.0321100917431193e-06, + "loss": 0.0783, + "step": 13058 + }, + { + "epoch": 119.81, + "learning_rate": 9.938837920489297e-07, + "loss": 0.0994, + "step": 13059 + }, + { + "epoch": 119.82, + "learning_rate": 9.556574923547401e-07, + "loss": 0.0576, + "step": 13060 + }, + { + "epoch": 119.83, + "learning_rate": 9.174311926605506e-07, + "loss": 0.0705, + "step": 13061 + }, + { + "epoch": 119.83, + "learning_rate": 8.792048929663609e-07, + "loss": 0.0641, + "step": 13062 + }, + { + "epoch": 119.84, + "learning_rate": 8.409785932721712e-07, + "loss": 0.1253, + "step": 13063 + }, + { + "epoch": 119.85, + "learning_rate": 8.027522935779817e-07, + "loss": 0.0899, + "step": 13064 + }, + { + "epoch": 119.86, + "learning_rate": 7.645259938837921e-07, + "loss": 0.077, + "step": 13065 + }, + { + "epoch": 119.87, + "learning_rate": 7.262996941896024e-07, + "loss": 0.032, + "step": 13066 + }, + { + "epoch": 119.88, + "learning_rate": 6.880733944954129e-07, + "loss": 0.0462, + "step": 13067 + }, + { + "epoch": 119.89, + "learning_rate": 6.498470948012232e-07, + "loss": 0.0897, + "step": 13068 + }, + { + "epoch": 119.9, + "learning_rate": 6.116207951070336e-07, + "loss": 0.0622, + "step": 13069 + }, + { + "epoch": 119.91, + "learning_rate": 5.733944954128441e-07, + "loss": 0.1148, + "step": 13070 + }, + { + "epoch": 119.92, + "learning_rate": 5.351681957186544e-07, + "loss": 0.0638, + "step": 13071 + }, + { + "epoch": 119.93, + "learning_rate": 4.969418960244648e-07, + "loss": 0.0635, + "step": 13072 + }, + { + "epoch": 119.94, + "learning_rate": 4.587155963302753e-07, + "loss": 0.0813, + "step": 13073 + }, + { + "epoch": 119.94, + "learning_rate": 4.204892966360856e-07, + "loss": 0.0825, + "step": 13074 + }, + { + "epoch": 119.95, + "learning_rate": 3.8226299694189603e-07, + "loss": 0.0552, + "step": 13075 + }, + { + "epoch": 119.96, + "learning_rate": 3.4403669724770646e-07, + "loss": 0.0686, + "step": 13076 + }, + { + "epoch": 119.97, + "learning_rate": 3.058103975535168e-07, + "loss": 0.1507, + "step": 13077 + }, + { + "epoch": 119.98, + "learning_rate": 2.675840978593272e-07, + "loss": 0.039, + "step": 13078 + }, + { + "epoch": 119.99, + "learning_rate": 2.2935779816513764e-07, + "loss": 0.1128, + "step": 13079 + }, + { + "epoch": 120.0, + "learning_rate": 1.9113149847094802e-07, + "loss": 0.0626, + "step": 13080 + }, + { + "epoch": 120.0, + "step": 13080, + "total_flos": 1.6034049995906177e+20, + "train_loss": 0.020454988959786518, + "train_runtime": 6971.2586, + "train_samples_per_second": 59.869, + "train_steps_per_second": 1.876 } ], - "max_steps": 545, - "num_train_epochs": 5, - "total_flos": 1.2873670788396168e+20, + "max_steps": 13080, + "num_train_epochs": 120, + "total_flos": 1.6034049995906177e+20, "trial_name": null, "trial_params": null }