diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15769 @@ +{ + "best_metric": 0.7662602112929114, + "best_model_checkpoint": "wav2vec2-base-mirst500/checkpoint-26090", + "epoch": 10.0, + "global_step": 26090, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.1498658489842852e-07, + "loss": 2.0809, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 2.2997316979685704e-07, + "loss": 2.0826, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 3.4495975469528556e-07, + "loss": 2.0793, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.599463395937141e-07, + "loss": 2.0825, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 5.749329244921425e-07, + "loss": 2.0785, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 6.899195093905711e-07, + "loss": 2.0799, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 8.049060942889996e-07, + "loss": 2.0789, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 9.198926791874282e-07, + "loss": 2.0798, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 1.0348792640858565e-06, + "loss": 2.0765, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 1.149865848984285e-06, + "loss": 2.0825, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 1.2648524338827137e-06, + "loss": 2.0783, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 1.3798390187811422e-06, + "loss": 2.0757, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 1.4948256036795708e-06, + "loss": 2.08, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 1.6098121885779992e-06, + "loss": 2.0776, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 1.7247987734764277e-06, + "loss": 2.0746, + "step": 150 + }, + { + "epoch": 0.06, + "learning_rate": 1.8397853583748563e-06, + "loss": 2.0694, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 1.9547719432732845e-06, + "loss": 2.0752, + "step": 170 + }, + { + "epoch": 0.07, + "learning_rate": 2.069758528171713e-06, + "loss": 2.0715, + "step": 180 + }, + { + "epoch": 0.07, + "learning_rate": 2.1847451130701416e-06, + "loss": 2.0654, + "step": 190 + }, + { + "epoch": 0.08, + "learning_rate": 2.29973169796857e-06, + "loss": 2.0681, + "step": 200 + }, + { + "epoch": 0.08, + "learning_rate": 2.4147182828669988e-06, + "loss": 2.0669, + "step": 210 + }, + { + "epoch": 0.08, + "learning_rate": 2.5297048677654273e-06, + "loss": 2.0559, + "step": 220 + }, + { + "epoch": 0.09, + "learning_rate": 2.644691452663856e-06, + "loss": 2.0493, + "step": 230 + }, + { + "epoch": 0.09, + "learning_rate": 2.7596780375622845e-06, + "loss": 2.0587, + "step": 240 + }, + { + "epoch": 0.1, + "learning_rate": 2.874664622460713e-06, + "loss": 2.0452, + "step": 250 + }, + { + "epoch": 0.1, + "learning_rate": 2.9896512073591416e-06, + "loss": 2.0411, + "step": 260 + }, + { + "epoch": 0.1, + "learning_rate": 3.1046377922575698e-06, + "loss": 2.0333, + "step": 270 + }, + { + "epoch": 0.11, + "learning_rate": 3.2196243771559983e-06, + "loss": 2.0246, + "step": 280 + }, + { + "epoch": 0.11, + "learning_rate": 3.334610962054427e-06, + "loss": 2.0095, + "step": 290 + }, + { + "epoch": 0.11, + "learning_rate": 3.4495975469528555e-06, + "loss": 2.0108, + "step": 300 + }, + { + "epoch": 0.12, + "learning_rate": 3.564584131851284e-06, + "loss": 1.9914, + "step": 310 + }, + { + "epoch": 0.12, + "learning_rate": 3.6795707167497126e-06, + "loss": 1.9905, + "step": 320 + }, + { + "epoch": 0.13, + "learning_rate": 3.7945573016481408e-06, + "loss": 2.0021, + "step": 330 + }, + { + "epoch": 0.13, + "learning_rate": 3.909543886546569e-06, + "loss": 2.0083, + "step": 340 + }, + { + "epoch": 0.13, + "learning_rate": 4.0245304714449975e-06, + "loss": 1.9917, + "step": 350 + }, + { + "epoch": 0.14, + "learning_rate": 4.139517056343426e-06, + "loss": 1.9845, + "step": 360 + }, + { + "epoch": 0.14, + "learning_rate": 4.254503641241855e-06, + "loss": 1.9991, + "step": 370 + }, + { + "epoch": 0.15, + "learning_rate": 4.369490226140283e-06, + "loss": 1.97, + "step": 380 + }, + { + "epoch": 0.15, + "learning_rate": 4.484476811038712e-06, + "loss": 1.9776, + "step": 390 + }, + { + "epoch": 0.15, + "learning_rate": 4.59946339593714e-06, + "loss": 1.9711, + "step": 400 + }, + { + "epoch": 0.16, + "learning_rate": 4.714449980835569e-06, + "loss": 1.9273, + "step": 410 + }, + { + "epoch": 0.16, + "learning_rate": 4.8294365657339975e-06, + "loss": 1.9671, + "step": 420 + }, + { + "epoch": 0.16, + "learning_rate": 4.944423150632426e-06, + "loss": 1.9618, + "step": 430 + }, + { + "epoch": 0.17, + "learning_rate": 5.059409735530855e-06, + "loss": 1.9592, + "step": 440 + }, + { + "epoch": 0.17, + "learning_rate": 5.174396320429283e-06, + "loss": 1.9387, + "step": 450 + }, + { + "epoch": 0.18, + "learning_rate": 5.289382905327712e-06, + "loss": 1.9285, + "step": 460 + }, + { + "epoch": 0.18, + "learning_rate": 5.392870831736297e-06, + "loss": 1.9356, + "step": 470 + }, + { + "epoch": 0.18, + "learning_rate": 5.507857416634726e-06, + "loss": 1.8985, + "step": 480 + }, + { + "epoch": 0.19, + "learning_rate": 5.622844001533154e-06, + "loss": 1.9485, + "step": 490 + }, + { + "epoch": 0.19, + "learning_rate": 5.737830586431583e-06, + "loss": 1.9328, + "step": 500 + }, + { + "epoch": 0.2, + "learning_rate": 5.8528171713300115e-06, + "loss": 1.903, + "step": 510 + }, + { + "epoch": 0.2, + "learning_rate": 5.96780375622844e-06, + "loss": 1.9217, + "step": 520 + }, + { + "epoch": 0.2, + "learning_rate": 6.082790341126869e-06, + "loss": 1.9428, + "step": 530 + }, + { + "epoch": 0.21, + "learning_rate": 6.197776926025297e-06, + "loss": 1.9103, + "step": 540 + }, + { + "epoch": 0.21, + "learning_rate": 6.312763510923726e-06, + "loss": 1.9054, + "step": 550 + }, + { + "epoch": 0.21, + "learning_rate": 6.427750095822154e-06, + "loss": 1.9398, + "step": 560 + }, + { + "epoch": 0.22, + "learning_rate": 6.542736680720583e-06, + "loss": 1.9347, + "step": 570 + }, + { + "epoch": 0.22, + "learning_rate": 6.6577232656190115e-06, + "loss": 1.8898, + "step": 580 + }, + { + "epoch": 0.23, + "learning_rate": 6.77270985051744e-06, + "loss": 1.9083, + "step": 590 + }, + { + "epoch": 0.23, + "learning_rate": 6.887696435415869e-06, + "loss": 1.9163, + "step": 600 + }, + { + "epoch": 0.23, + "learning_rate": 7.002683020314297e-06, + "loss": 1.8905, + "step": 610 + }, + { + "epoch": 0.24, + "learning_rate": 7.117669605212726e-06, + "loss": 1.9142, + "step": 620 + }, + { + "epoch": 0.24, + "learning_rate": 7.2326561901111535e-06, + "loss": 1.8944, + "step": 630 + }, + { + "epoch": 0.25, + "learning_rate": 7.347642775009582e-06, + "loss": 1.8967, + "step": 640 + }, + { + "epoch": 0.25, + "learning_rate": 7.462629359908011e-06, + "loss": 1.8727, + "step": 650 + }, + { + "epoch": 0.25, + "learning_rate": 7.577615944806439e-06, + "loss": 1.8877, + "step": 660 + }, + { + "epoch": 0.26, + "learning_rate": 7.692602529704868e-06, + "loss": 1.9041, + "step": 670 + }, + { + "epoch": 0.26, + "learning_rate": 7.807589114603296e-06, + "loss": 1.8957, + "step": 680 + }, + { + "epoch": 0.26, + "learning_rate": 7.922575699501725e-06, + "loss": 1.8808, + "step": 690 + }, + { + "epoch": 0.27, + "learning_rate": 8.037562284400154e-06, + "loss": 1.8558, + "step": 700 + }, + { + "epoch": 0.27, + "learning_rate": 8.152548869298582e-06, + "loss": 1.8852, + "step": 710 + }, + { + "epoch": 0.28, + "learning_rate": 8.26753545419701e-06, + "loss": 1.8812, + "step": 720 + }, + { + "epoch": 0.28, + "learning_rate": 8.38252203909544e-06, + "loss": 1.9017, + "step": 730 + }, + { + "epoch": 0.28, + "learning_rate": 8.497508623993868e-06, + "loss": 1.8998, + "step": 740 + }, + { + "epoch": 0.29, + "learning_rate": 8.612495208892295e-06, + "loss": 1.8669, + "step": 750 + }, + { + "epoch": 0.29, + "learning_rate": 8.727481793790725e-06, + "loss": 1.8892, + "step": 760 + }, + { + "epoch": 0.3, + "learning_rate": 8.842468378689152e-06, + "loss": 1.8814, + "step": 770 + }, + { + "epoch": 0.3, + "learning_rate": 8.957454963587582e-06, + "loss": 1.854, + "step": 780 + }, + { + "epoch": 0.3, + "learning_rate": 9.072441548486009e-06, + "loss": 1.8932, + "step": 790 + }, + { + "epoch": 0.31, + "learning_rate": 9.18742813338444e-06, + "loss": 1.8865, + "step": 800 + }, + { + "epoch": 0.31, + "learning_rate": 9.302414718282866e-06, + "loss": 1.8937, + "step": 810 + }, + { + "epoch": 0.31, + "learning_rate": 9.417401303181296e-06, + "loss": 1.8775, + "step": 820 + }, + { + "epoch": 0.32, + "learning_rate": 9.532387888079723e-06, + "loss": 1.8759, + "step": 830 + }, + { + "epoch": 0.32, + "learning_rate": 9.647374472978154e-06, + "loss": 1.7925, + "step": 840 + }, + { + "epoch": 0.33, + "learning_rate": 9.76236105787658e-06, + "loss": 1.8262, + "step": 850 + }, + { + "epoch": 0.33, + "learning_rate": 9.87734764277501e-06, + "loss": 1.8571, + "step": 860 + }, + { + "epoch": 0.33, + "learning_rate": 9.992334227673438e-06, + "loss": 1.8321, + "step": 870 + }, + { + "epoch": 0.34, + "learning_rate": 1.0107320812571868e-05, + "loss": 1.8251, + "step": 880 + }, + { + "epoch": 0.34, + "learning_rate": 1.0222307397470295e-05, + "loss": 1.8128, + "step": 890 + }, + { + "epoch": 0.34, + "learning_rate": 1.0337293982368725e-05, + "loss": 1.8149, + "step": 900 + }, + { + "epoch": 0.35, + "learning_rate": 1.0452280567267152e-05, + "loss": 1.8517, + "step": 910 + }, + { + "epoch": 0.35, + "learning_rate": 1.0567267152165582e-05, + "loss": 1.8439, + "step": 920 + }, + { + "epoch": 0.36, + "learning_rate": 1.0682253737064009e-05, + "loss": 1.83, + "step": 930 + }, + { + "epoch": 0.36, + "learning_rate": 1.079724032196244e-05, + "loss": 1.8438, + "step": 940 + }, + { + "epoch": 0.36, + "learning_rate": 1.0912226906860866e-05, + "loss": 1.8024, + "step": 950 + }, + { + "epoch": 0.37, + "learning_rate": 1.1027213491759295e-05, + "loss": 1.8328, + "step": 960 + }, + { + "epoch": 0.37, + "learning_rate": 1.1142200076657723e-05, + "loss": 1.799, + "step": 970 + }, + { + "epoch": 0.38, + "learning_rate": 1.1257186661556152e-05, + "loss": 1.8646, + "step": 980 + }, + { + "epoch": 0.38, + "learning_rate": 1.137217324645458e-05, + "loss": 1.8245, + "step": 990 + }, + { + "epoch": 0.38, + "learning_rate": 1.1487159831353009e-05, + "loss": 1.7966, + "step": 1000 + }, + { + "epoch": 0.39, + "learning_rate": 1.1602146416251438e-05, + "loss": 1.7852, + "step": 1010 + }, + { + "epoch": 0.39, + "learning_rate": 1.1717133001149866e-05, + "loss": 1.7661, + "step": 1020 + }, + { + "epoch": 0.39, + "learning_rate": 1.1832119586048295e-05, + "loss": 1.7789, + "step": 1030 + }, + { + "epoch": 0.4, + "learning_rate": 1.1947106170946723e-05, + "loss": 1.7793, + "step": 1040 + }, + { + "epoch": 0.4, + "learning_rate": 1.2062092755845152e-05, + "loss": 1.8356, + "step": 1050 + }, + { + "epoch": 0.41, + "learning_rate": 1.217707934074358e-05, + "loss": 1.7949, + "step": 1060 + }, + { + "epoch": 0.41, + "learning_rate": 1.2292065925642009e-05, + "loss": 1.7382, + "step": 1070 + }, + { + "epoch": 0.41, + "learning_rate": 1.2407052510540438e-05, + "loss": 1.7778, + "step": 1080 + }, + { + "epoch": 0.42, + "learning_rate": 1.2522039095438866e-05, + "loss": 1.7256, + "step": 1090 + }, + { + "epoch": 0.42, + "learning_rate": 1.2637025680337295e-05, + "loss": 1.7275, + "step": 1100 + }, + { + "epoch": 0.43, + "learning_rate": 1.2752012265235723e-05, + "loss": 1.8011, + "step": 1110 + }, + { + "epoch": 0.43, + "learning_rate": 1.2866998850134152e-05, + "loss": 1.7839, + "step": 1120 + }, + { + "epoch": 0.43, + "learning_rate": 1.298198543503258e-05, + "loss": 1.7867, + "step": 1130 + }, + { + "epoch": 0.44, + "learning_rate": 1.3096972019931009e-05, + "loss": 1.7877, + "step": 1140 + }, + { + "epoch": 0.44, + "learning_rate": 1.3211958604829436e-05, + "loss": 1.7485, + "step": 1150 + }, + { + "epoch": 0.44, + "learning_rate": 1.3326945189727866e-05, + "loss": 1.7283, + "step": 1160 + }, + { + "epoch": 0.45, + "learning_rate": 1.3441931774626293e-05, + "loss": 1.7347, + "step": 1170 + }, + { + "epoch": 0.45, + "learning_rate": 1.3556918359524723e-05, + "loss": 1.7604, + "step": 1180 + }, + { + "epoch": 0.46, + "learning_rate": 1.367190494442315e-05, + "loss": 1.6977, + "step": 1190 + }, + { + "epoch": 0.46, + "learning_rate": 1.378689152932158e-05, + "loss": 1.7332, + "step": 1200 + }, + { + "epoch": 0.46, + "learning_rate": 1.3901878114220007e-05, + "loss": 1.7263, + "step": 1210 + }, + { + "epoch": 0.47, + "learning_rate": 1.4016864699118438e-05, + "loss": 1.7047, + "step": 1220 + }, + { + "epoch": 0.47, + "learning_rate": 1.4131851284016864e-05, + "loss": 1.7593, + "step": 1230 + }, + { + "epoch": 0.48, + "learning_rate": 1.4246837868915295e-05, + "loss": 1.7368, + "step": 1240 + }, + { + "epoch": 0.48, + "learning_rate": 1.4361824453813722e-05, + "loss": 1.7325, + "step": 1250 + }, + { + "epoch": 0.48, + "learning_rate": 1.4476811038712152e-05, + "loss": 1.6725, + "step": 1260 + }, + { + "epoch": 0.49, + "learning_rate": 1.4591797623610579e-05, + "loss": 1.7199, + "step": 1270 + }, + { + "epoch": 0.49, + "learning_rate": 1.4706784208509007e-05, + "loss": 1.6786, + "step": 1280 + }, + { + "epoch": 0.49, + "learning_rate": 1.4821770793407436e-05, + "loss": 1.6769, + "step": 1290 + }, + { + "epoch": 0.5, + "learning_rate": 1.4936757378305864e-05, + "loss": 1.669, + "step": 1300 + }, + { + "epoch": 0.5, + "learning_rate": 1.5051743963204293e-05, + "loss": 1.6373, + "step": 1310 + }, + { + "epoch": 0.51, + "learning_rate": 1.516673054810272e-05, + "loss": 1.6432, + "step": 1320 + }, + { + "epoch": 0.51, + "learning_rate": 1.528171713300115e-05, + "loss": 1.6666, + "step": 1330 + }, + { + "epoch": 0.51, + "learning_rate": 1.539670371789958e-05, + "loss": 1.7061, + "step": 1340 + }, + { + "epoch": 0.52, + "learning_rate": 1.5511690302798007e-05, + "loss": 1.652, + "step": 1350 + }, + { + "epoch": 0.52, + "learning_rate": 1.5626676887696436e-05, + "loss": 1.6681, + "step": 1360 + }, + { + "epoch": 0.53, + "learning_rate": 1.574166347259486e-05, + "loss": 1.5956, + "step": 1370 + }, + { + "epoch": 0.53, + "learning_rate": 1.5856650057493293e-05, + "loss": 1.6581, + "step": 1380 + }, + { + "epoch": 0.53, + "learning_rate": 1.597163664239172e-05, + "loss": 1.6153, + "step": 1390 + }, + { + "epoch": 0.54, + "learning_rate": 1.608662322729015e-05, + "loss": 1.6522, + "step": 1400 + }, + { + "epoch": 0.54, + "learning_rate": 1.6201609812188575e-05, + "loss": 1.5935, + "step": 1410 + }, + { + "epoch": 0.54, + "learning_rate": 1.6316596397087007e-05, + "loss": 1.6758, + "step": 1420 + }, + { + "epoch": 0.55, + "learning_rate": 1.6431582981985436e-05, + "loss": 1.5831, + "step": 1430 + }, + { + "epoch": 0.55, + "learning_rate": 1.6546569566883865e-05, + "loss": 1.6165, + "step": 1440 + }, + { + "epoch": 0.56, + "learning_rate": 1.666155615178229e-05, + "loss": 1.6222, + "step": 1450 + }, + { + "epoch": 0.56, + "learning_rate": 1.677654273668072e-05, + "loss": 1.606, + "step": 1460 + }, + { + "epoch": 0.56, + "learning_rate": 1.689152932157915e-05, + "loss": 1.6208, + "step": 1470 + }, + { + "epoch": 0.57, + "learning_rate": 1.700651590647758e-05, + "loss": 1.5936, + "step": 1480 + }, + { + "epoch": 0.57, + "learning_rate": 1.7121502491376004e-05, + "loss": 1.5332, + "step": 1490 + }, + { + "epoch": 0.57, + "learning_rate": 1.7236489076274436e-05, + "loss": 1.5827, + "step": 1500 + }, + { + "epoch": 0.58, + "learning_rate": 1.7351475661172865e-05, + "loss": 1.5474, + "step": 1510 + }, + { + "epoch": 0.58, + "learning_rate": 1.7466462246071293e-05, + "loss": 1.5729, + "step": 1520 + }, + { + "epoch": 0.59, + "learning_rate": 1.7581448830969718e-05, + "loss": 1.5742, + "step": 1530 + }, + { + "epoch": 0.59, + "learning_rate": 1.769643541586815e-05, + "loss": 1.4756, + "step": 1540 + }, + { + "epoch": 0.59, + "learning_rate": 1.781142200076658e-05, + "loss": 1.4994, + "step": 1550 + }, + { + "epoch": 0.6, + "learning_rate": 1.7926408585665007e-05, + "loss": 1.496, + "step": 1560 + }, + { + "epoch": 0.6, + "learning_rate": 1.8041395170563433e-05, + "loss": 1.5161, + "step": 1570 + }, + { + "epoch": 0.61, + "learning_rate": 1.815638175546186e-05, + "loss": 1.4848, + "step": 1580 + }, + { + "epoch": 0.61, + "learning_rate": 1.8271368340360293e-05, + "loss": 1.5293, + "step": 1590 + }, + { + "epoch": 0.61, + "learning_rate": 1.838635492525872e-05, + "loss": 1.5262, + "step": 1600 + }, + { + "epoch": 0.62, + "learning_rate": 1.8501341510157147e-05, + "loss": 1.4921, + "step": 1610 + }, + { + "epoch": 0.62, + "learning_rate": 1.8616328095055575e-05, + "loss": 1.4894, + "step": 1620 + }, + { + "epoch": 0.62, + "learning_rate": 1.8731314679954007e-05, + "loss": 1.5104, + "step": 1630 + }, + { + "epoch": 0.63, + "learning_rate": 1.8846301264852436e-05, + "loss": 1.4651, + "step": 1640 + }, + { + "epoch": 0.63, + "learning_rate": 1.896128784975086e-05, + "loss": 1.4319, + "step": 1650 + }, + { + "epoch": 0.64, + "learning_rate": 1.907627443464929e-05, + "loss": 1.4929, + "step": 1660 + }, + { + "epoch": 0.64, + "learning_rate": 1.919126101954772e-05, + "loss": 1.4852, + "step": 1670 + }, + { + "epoch": 0.64, + "learning_rate": 1.930624760444615e-05, + "loss": 1.4824, + "step": 1680 + }, + { + "epoch": 0.65, + "learning_rate": 1.9421234189344575e-05, + "loss": 1.4236, + "step": 1690 + }, + { + "epoch": 0.65, + "learning_rate": 1.9536220774243004e-05, + "loss": 1.4967, + "step": 1700 + }, + { + "epoch": 0.66, + "learning_rate": 1.9651207359141436e-05, + "loss": 1.481, + "step": 1710 + }, + { + "epoch": 0.66, + "learning_rate": 1.9766193944039865e-05, + "loss": 1.4236, + "step": 1720 + }, + { + "epoch": 0.66, + "learning_rate": 1.988118052893829e-05, + "loss": 1.4669, + "step": 1730 + }, + { + "epoch": 0.67, + "learning_rate": 1.999616711383672e-05, + "loss": 1.4596, + "step": 1740 + }, + { + "epoch": 0.67, + "learning_rate": 2.011115369873515e-05, + "loss": 1.4675, + "step": 1750 + }, + { + "epoch": 0.67, + "learning_rate": 2.022614028363358e-05, + "loss": 1.4355, + "step": 1760 + }, + { + "epoch": 0.68, + "learning_rate": 2.0341126868532004e-05, + "loss": 1.4587, + "step": 1770 + }, + { + "epoch": 0.68, + "learning_rate": 2.0456113453430433e-05, + "loss": 1.4677, + "step": 1780 + }, + { + "epoch": 0.69, + "learning_rate": 2.057110003832886e-05, + "loss": 1.4513, + "step": 1790 + }, + { + "epoch": 0.69, + "learning_rate": 2.0686086623227293e-05, + "loss": 1.4613, + "step": 1800 + }, + { + "epoch": 0.69, + "learning_rate": 2.080107320812572e-05, + "loss": 1.43, + "step": 1810 + }, + { + "epoch": 0.7, + "learning_rate": 2.0916059793024147e-05, + "loss": 1.4312, + "step": 1820 + }, + { + "epoch": 0.7, + "learning_rate": 2.1031046377922575e-05, + "loss": 1.476, + "step": 1830 + }, + { + "epoch": 0.71, + "learning_rate": 2.1146032962821007e-05, + "loss": 1.4366, + "step": 1840 + }, + { + "epoch": 0.71, + "learning_rate": 2.1261019547719433e-05, + "loss": 1.4365, + "step": 1850 + }, + { + "epoch": 0.71, + "learning_rate": 2.137600613261786e-05, + "loss": 1.4557, + "step": 1860 + }, + { + "epoch": 0.72, + "learning_rate": 2.149099271751629e-05, + "loss": 1.4191, + "step": 1870 + }, + { + "epoch": 0.72, + "learning_rate": 2.1605979302414722e-05, + "loss": 1.4422, + "step": 1880 + }, + { + "epoch": 0.72, + "learning_rate": 2.1720965887313147e-05, + "loss": 1.472, + "step": 1890 + }, + { + "epoch": 0.73, + "learning_rate": 2.1835952472211575e-05, + "loss": 1.391, + "step": 1900 + }, + { + "epoch": 0.73, + "learning_rate": 2.1950939057110004e-05, + "loss": 1.3327, + "step": 1910 + }, + { + "epoch": 0.74, + "learning_rate": 2.2065925642008436e-05, + "loss": 1.3913, + "step": 1920 + }, + { + "epoch": 0.74, + "learning_rate": 2.218091222690686e-05, + "loss": 1.4128, + "step": 1930 + }, + { + "epoch": 0.74, + "learning_rate": 2.229589881180529e-05, + "loss": 1.447, + "step": 1940 + }, + { + "epoch": 0.75, + "learning_rate": 2.241088539670372e-05, + "loss": 1.3915, + "step": 1950 + }, + { + "epoch": 0.75, + "learning_rate": 2.2525871981602147e-05, + "loss": 1.3157, + "step": 1960 + }, + { + "epoch": 0.76, + "learning_rate": 2.2640858566500575e-05, + "loss": 1.4321, + "step": 1970 + }, + { + "epoch": 0.76, + "learning_rate": 2.2755845151399004e-05, + "loss": 1.3482, + "step": 1980 + }, + { + "epoch": 0.76, + "learning_rate": 2.2870831736297433e-05, + "loss": 1.4327, + "step": 1990 + }, + { + "epoch": 0.77, + "learning_rate": 2.2985818321195858e-05, + "loss": 1.389, + "step": 2000 + }, + { + "epoch": 0.77, + "learning_rate": 2.310080490609429e-05, + "loss": 1.3312, + "step": 2010 + }, + { + "epoch": 0.77, + "learning_rate": 2.321579149099272e-05, + "loss": 1.3191, + "step": 2020 + }, + { + "epoch": 0.78, + "learning_rate": 2.3330778075891147e-05, + "loss": 1.3441, + "step": 2030 + }, + { + "epoch": 0.78, + "learning_rate": 2.3445764660789572e-05, + "loss": 1.4519, + "step": 2040 + }, + { + "epoch": 0.79, + "learning_rate": 2.3560751245688004e-05, + "loss": 1.4266, + "step": 2050 + }, + { + "epoch": 0.79, + "learning_rate": 2.3675737830586433e-05, + "loss": 1.3635, + "step": 2060 + }, + { + "epoch": 0.79, + "learning_rate": 2.379072441548486e-05, + "loss": 1.4031, + "step": 2070 + }, + { + "epoch": 0.8, + "learning_rate": 2.3905711000383286e-05, + "loss": 1.3573, + "step": 2080 + }, + { + "epoch": 0.8, + "learning_rate": 2.402069758528172e-05, + "loss": 1.3383, + "step": 2090 + }, + { + "epoch": 0.8, + "learning_rate": 2.4135684170180147e-05, + "loss": 1.3363, + "step": 2100 + }, + { + "epoch": 0.81, + "learning_rate": 2.4250670755078576e-05, + "loss": 1.3777, + "step": 2110 + }, + { + "epoch": 0.81, + "learning_rate": 2.4365657339977e-05, + "loss": 1.3103, + "step": 2120 + }, + { + "epoch": 0.82, + "learning_rate": 2.4480643924875433e-05, + "loss": 1.3438, + "step": 2130 + }, + { + "epoch": 0.82, + "learning_rate": 2.459563050977386e-05, + "loss": 1.3614, + "step": 2140 + }, + { + "epoch": 0.82, + "learning_rate": 2.471061709467229e-05, + "loss": 1.3448, + "step": 2150 + }, + { + "epoch": 0.83, + "learning_rate": 2.4825603679570715e-05, + "loss": 1.2961, + "step": 2160 + }, + { + "epoch": 0.83, + "learning_rate": 2.4940590264469147e-05, + "loss": 1.4005, + "step": 2170 + }, + { + "epoch": 0.84, + "learning_rate": 2.5055576849367576e-05, + "loss": 1.328, + "step": 2180 + }, + { + "epoch": 0.84, + "learning_rate": 2.5170563434266004e-05, + "loss": 1.2661, + "step": 2190 + }, + { + "epoch": 0.84, + "learning_rate": 2.528555001916443e-05, + "loss": 1.285, + "step": 2200 + }, + { + "epoch": 0.85, + "learning_rate": 2.5400536604062858e-05, + "loss": 1.3364, + "step": 2210 + }, + { + "epoch": 0.85, + "learning_rate": 2.551552318896129e-05, + "loss": 1.3519, + "step": 2220 + }, + { + "epoch": 0.85, + "learning_rate": 2.563050977385972e-05, + "loss": 1.3864, + "step": 2230 + }, + { + "epoch": 0.86, + "learning_rate": 2.5745496358758144e-05, + "loss": 1.2857, + "step": 2240 + }, + { + "epoch": 0.86, + "learning_rate": 2.5860482943656572e-05, + "loss": 1.3287, + "step": 2250 + }, + { + "epoch": 0.87, + "learning_rate": 2.5975469528555004e-05, + "loss": 1.3407, + "step": 2260 + }, + { + "epoch": 0.87, + "learning_rate": 2.6090456113453433e-05, + "loss": 1.3457, + "step": 2270 + }, + { + "epoch": 0.87, + "learning_rate": 2.6205442698351858e-05, + "loss": 1.2857, + "step": 2280 + }, + { + "epoch": 0.88, + "learning_rate": 2.6320429283250286e-05, + "loss": 1.3198, + "step": 2290 + }, + { + "epoch": 0.88, + "learning_rate": 2.643541586814872e-05, + "loss": 1.338, + "step": 2300 + }, + { + "epoch": 0.89, + "learning_rate": 2.6550402453047147e-05, + "loss": 1.3066, + "step": 2310 + }, + { + "epoch": 0.89, + "learning_rate": 2.6665389037945572e-05, + "loss": 1.3071, + "step": 2320 + }, + { + "epoch": 0.89, + "learning_rate": 2.6780375622844e-05, + "loss": 1.2945, + "step": 2330 + }, + { + "epoch": 0.9, + "learning_rate": 2.6895362207742433e-05, + "loss": 1.3444, + "step": 2340 + }, + { + "epoch": 0.9, + "learning_rate": 2.701034879264086e-05, + "loss": 1.3123, + "step": 2350 + }, + { + "epoch": 0.9, + "learning_rate": 2.7125335377539286e-05, + "loss": 1.3321, + "step": 2360 + }, + { + "epoch": 0.91, + "learning_rate": 2.7240321962437715e-05, + "loss": 1.2505, + "step": 2370 + }, + { + "epoch": 0.91, + "learning_rate": 2.7355308547336147e-05, + "loss": 1.3014, + "step": 2380 + }, + { + "epoch": 0.92, + "learning_rate": 2.7470295132234576e-05, + "loss": 1.3321, + "step": 2390 + }, + { + "epoch": 0.92, + "learning_rate": 2.7585281717133e-05, + "loss": 1.3096, + "step": 2400 + }, + { + "epoch": 0.92, + "learning_rate": 2.770026830203143e-05, + "loss": 1.31, + "step": 2410 + }, + { + "epoch": 0.93, + "learning_rate": 2.7815254886929858e-05, + "loss": 1.335, + "step": 2420 + }, + { + "epoch": 0.93, + "learning_rate": 2.793024147182829e-05, + "loss": 1.2492, + "step": 2430 + }, + { + "epoch": 0.94, + "learning_rate": 2.8045228056726715e-05, + "loss": 1.2654, + "step": 2440 + }, + { + "epoch": 0.94, + "learning_rate": 2.8160214641625144e-05, + "loss": 1.2852, + "step": 2450 + }, + { + "epoch": 0.94, + "learning_rate": 2.8275201226523572e-05, + "loss": 1.2642, + "step": 2460 + }, + { + "epoch": 0.95, + "learning_rate": 2.8390187811422004e-05, + "loss": 1.2931, + "step": 2470 + }, + { + "epoch": 0.95, + "learning_rate": 2.850517439632043e-05, + "loss": 1.3108, + "step": 2480 + }, + { + "epoch": 0.95, + "learning_rate": 2.8620160981218858e-05, + "loss": 1.347, + "step": 2490 + }, + { + "epoch": 0.96, + "learning_rate": 2.8735147566117286e-05, + "loss": 1.3857, + "step": 2500 + }, + { + "epoch": 0.96, + "learning_rate": 2.885013415101572e-05, + "loss": 1.3367, + "step": 2510 + }, + { + "epoch": 0.97, + "learning_rate": 2.8965120735914144e-05, + "loss": 1.3258, + "step": 2520 + }, + { + "epoch": 0.97, + "learning_rate": 2.9080107320812572e-05, + "loss": 1.3037, + "step": 2530 + }, + { + "epoch": 0.97, + "learning_rate": 2.9195093905711e-05, + "loss": 1.2947, + "step": 2540 + }, + { + "epoch": 0.98, + "learning_rate": 2.9310080490609433e-05, + "loss": 1.3097, + "step": 2550 + }, + { + "epoch": 0.98, + "learning_rate": 2.9425067075507858e-05, + "loss": 1.3048, + "step": 2560 + }, + { + "epoch": 0.99, + "learning_rate": 2.9528555001916443e-05, + "loss": 1.2779, + "step": 2570 + }, + { + "epoch": 0.99, + "learning_rate": 2.9643541586814872e-05, + "loss": 1.2939, + "step": 2580 + }, + { + "epoch": 0.99, + "learning_rate": 2.97585281717133e-05, + "loss": 1.2575, + "step": 2590 + }, + { + "epoch": 1.0, + "learning_rate": 2.987351475661173e-05, + "loss": 1.272, + "step": 2600 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5617947919411638, + "eval_loss": 1.1589456796646118, + "eval_runtime": 297.7176, + "eval_samples_per_second": 140.21, + "eval_steps_per_second": 70.107, + "step": 2609 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988501341510158e-05, + "loss": 1.4261, + "step": 2610 + }, + { + "epoch": 1.0, + "learning_rate": 2.9988501341510158e-05, + "loss": 1.2925, + "step": 2620 + }, + { + "epoch": 1.01, + "learning_rate": 2.997572505429922e-05, + "loss": 1.3176, + "step": 2630 + }, + { + "epoch": 1.01, + "learning_rate": 2.9962948767088286e-05, + "loss": 1.2765, + "step": 2640 + }, + { + "epoch": 1.02, + "learning_rate": 2.9950172479877348e-05, + "loss": 1.2774, + "step": 2650 + }, + { + "epoch": 1.02, + "learning_rate": 2.993739619266641e-05, + "loss": 1.2715, + "step": 2660 + }, + { + "epoch": 1.02, + "learning_rate": 2.9924619905455476e-05, + "loss": 1.2701, + "step": 2670 + }, + { + "epoch": 1.03, + "learning_rate": 2.991184361824454e-05, + "loss": 1.241, + "step": 2680 + }, + { + "epoch": 1.03, + "learning_rate": 2.9899067331033604e-05, + "loss": 1.2613, + "step": 2690 + }, + { + "epoch": 1.03, + "learning_rate": 2.9886291043822667e-05, + "loss": 1.2404, + "step": 2700 + }, + { + "epoch": 1.04, + "learning_rate": 2.987351475661173e-05, + "loss": 1.2018, + "step": 2710 + }, + { + "epoch": 1.04, + "learning_rate": 2.986073846940079e-05, + "loss": 1.2422, + "step": 2720 + }, + { + "epoch": 1.05, + "learning_rate": 2.9847962182189857e-05, + "loss": 1.2276, + "step": 2730 + }, + { + "epoch": 1.05, + "learning_rate": 2.983518589497892e-05, + "loss": 1.3064, + "step": 2740 + }, + { + "epoch": 1.05, + "learning_rate": 2.9822409607767985e-05, + "loss": 1.2486, + "step": 2750 + }, + { + "epoch": 1.06, + "learning_rate": 2.9809633320557048e-05, + "loss": 1.3621, + "step": 2760 + }, + { + "epoch": 1.06, + "learning_rate": 2.979685703334611e-05, + "loss": 1.2817, + "step": 2770 + }, + { + "epoch": 1.07, + "learning_rate": 2.9784080746135172e-05, + "loss": 1.2246, + "step": 2780 + }, + { + "epoch": 1.07, + "learning_rate": 2.977258208764533e-05, + "loss": 1.2385, + "step": 2790 + }, + { + "epoch": 1.07, + "learning_rate": 2.9759805800434395e-05, + "loss": 1.2707, + "step": 2800 + }, + { + "epoch": 1.08, + "learning_rate": 2.9747029513223457e-05, + "loss": 1.3105, + "step": 2810 + }, + { + "epoch": 1.08, + "learning_rate": 2.9734253226012523e-05, + "loss": 1.2547, + "step": 2820 + }, + { + "epoch": 1.08, + "learning_rate": 2.9721476938801585e-05, + "loss": 1.2105, + "step": 2830 + }, + { + "epoch": 1.09, + "learning_rate": 2.9708700651590648e-05, + "loss": 1.2743, + "step": 2840 + }, + { + "epoch": 1.09, + "learning_rate": 2.969592436437971e-05, + "loss": 1.2738, + "step": 2850 + }, + { + "epoch": 1.1, + "learning_rate": 2.9683148077168776e-05, + "loss": 1.2489, + "step": 2860 + }, + { + "epoch": 1.1, + "learning_rate": 2.9670371789957838e-05, + "loss": 1.2824, + "step": 2870 + }, + { + "epoch": 1.1, + "learning_rate": 2.9657595502746904e-05, + "loss": 1.2434, + "step": 2880 + }, + { + "epoch": 1.11, + "learning_rate": 2.9644819215535966e-05, + "loss": 1.203, + "step": 2890 + }, + { + "epoch": 1.11, + "learning_rate": 2.963204292832503e-05, + "loss": 1.2387, + "step": 2900 + }, + { + "epoch": 1.12, + "learning_rate": 2.9619266641114094e-05, + "loss": 1.2014, + "step": 2910 + }, + { + "epoch": 1.12, + "learning_rate": 2.9606490353903157e-05, + "loss": 1.2252, + "step": 2920 + }, + { + "epoch": 1.12, + "learning_rate": 2.9593714066692223e-05, + "loss": 1.2449, + "step": 2930 + }, + { + "epoch": 1.13, + "learning_rate": 2.958093777948128e-05, + "loss": 1.2564, + "step": 2940 + }, + { + "epoch": 1.13, + "learning_rate": 2.9568161492270347e-05, + "loss": 1.2215, + "step": 2950 + }, + { + "epoch": 1.13, + "learning_rate": 2.955538520505941e-05, + "loss": 1.2514, + "step": 2960 + }, + { + "epoch": 1.14, + "learning_rate": 2.9542608917848475e-05, + "loss": 1.172, + "step": 2970 + }, + { + "epoch": 1.14, + "learning_rate": 2.9529832630637538e-05, + "loss": 1.2287, + "step": 2980 + }, + { + "epoch": 1.15, + "learning_rate": 2.95170563434266e-05, + "loss": 1.2363, + "step": 2990 + }, + { + "epoch": 1.15, + "learning_rate": 2.9504280056215662e-05, + "loss": 1.262, + "step": 3000 + }, + { + "epoch": 1.15, + "learning_rate": 2.9491503769004728e-05, + "loss": 1.2625, + "step": 3010 + }, + { + "epoch": 1.16, + "learning_rate": 2.947872748179379e-05, + "loss": 1.1952, + "step": 3020 + }, + { + "epoch": 1.16, + "learning_rate": 2.9465951194582856e-05, + "loss": 1.1656, + "step": 3030 + }, + { + "epoch": 1.17, + "learning_rate": 2.945317490737192e-05, + "loss": 1.2057, + "step": 3040 + }, + { + "epoch": 1.17, + "learning_rate": 2.944039862016098e-05, + "loss": 1.2663, + "step": 3050 + }, + { + "epoch": 1.17, + "learning_rate": 2.9427622332950047e-05, + "loss": 1.265, + "step": 3060 + }, + { + "epoch": 1.18, + "learning_rate": 2.941484604573911e-05, + "loss": 1.2173, + "step": 3070 + }, + { + "epoch": 1.18, + "learning_rate": 2.9402069758528175e-05, + "loss": 1.2404, + "step": 3080 + }, + { + "epoch": 1.18, + "learning_rate": 2.9389293471317237e-05, + "loss": 1.2088, + "step": 3090 + }, + { + "epoch": 1.19, + "learning_rate": 2.93765171841063e-05, + "loss": 1.2703, + "step": 3100 + }, + { + "epoch": 1.19, + "learning_rate": 2.9363740896895362e-05, + "loss": 1.2675, + "step": 3110 + }, + { + "epoch": 1.2, + "learning_rate": 2.9350964609684428e-05, + "loss": 1.2993, + "step": 3120 + }, + { + "epoch": 1.2, + "learning_rate": 2.933818832247349e-05, + "loss": 1.2086, + "step": 3130 + }, + { + "epoch": 1.2, + "learning_rate": 2.9325412035262556e-05, + "loss": 1.2295, + "step": 3140 + }, + { + "epoch": 1.21, + "learning_rate": 2.9312635748051615e-05, + "loss": 1.2653, + "step": 3150 + }, + { + "epoch": 1.21, + "learning_rate": 2.929985946084068e-05, + "loss": 1.2664, + "step": 3160 + }, + { + "epoch": 1.22, + "learning_rate": 2.9287083173629743e-05, + "loss": 1.1978, + "step": 3170 + }, + { + "epoch": 1.22, + "learning_rate": 2.927430688641881e-05, + "loss": 1.247, + "step": 3180 + }, + { + "epoch": 1.22, + "learning_rate": 2.926153059920787e-05, + "loss": 1.2309, + "step": 3190 + }, + { + "epoch": 1.23, + "learning_rate": 2.9248754311996933e-05, + "loss": 1.1825, + "step": 3200 + }, + { + "epoch": 1.23, + "learning_rate": 2.9235978024785996e-05, + "loss": 1.2008, + "step": 3210 + }, + { + "epoch": 1.23, + "learning_rate": 2.922320173757506e-05, + "loss": 1.2321, + "step": 3220 + }, + { + "epoch": 1.24, + "learning_rate": 2.9210425450364124e-05, + "loss": 1.2492, + "step": 3230 + }, + { + "epoch": 1.24, + "learning_rate": 2.919764916315319e-05, + "loss": 1.2045, + "step": 3240 + }, + { + "epoch": 1.25, + "learning_rate": 2.9184872875942252e-05, + "loss": 1.2977, + "step": 3250 + }, + { + "epoch": 1.25, + "learning_rate": 2.9172096588731314e-05, + "loss": 1.1696, + "step": 3260 + }, + { + "epoch": 1.25, + "learning_rate": 2.915932030152038e-05, + "loss": 1.1654, + "step": 3270 + }, + { + "epoch": 1.26, + "learning_rate": 2.9146544014309442e-05, + "loss": 1.2436, + "step": 3280 + }, + { + "epoch": 1.26, + "learning_rate": 2.9133767727098508e-05, + "loss": 1.2246, + "step": 3290 + }, + { + "epoch": 1.26, + "learning_rate": 2.9120991439887567e-05, + "loss": 1.1985, + "step": 3300 + }, + { + "epoch": 1.27, + "learning_rate": 2.9108215152676633e-05, + "loss": 1.2358, + "step": 3310 + }, + { + "epoch": 1.27, + "learning_rate": 2.9095438865465695e-05, + "loss": 1.1543, + "step": 3320 + }, + { + "epoch": 1.28, + "learning_rate": 2.908266257825476e-05, + "loss": 1.2323, + "step": 3330 + }, + { + "epoch": 1.28, + "learning_rate": 2.9069886291043823e-05, + "loss": 1.1982, + "step": 3340 + }, + { + "epoch": 1.28, + "learning_rate": 2.9057110003832886e-05, + "loss": 1.2133, + "step": 3350 + }, + { + "epoch": 1.29, + "learning_rate": 2.9044333716621948e-05, + "loss": 1.2035, + "step": 3360 + }, + { + "epoch": 1.29, + "learning_rate": 2.9031557429411014e-05, + "loss": 1.1577, + "step": 3370 + }, + { + "epoch": 1.3, + "learning_rate": 2.9018781142200076e-05, + "loss": 1.2358, + "step": 3380 + }, + { + "epoch": 1.3, + "learning_rate": 2.9006004854989142e-05, + "loss": 1.1823, + "step": 3390 + }, + { + "epoch": 1.3, + "learning_rate": 2.8993228567778204e-05, + "loss": 1.1481, + "step": 3400 + }, + { + "epoch": 1.31, + "learning_rate": 2.8980452280567267e-05, + "loss": 1.2236, + "step": 3410 + }, + { + "epoch": 1.31, + "learning_rate": 2.8967675993356332e-05, + "loss": 1.176, + "step": 3420 + }, + { + "epoch": 1.31, + "learning_rate": 2.8954899706145395e-05, + "loss": 1.2002, + "step": 3430 + }, + { + "epoch": 1.32, + "learning_rate": 2.894212341893446e-05, + "loss": 1.1854, + "step": 3440 + }, + { + "epoch": 1.32, + "learning_rate": 2.8929347131723523e-05, + "loss": 1.2288, + "step": 3450 + }, + { + "epoch": 1.33, + "learning_rate": 2.8916570844512585e-05, + "loss": 1.2526, + "step": 3460 + }, + { + "epoch": 1.33, + "learning_rate": 2.8903794557301648e-05, + "loss": 1.23, + "step": 3470 + }, + { + "epoch": 1.33, + "learning_rate": 2.8891018270090713e-05, + "loss": 1.2538, + "step": 3480 + }, + { + "epoch": 1.34, + "learning_rate": 2.8878241982879776e-05, + "loss": 1.1736, + "step": 3490 + }, + { + "epoch": 1.34, + "learning_rate": 2.886546569566884e-05, + "loss": 1.2168, + "step": 3500 + }, + { + "epoch": 1.35, + "learning_rate": 2.88526894084579e-05, + "loss": 1.2636, + "step": 3510 + }, + { + "epoch": 1.35, + "learning_rate": 2.8839913121246966e-05, + "loss": 1.1677, + "step": 3520 + }, + { + "epoch": 1.35, + "learning_rate": 2.882713683403603e-05, + "loss": 1.2067, + "step": 3530 + }, + { + "epoch": 1.36, + "learning_rate": 2.8814360546825094e-05, + "loss": 1.2135, + "step": 3540 + }, + { + "epoch": 1.36, + "learning_rate": 2.8801584259614157e-05, + "loss": 1.22, + "step": 3550 + }, + { + "epoch": 1.36, + "learning_rate": 2.878880797240322e-05, + "loss": 1.1231, + "step": 3560 + }, + { + "epoch": 1.37, + "learning_rate": 2.877603168519228e-05, + "loss": 1.2263, + "step": 3570 + }, + { + "epoch": 1.37, + "learning_rate": 2.8763255397981347e-05, + "loss": 1.1942, + "step": 3580 + }, + { + "epoch": 1.38, + "learning_rate": 2.8750479110770413e-05, + "loss": 1.2248, + "step": 3590 + }, + { + "epoch": 1.38, + "learning_rate": 2.8737702823559475e-05, + "loss": 1.2274, + "step": 3600 + }, + { + "epoch": 1.38, + "learning_rate": 2.8724926536348538e-05, + "loss": 1.1893, + "step": 3610 + }, + { + "epoch": 1.39, + "learning_rate": 2.87121502491376e-05, + "loss": 1.1591, + "step": 3620 + }, + { + "epoch": 1.39, + "learning_rate": 2.8699373961926666e-05, + "loss": 1.2181, + "step": 3630 + }, + { + "epoch": 1.4, + "learning_rate": 2.8686597674715728e-05, + "loss": 1.2559, + "step": 3640 + }, + { + "epoch": 1.4, + "learning_rate": 2.8673821387504794e-05, + "loss": 1.2094, + "step": 3650 + }, + { + "epoch": 1.4, + "learning_rate": 2.8661045100293853e-05, + "loss": 1.1637, + "step": 3660 + }, + { + "epoch": 1.41, + "learning_rate": 2.864826881308292e-05, + "loss": 1.2916, + "step": 3670 + }, + { + "epoch": 1.41, + "learning_rate": 2.863549252587198e-05, + "loss": 1.2407, + "step": 3680 + }, + { + "epoch": 1.41, + "learning_rate": 2.8622716238661047e-05, + "loss": 1.2177, + "step": 3690 + }, + { + "epoch": 1.42, + "learning_rate": 2.860993995145011e-05, + "loss": 1.1724, + "step": 3700 + }, + { + "epoch": 1.42, + "learning_rate": 2.859716366423917e-05, + "loss": 1.1939, + "step": 3710 + }, + { + "epoch": 1.43, + "learning_rate": 2.8584387377028234e-05, + "loss": 1.1435, + "step": 3720 + }, + { + "epoch": 1.43, + "learning_rate": 2.85716110898173e-05, + "loss": 1.1551, + "step": 3730 + }, + { + "epoch": 1.43, + "learning_rate": 2.8558834802606362e-05, + "loss": 1.1566, + "step": 3740 + }, + { + "epoch": 1.44, + "learning_rate": 2.8546058515395428e-05, + "loss": 1.1552, + "step": 3750 + }, + { + "epoch": 1.44, + "learning_rate": 2.8533282228184493e-05, + "loss": 1.2611, + "step": 3760 + }, + { + "epoch": 1.44, + "learning_rate": 2.8520505940973552e-05, + "loss": 1.2098, + "step": 3770 + }, + { + "epoch": 1.45, + "learning_rate": 2.8507729653762618e-05, + "loss": 1.2531, + "step": 3780 + }, + { + "epoch": 1.45, + "learning_rate": 2.849495336655168e-05, + "loss": 1.244, + "step": 3790 + }, + { + "epoch": 1.46, + "learning_rate": 2.8482177079340746e-05, + "loss": 1.1791, + "step": 3800 + }, + { + "epoch": 1.46, + "learning_rate": 2.846940079212981e-05, + "loss": 1.1851, + "step": 3810 + }, + { + "epoch": 1.46, + "learning_rate": 2.845662450491887e-05, + "loss": 1.2081, + "step": 3820 + }, + { + "epoch": 1.47, + "learning_rate": 2.8443848217707933e-05, + "loss": 1.1826, + "step": 3830 + }, + { + "epoch": 1.47, + "learning_rate": 2.8431071930497e-05, + "loss": 1.1684, + "step": 3840 + }, + { + "epoch": 1.48, + "learning_rate": 2.841829564328606e-05, + "loss": 1.1964, + "step": 3850 + }, + { + "epoch": 1.48, + "learning_rate": 2.8405519356075127e-05, + "loss": 1.2004, + "step": 3860 + }, + { + "epoch": 1.48, + "learning_rate": 2.8392743068864186e-05, + "loss": 1.197, + "step": 3870 + }, + { + "epoch": 1.49, + "learning_rate": 2.8379966781653252e-05, + "loss": 1.1414, + "step": 3880 + }, + { + "epoch": 1.49, + "learning_rate": 2.8367190494442314e-05, + "loss": 1.2399, + "step": 3890 + }, + { + "epoch": 1.49, + "learning_rate": 2.835441420723138e-05, + "loss": 1.1402, + "step": 3900 + }, + { + "epoch": 1.5, + "learning_rate": 2.8341637920020442e-05, + "loss": 1.0983, + "step": 3910 + }, + { + "epoch": 1.5, + "learning_rate": 2.8328861632809505e-05, + "loss": 1.1021, + "step": 3920 + }, + { + "epoch": 1.51, + "learning_rate": 2.831608534559857e-05, + "loss": 1.0821, + "step": 3930 + }, + { + "epoch": 1.51, + "learning_rate": 2.8303309058387633e-05, + "loss": 1.2289, + "step": 3940 + }, + { + "epoch": 1.51, + "learning_rate": 2.82905327711767e-05, + "loss": 1.2545, + "step": 3950 + }, + { + "epoch": 1.52, + "learning_rate": 2.827775648396576e-05, + "loss": 1.2209, + "step": 3960 + }, + { + "epoch": 1.52, + "learning_rate": 2.8264980196754823e-05, + "loss": 1.1316, + "step": 3970 + }, + { + "epoch": 1.53, + "learning_rate": 2.8252203909543886e-05, + "loss": 1.141, + "step": 3980 + }, + { + "epoch": 1.53, + "learning_rate": 2.823942762233295e-05, + "loss": 1.2099, + "step": 3990 + }, + { + "epoch": 1.53, + "learning_rate": 2.8226651335122014e-05, + "loss": 1.2144, + "step": 4000 + }, + { + "epoch": 1.54, + "learning_rate": 2.821387504791108e-05, + "loss": 1.0854, + "step": 4010 + }, + { + "epoch": 1.54, + "learning_rate": 2.820109876070014e-05, + "loss": 1.1082, + "step": 4020 + }, + { + "epoch": 1.54, + "learning_rate": 2.8188322473489204e-05, + "loss": 1.1284, + "step": 4030 + }, + { + "epoch": 1.55, + "learning_rate": 2.8175546186278267e-05, + "loss": 1.1415, + "step": 4040 + }, + { + "epoch": 1.55, + "learning_rate": 2.8162769899067332e-05, + "loss": 1.0973, + "step": 4050 + }, + { + "epoch": 1.56, + "learning_rate": 2.8149993611856395e-05, + "loss": 1.154, + "step": 4060 + }, + { + "epoch": 1.56, + "learning_rate": 2.813721732464546e-05, + "loss": 1.2014, + "step": 4070 + }, + { + "epoch": 1.56, + "learning_rate": 2.812444103743452e-05, + "loss": 1.1277, + "step": 4080 + }, + { + "epoch": 1.57, + "learning_rate": 2.8111664750223585e-05, + "loss": 1.15, + "step": 4090 + }, + { + "epoch": 1.57, + "learning_rate": 2.809888846301265e-05, + "loss": 1.1828, + "step": 4100 + }, + { + "epoch": 1.58, + "learning_rate": 2.8086112175801713e-05, + "loss": 1.1453, + "step": 4110 + }, + { + "epoch": 1.58, + "learning_rate": 2.807333588859078e-05, + "loss": 1.2141, + "step": 4120 + }, + { + "epoch": 1.58, + "learning_rate": 2.8060559601379838e-05, + "loss": 1.1202, + "step": 4130 + }, + { + "epoch": 1.59, + "learning_rate": 2.8047783314168904e-05, + "loss": 1.2477, + "step": 4140 + }, + { + "epoch": 1.59, + "learning_rate": 2.8035007026957966e-05, + "loss": 1.17, + "step": 4150 + }, + { + "epoch": 1.59, + "learning_rate": 2.8022230739747032e-05, + "loss": 1.1806, + "step": 4160 + }, + { + "epoch": 1.6, + "learning_rate": 2.8009454452536094e-05, + "loss": 1.1241, + "step": 4170 + }, + { + "epoch": 1.6, + "learning_rate": 2.7996678165325157e-05, + "loss": 1.1069, + "step": 4180 + }, + { + "epoch": 1.61, + "learning_rate": 2.798390187811422e-05, + "loss": 1.1165, + "step": 4190 + }, + { + "epoch": 1.61, + "learning_rate": 2.7971125590903285e-05, + "loss": 1.149, + "step": 4200 + }, + { + "epoch": 1.61, + "learning_rate": 2.7958349303692347e-05, + "loss": 1.1921, + "step": 4210 + }, + { + "epoch": 1.62, + "learning_rate": 2.7945573016481413e-05, + "loss": 1.1559, + "step": 4220 + }, + { + "epoch": 1.62, + "learning_rate": 2.7932796729270472e-05, + "loss": 1.1451, + "step": 4230 + }, + { + "epoch": 1.63, + "learning_rate": 2.7920020442059538e-05, + "loss": 1.1556, + "step": 4240 + }, + { + "epoch": 1.63, + "learning_rate": 2.79072441548486e-05, + "loss": 1.1168, + "step": 4250 + }, + { + "epoch": 1.63, + "learning_rate": 2.7894467867637666e-05, + "loss": 1.1272, + "step": 4260 + }, + { + "epoch": 1.64, + "learning_rate": 2.788169158042673e-05, + "loss": 1.1107, + "step": 4270 + }, + { + "epoch": 1.64, + "learning_rate": 2.786891529321579e-05, + "loss": 1.0901, + "step": 4280 + }, + { + "epoch": 1.64, + "learning_rate": 2.7856139006004856e-05, + "loss": 1.073, + "step": 4290 + }, + { + "epoch": 1.65, + "learning_rate": 2.784336271879392e-05, + "loss": 1.15, + "step": 4300 + }, + { + "epoch": 1.65, + "learning_rate": 2.7830586431582984e-05, + "loss": 1.1264, + "step": 4310 + }, + { + "epoch": 1.66, + "learning_rate": 2.7817810144372047e-05, + "loss": 1.2153, + "step": 4320 + }, + { + "epoch": 1.66, + "learning_rate": 2.780503385716111e-05, + "loss": 1.147, + "step": 4330 + }, + { + "epoch": 1.66, + "learning_rate": 2.779225756995017e-05, + "loss": 1.132, + "step": 4340 + }, + { + "epoch": 1.67, + "learning_rate": 2.7779481282739237e-05, + "loss": 1.1926, + "step": 4350 + }, + { + "epoch": 1.67, + "learning_rate": 2.77667049955283e-05, + "loss": 1.147, + "step": 4360 + }, + { + "epoch": 1.67, + "learning_rate": 2.7753928708317365e-05, + "loss": 1.2182, + "step": 4370 + }, + { + "epoch": 1.68, + "learning_rate": 2.7741152421106428e-05, + "loss": 1.2095, + "step": 4380 + }, + { + "epoch": 1.68, + "learning_rate": 2.772837613389549e-05, + "loss": 1.1776, + "step": 4390 + }, + { + "epoch": 1.69, + "learning_rate": 2.7715599846684552e-05, + "loss": 1.0947, + "step": 4400 + }, + { + "epoch": 1.69, + "learning_rate": 2.7702823559473618e-05, + "loss": 1.0506, + "step": 4410 + }, + { + "epoch": 1.69, + "learning_rate": 2.769004727226268e-05, + "loss": 1.1986, + "step": 4420 + }, + { + "epoch": 1.7, + "learning_rate": 2.7677270985051746e-05, + "loss": 1.1531, + "step": 4430 + }, + { + "epoch": 1.7, + "learning_rate": 2.766449469784081e-05, + "loss": 1.224, + "step": 4440 + }, + { + "epoch": 1.71, + "learning_rate": 2.765171841062987e-05, + "loss": 1.1256, + "step": 4450 + }, + { + "epoch": 1.71, + "learning_rate": 2.7638942123418937e-05, + "loss": 1.2251, + "step": 4460 + }, + { + "epoch": 1.71, + "learning_rate": 2.7626165836208e-05, + "loss": 1.196, + "step": 4470 + }, + { + "epoch": 1.72, + "learning_rate": 2.7613389548997065e-05, + "loss": 1.1201, + "step": 4480 + }, + { + "epoch": 1.72, + "learning_rate": 2.7600613261786124e-05, + "loss": 1.1389, + "step": 4490 + }, + { + "epoch": 1.72, + "learning_rate": 2.758783697457519e-05, + "loss": 1.1334, + "step": 4500 + }, + { + "epoch": 1.73, + "learning_rate": 2.7575060687364252e-05, + "loss": 1.1685, + "step": 4510 + }, + { + "epoch": 1.73, + "learning_rate": 2.7562284400153318e-05, + "loss": 1.1864, + "step": 4520 + }, + { + "epoch": 1.74, + "learning_rate": 2.754950811294238e-05, + "loss": 1.177, + "step": 4530 + }, + { + "epoch": 1.74, + "learning_rate": 2.7536731825731442e-05, + "loss": 1.1563, + "step": 4540 + }, + { + "epoch": 1.74, + "learning_rate": 2.7523955538520505e-05, + "loss": 1.187, + "step": 4550 + }, + { + "epoch": 1.75, + "learning_rate": 2.751117925130957e-05, + "loss": 1.1834, + "step": 4560 + }, + { + "epoch": 1.75, + "learning_rate": 2.7498402964098633e-05, + "loss": 1.1543, + "step": 4570 + }, + { + "epoch": 1.76, + "learning_rate": 2.74856266768877e-05, + "loss": 1.1232, + "step": 4580 + }, + { + "epoch": 1.76, + "learning_rate": 2.7472850389676758e-05, + "loss": 1.115, + "step": 4590 + }, + { + "epoch": 1.76, + "learning_rate": 2.7460074102465823e-05, + "loss": 1.1729, + "step": 4600 + }, + { + "epoch": 1.77, + "learning_rate": 2.744729781525489e-05, + "loss": 1.1257, + "step": 4610 + }, + { + "epoch": 1.77, + "learning_rate": 2.743452152804395e-05, + "loss": 1.1555, + "step": 4620 + }, + { + "epoch": 1.77, + "learning_rate": 2.7421745240833017e-05, + "loss": 1.1492, + "step": 4630 + }, + { + "epoch": 1.78, + "learning_rate": 2.7408968953622076e-05, + "loss": 1.1116, + "step": 4640 + }, + { + "epoch": 1.78, + "learning_rate": 2.7396192666411142e-05, + "loss": 1.0887, + "step": 4650 + }, + { + "epoch": 1.79, + "learning_rate": 2.7383416379200204e-05, + "loss": 1.1007, + "step": 4660 + }, + { + "epoch": 1.79, + "learning_rate": 2.737064009198927e-05, + "loss": 1.0951, + "step": 4670 + }, + { + "epoch": 1.79, + "learning_rate": 2.7357863804778332e-05, + "loss": 1.0771, + "step": 4680 + }, + { + "epoch": 1.8, + "learning_rate": 2.7345087517567395e-05, + "loss": 1.1075, + "step": 4690 + }, + { + "epoch": 1.8, + "learning_rate": 2.7333588859077555e-05, + "loss": 1.2025, + "step": 4700 + }, + { + "epoch": 1.81, + "learning_rate": 2.7320812571866617e-05, + "loss": 1.1542, + "step": 4710 + }, + { + "epoch": 1.81, + "learning_rate": 2.730803628465568e-05, + "loss": 1.1391, + "step": 4720 + }, + { + "epoch": 1.81, + "learning_rate": 2.7295259997444742e-05, + "loss": 1.168, + "step": 4730 + }, + { + "epoch": 1.82, + "learning_rate": 2.7282483710233808e-05, + "loss": 1.0547, + "step": 4740 + }, + { + "epoch": 1.82, + "learning_rate": 2.726970742302287e-05, + "loss": 1.0872, + "step": 4750 + }, + { + "epoch": 1.82, + "learning_rate": 2.7256931135811936e-05, + "loss": 1.1366, + "step": 4760 + }, + { + "epoch": 1.83, + "learning_rate": 2.7244154848600995e-05, + "loss": 1.1685, + "step": 4770 + }, + { + "epoch": 1.83, + "learning_rate": 2.723137856139006e-05, + "loss": 1.1861, + "step": 4780 + }, + { + "epoch": 1.84, + "learning_rate": 2.7218602274179123e-05, + "loss": 1.1452, + "step": 4790 + }, + { + "epoch": 1.84, + "learning_rate": 2.720582598696819e-05, + "loss": 1.1779, + "step": 4800 + }, + { + "epoch": 1.84, + "learning_rate": 2.719304969975725e-05, + "loss": 1.0635, + "step": 4810 + }, + { + "epoch": 1.85, + "learning_rate": 2.7180273412546314e-05, + "loss": 1.045, + "step": 4820 + }, + { + "epoch": 1.85, + "learning_rate": 2.716749712533538e-05, + "loss": 1.2208, + "step": 4830 + }, + { + "epoch": 1.86, + "learning_rate": 2.715472083812444e-05, + "loss": 1.1713, + "step": 4840 + }, + { + "epoch": 1.86, + "learning_rate": 2.7141944550913507e-05, + "loss": 1.1346, + "step": 4850 + }, + { + "epoch": 1.86, + "learning_rate": 2.712916826370257e-05, + "loss": 1.1371, + "step": 4860 + }, + { + "epoch": 1.87, + "learning_rate": 2.7116391976491632e-05, + "loss": 1.0715, + "step": 4870 + }, + { + "epoch": 1.87, + "learning_rate": 2.7103615689280694e-05, + "loss": 1.1294, + "step": 4880 + }, + { + "epoch": 1.87, + "learning_rate": 2.709083940206976e-05, + "loss": 1.0605, + "step": 4890 + }, + { + "epoch": 1.88, + "learning_rate": 2.7078063114858823e-05, + "loss": 1.131, + "step": 4900 + }, + { + "epoch": 1.88, + "learning_rate": 2.706528682764789e-05, + "loss": 1.1283, + "step": 4910 + }, + { + "epoch": 1.89, + "learning_rate": 2.7052510540436947e-05, + "loss": 1.1328, + "step": 4920 + }, + { + "epoch": 1.89, + "learning_rate": 2.7039734253226013e-05, + "loss": 1.0773, + "step": 4930 + }, + { + "epoch": 1.89, + "learning_rate": 2.7026957966015075e-05, + "loss": 1.1057, + "step": 4940 + }, + { + "epoch": 1.9, + "learning_rate": 2.701418167880414e-05, + "loss": 1.1657, + "step": 4950 + }, + { + "epoch": 1.9, + "learning_rate": 2.7001405391593204e-05, + "loss": 1.1232, + "step": 4960 + }, + { + "epoch": 1.9, + "learning_rate": 2.6988629104382266e-05, + "loss": 1.1655, + "step": 4970 + }, + { + "epoch": 1.91, + "learning_rate": 2.6975852817171328e-05, + "loss": 1.0693, + "step": 4980 + }, + { + "epoch": 1.91, + "learning_rate": 2.6963076529960394e-05, + "loss": 1.0329, + "step": 4990 + }, + { + "epoch": 1.92, + "learning_rate": 2.6950300242749456e-05, + "loss": 1.1258, + "step": 5000 + }, + { + "epoch": 1.92, + "learning_rate": 2.6937523955538522e-05, + "loss": 1.1272, + "step": 5010 + }, + { + "epoch": 1.92, + "learning_rate": 2.6924747668327585e-05, + "loss": 1.1643, + "step": 5020 + }, + { + "epoch": 1.93, + "learning_rate": 2.6911971381116647e-05, + "loss": 1.1449, + "step": 5030 + }, + { + "epoch": 1.93, + "learning_rate": 2.6899195093905713e-05, + "loss": 1.1833, + "step": 5040 + }, + { + "epoch": 1.94, + "learning_rate": 2.6886418806694775e-05, + "loss": 1.0866, + "step": 5050 + }, + { + "epoch": 1.94, + "learning_rate": 2.687364251948384e-05, + "loss": 1.0456, + "step": 5060 + }, + { + "epoch": 1.94, + "learning_rate": 2.6860866232272903e-05, + "loss": 1.1877, + "step": 5070 + }, + { + "epoch": 1.95, + "learning_rate": 2.6848089945061965e-05, + "loss": 1.0884, + "step": 5080 + }, + { + "epoch": 1.95, + "learning_rate": 2.6835313657851028e-05, + "loss": 1.1218, + "step": 5090 + }, + { + "epoch": 1.95, + "learning_rate": 2.6822537370640094e-05, + "loss": 1.1494, + "step": 5100 + }, + { + "epoch": 1.96, + "learning_rate": 2.6809761083429156e-05, + "loss": 1.1362, + "step": 5110 + }, + { + "epoch": 1.96, + "learning_rate": 2.679698479621822e-05, + "loss": 1.1797, + "step": 5120 + }, + { + "epoch": 1.97, + "learning_rate": 2.678420850900728e-05, + "loss": 1.096, + "step": 5130 + }, + { + "epoch": 1.97, + "learning_rate": 2.6771432221796346e-05, + "loss": 1.0352, + "step": 5140 + }, + { + "epoch": 1.97, + "learning_rate": 2.675865593458541e-05, + "loss": 1.0766, + "step": 5150 + }, + { + "epoch": 1.98, + "learning_rate": 2.6745879647374475e-05, + "loss": 1.0958, + "step": 5160 + }, + { + "epoch": 1.98, + "learning_rate": 2.6733103360163537e-05, + "loss": 1.0997, + "step": 5170 + }, + { + "epoch": 1.99, + "learning_rate": 2.67203270729526e-05, + "loss": 1.2081, + "step": 5180 + }, + { + "epoch": 1.99, + "learning_rate": 2.6707550785741665e-05, + "loss": 1.0706, + "step": 5190 + }, + { + "epoch": 1.99, + "learning_rate": 2.6694774498530727e-05, + "loss": 1.1818, + "step": 5200 + }, + { + "epoch": 2.0, + "learning_rate": 2.6681998211319793e-05, + "loss": 1.1918, + "step": 5210 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.6628177179407326, + "eval_loss": 0.9315594434738159, + "eval_runtime": 299.8356, + "eval_samples_per_second": 139.22, + "eval_steps_per_second": 69.611, + "step": 5218 + }, + { + "epoch": 2.0, + "learning_rate": 2.6669221924108855e-05, + "loss": 1.1672, + "step": 5220 + }, + { + "epoch": 2.0, + "learning_rate": 2.6656445636897918e-05, + "loss": 1.1412, + "step": 5230 + }, + { + "epoch": 2.01, + "learning_rate": 2.664366934968698e-05, + "loss": 1.0237, + "step": 5240 + }, + { + "epoch": 2.01, + "learning_rate": 2.6630893062476046e-05, + "loss": 1.1565, + "step": 5250 + }, + { + "epoch": 2.02, + "learning_rate": 2.661811677526511e-05, + "loss": 1.1093, + "step": 5260 + }, + { + "epoch": 2.02, + "learning_rate": 2.6605340488054174e-05, + "loss": 1.0703, + "step": 5270 + }, + { + "epoch": 2.02, + "learning_rate": 2.6592564200843233e-05, + "loss": 1.122, + "step": 5280 + }, + { + "epoch": 2.03, + "learning_rate": 2.65797879136323e-05, + "loss": 1.0192, + "step": 5290 + }, + { + "epoch": 2.03, + "learning_rate": 2.656701162642136e-05, + "loss": 1.1086, + "step": 5300 + }, + { + "epoch": 2.04, + "learning_rate": 2.6554235339210427e-05, + "loss": 1.0331, + "step": 5310 + }, + { + "epoch": 2.04, + "learning_rate": 2.654145905199949e-05, + "loss": 1.122, + "step": 5320 + }, + { + "epoch": 2.04, + "learning_rate": 2.652868276478855e-05, + "loss": 1.1772, + "step": 5330 + }, + { + "epoch": 2.05, + "learning_rate": 2.6515906477577614e-05, + "loss": 1.2321, + "step": 5340 + }, + { + "epoch": 2.05, + "learning_rate": 2.650313019036668e-05, + "loss": 1.0778, + "step": 5350 + }, + { + "epoch": 2.05, + "learning_rate": 2.6490353903155745e-05, + "loss": 1.111, + "step": 5360 + }, + { + "epoch": 2.06, + "learning_rate": 2.6477577615944808e-05, + "loss": 1.206, + "step": 5370 + }, + { + "epoch": 2.06, + "learning_rate": 2.6464801328733874e-05, + "loss": 1.0628, + "step": 5380 + }, + { + "epoch": 2.07, + "learning_rate": 2.6452025041522933e-05, + "loss": 1.0674, + "step": 5390 + }, + { + "epoch": 2.07, + "learning_rate": 2.6439248754312e-05, + "loss": 1.1364, + "step": 5400 + }, + { + "epoch": 2.07, + "learning_rate": 2.642647246710106e-05, + "loss": 1.0581, + "step": 5410 + }, + { + "epoch": 2.08, + "learning_rate": 2.6413696179890126e-05, + "loss": 1.0979, + "step": 5420 + }, + { + "epoch": 2.08, + "learning_rate": 2.640091989267919e-05, + "loss": 1.0712, + "step": 5430 + }, + { + "epoch": 2.09, + "learning_rate": 2.638814360546825e-05, + "loss": 1.1597, + "step": 5440 + }, + { + "epoch": 2.09, + "learning_rate": 2.6375367318257314e-05, + "loss": 1.1, + "step": 5450 + }, + { + "epoch": 2.09, + "learning_rate": 2.636259103104638e-05, + "loss": 1.1021, + "step": 5460 + }, + { + "epoch": 2.1, + "learning_rate": 2.634981474383544e-05, + "loss": 1.199, + "step": 5470 + }, + { + "epoch": 2.1, + "learning_rate": 2.6337038456624507e-05, + "loss": 1.1968, + "step": 5480 + }, + { + "epoch": 2.1, + "learning_rate": 2.6324262169413566e-05, + "loss": 1.1201, + "step": 5490 + }, + { + "epoch": 2.11, + "learning_rate": 2.6311485882202632e-05, + "loss": 1.0864, + "step": 5500 + }, + { + "epoch": 2.11, + "learning_rate": 2.6298709594991694e-05, + "loss": 1.1718, + "step": 5510 + }, + { + "epoch": 2.12, + "learning_rate": 2.628593330778076e-05, + "loss": 1.1743, + "step": 5520 + }, + { + "epoch": 2.12, + "learning_rate": 2.6273157020569826e-05, + "loss": 1.0378, + "step": 5530 + }, + { + "epoch": 2.12, + "learning_rate": 2.6260380733358885e-05, + "loss": 1.0786, + "step": 5540 + }, + { + "epoch": 2.13, + "learning_rate": 2.624760444614795e-05, + "loss": 1.1166, + "step": 5550 + }, + { + "epoch": 2.13, + "learning_rate": 2.6234828158937013e-05, + "loss": 1.0256, + "step": 5560 + }, + { + "epoch": 2.13, + "learning_rate": 2.622205187172608e-05, + "loss": 1.0558, + "step": 5570 + }, + { + "epoch": 2.14, + "learning_rate": 2.620927558451514e-05, + "loss": 1.1041, + "step": 5580 + }, + { + "epoch": 2.14, + "learning_rate": 2.6196499297304204e-05, + "loss": 1.1053, + "step": 5590 + }, + { + "epoch": 2.15, + "learning_rate": 2.6183723010093266e-05, + "loss": 1.18, + "step": 5600 + }, + { + "epoch": 2.15, + "learning_rate": 2.617094672288233e-05, + "loss": 1.1121, + "step": 5610 + }, + { + "epoch": 2.15, + "learning_rate": 2.6158170435671394e-05, + "loss": 1.1014, + "step": 5620 + }, + { + "epoch": 2.16, + "learning_rate": 2.614539414846046e-05, + "loss": 1.0429, + "step": 5630 + }, + { + "epoch": 2.16, + "learning_rate": 2.613261786124952e-05, + "loss": 1.0736, + "step": 5640 + }, + { + "epoch": 2.17, + "learning_rate": 2.6119841574038584e-05, + "loss": 1.0831, + "step": 5650 + }, + { + "epoch": 2.17, + "learning_rate": 2.6107065286827647e-05, + "loss": 1.1652, + "step": 5660 + }, + { + "epoch": 2.17, + "learning_rate": 2.6094288999616713e-05, + "loss": 1.0742, + "step": 5670 + }, + { + "epoch": 2.18, + "learning_rate": 2.6081512712405775e-05, + "loss": 1.1007, + "step": 5680 + }, + { + "epoch": 2.18, + "learning_rate": 2.6068736425194837e-05, + "loss": 1.1308, + "step": 5690 + }, + { + "epoch": 2.18, + "learning_rate": 2.6055960137983903e-05, + "loss": 1.0248, + "step": 5700 + }, + { + "epoch": 2.19, + "learning_rate": 2.6043183850772965e-05, + "loss": 1.0279, + "step": 5710 + }, + { + "epoch": 2.19, + "learning_rate": 2.603040756356203e-05, + "loss": 1.1586, + "step": 5720 + }, + { + "epoch": 2.2, + "learning_rate": 2.6017631276351094e-05, + "loss": 1.0694, + "step": 5730 + }, + { + "epoch": 2.2, + "learning_rate": 2.600485498914016e-05, + "loss": 1.1462, + "step": 5740 + }, + { + "epoch": 2.2, + "learning_rate": 2.5992078701929218e-05, + "loss": 1.1396, + "step": 5750 + }, + { + "epoch": 2.21, + "learning_rate": 2.5979302414718284e-05, + "loss": 1.1173, + "step": 5760 + }, + { + "epoch": 2.21, + "learning_rate": 2.5966526127507346e-05, + "loss": 1.1395, + "step": 5770 + }, + { + "epoch": 2.22, + "learning_rate": 2.5953749840296412e-05, + "loss": 1.0256, + "step": 5780 + }, + { + "epoch": 2.22, + "learning_rate": 2.5940973553085475e-05, + "loss": 1.1937, + "step": 5790 + }, + { + "epoch": 2.22, + "learning_rate": 2.5928197265874537e-05, + "loss": 1.1789, + "step": 5800 + }, + { + "epoch": 2.23, + "learning_rate": 2.59154209786636e-05, + "loss": 1.1477, + "step": 5810 + }, + { + "epoch": 2.23, + "learning_rate": 2.5902644691452665e-05, + "loss": 1.1234, + "step": 5820 + }, + { + "epoch": 2.23, + "learning_rate": 2.5889868404241727e-05, + "loss": 1.1487, + "step": 5830 + }, + { + "epoch": 2.24, + "learning_rate": 2.5877092117030793e-05, + "loss": 1.0304, + "step": 5840 + }, + { + "epoch": 2.24, + "learning_rate": 2.5864315829819852e-05, + "loss": 1.1853, + "step": 5850 + }, + { + "epoch": 2.25, + "learning_rate": 2.5851539542608918e-05, + "loss": 1.1154, + "step": 5860 + }, + { + "epoch": 2.25, + "learning_rate": 2.5838763255397984e-05, + "loss": 1.0777, + "step": 5870 + }, + { + "epoch": 2.25, + "learning_rate": 2.5825986968187046e-05, + "loss": 1.0639, + "step": 5880 + }, + { + "epoch": 2.26, + "learning_rate": 2.581321068097611e-05, + "loss": 1.0546, + "step": 5890 + }, + { + "epoch": 2.26, + "learning_rate": 2.580043439376517e-05, + "loss": 1.0165, + "step": 5900 + }, + { + "epoch": 2.27, + "learning_rate": 2.5787658106554236e-05, + "loss": 1.1897, + "step": 5910 + }, + { + "epoch": 2.27, + "learning_rate": 2.57748818193433e-05, + "loss": 1.1065, + "step": 5920 + }, + { + "epoch": 2.27, + "learning_rate": 2.5762105532132365e-05, + "loss": 1.0956, + "step": 5930 + }, + { + "epoch": 2.28, + "learning_rate": 2.5749329244921427e-05, + "loss": 1.0996, + "step": 5940 + }, + { + "epoch": 2.28, + "learning_rate": 2.573655295771049e-05, + "loss": 1.0707, + "step": 5950 + }, + { + "epoch": 2.28, + "learning_rate": 2.572377667049955e-05, + "loss": 1.1295, + "step": 5960 + }, + { + "epoch": 2.29, + "learning_rate": 2.5711000383288617e-05, + "loss": 1.0654, + "step": 5970 + }, + { + "epoch": 2.29, + "learning_rate": 2.569822409607768e-05, + "loss": 1.1163, + "step": 5980 + }, + { + "epoch": 2.3, + "learning_rate": 2.5685447808866745e-05, + "loss": 1.1043, + "step": 5990 + }, + { + "epoch": 2.3, + "learning_rate": 2.5672671521655804e-05, + "loss": 1.1324, + "step": 6000 + }, + { + "epoch": 2.3, + "learning_rate": 2.565989523444487e-05, + "loss": 1.082, + "step": 6010 + }, + { + "epoch": 2.31, + "learning_rate": 2.5647118947233933e-05, + "loss": 1.1027, + "step": 6020 + }, + { + "epoch": 2.31, + "learning_rate": 2.5634342660023e-05, + "loss": 1.0954, + "step": 6030 + }, + { + "epoch": 2.32, + "learning_rate": 2.5621566372812064e-05, + "loss": 1.0809, + "step": 6040 + }, + { + "epoch": 2.32, + "learning_rate": 2.5608790085601126e-05, + "loss": 1.0714, + "step": 6050 + }, + { + "epoch": 2.32, + "learning_rate": 2.559601379839019e-05, + "loss": 1.0627, + "step": 6060 + }, + { + "epoch": 2.33, + "learning_rate": 2.558323751117925e-05, + "loss": 1.0128, + "step": 6070 + }, + { + "epoch": 2.33, + "learning_rate": 2.5570461223968317e-05, + "loss": 1.0972, + "step": 6080 + }, + { + "epoch": 2.33, + "learning_rate": 2.555768493675738e-05, + "loss": 1.0176, + "step": 6090 + }, + { + "epoch": 2.34, + "learning_rate": 2.5544908649546445e-05, + "loss": 1.0691, + "step": 6100 + }, + { + "epoch": 2.34, + "learning_rate": 2.5532132362335504e-05, + "loss": 1.0893, + "step": 6110 + }, + { + "epoch": 2.35, + "learning_rate": 2.551935607512457e-05, + "loss": 1.1509, + "step": 6120 + }, + { + "epoch": 2.35, + "learning_rate": 2.5506579787913632e-05, + "loss": 1.1002, + "step": 6130 + }, + { + "epoch": 2.35, + "learning_rate": 2.5493803500702698e-05, + "loss": 1.1349, + "step": 6140 + }, + { + "epoch": 2.36, + "learning_rate": 2.548102721349176e-05, + "loss": 1.0991, + "step": 6150 + }, + { + "epoch": 2.36, + "learning_rate": 2.5468250926280823e-05, + "loss": 1.1392, + "step": 6160 + }, + { + "epoch": 2.36, + "learning_rate": 2.5455474639069885e-05, + "loss": 1.0253, + "step": 6170 + }, + { + "epoch": 2.37, + "learning_rate": 2.544269835185895e-05, + "loss": 1.0333, + "step": 6180 + }, + { + "epoch": 2.37, + "learning_rate": 2.5429922064648013e-05, + "loss": 1.0522, + "step": 6190 + }, + { + "epoch": 2.38, + "learning_rate": 2.541714577743708e-05, + "loss": 1.0827, + "step": 6200 + }, + { + "epoch": 2.38, + "learning_rate": 2.540436949022614e-05, + "loss": 1.0338, + "step": 6210 + }, + { + "epoch": 2.38, + "learning_rate": 2.5391593203015204e-05, + "loss": 1.0513, + "step": 6220 + }, + { + "epoch": 2.39, + "learning_rate": 2.537881691580427e-05, + "loss": 1.0698, + "step": 6230 + }, + { + "epoch": 2.39, + "learning_rate": 2.536604062859333e-05, + "loss": 1.107, + "step": 6240 + }, + { + "epoch": 2.4, + "learning_rate": 2.5353264341382397e-05, + "loss": 1.0761, + "step": 6250 + }, + { + "epoch": 2.4, + "learning_rate": 2.5340488054171456e-05, + "loss": 1.1251, + "step": 6260 + }, + { + "epoch": 2.4, + "learning_rate": 2.5327711766960522e-05, + "loss": 1.0736, + "step": 6270 + }, + { + "epoch": 2.41, + "learning_rate": 2.5314935479749584e-05, + "loss": 1.0833, + "step": 6280 + }, + { + "epoch": 2.41, + "learning_rate": 2.530215919253865e-05, + "loss": 1.0835, + "step": 6290 + }, + { + "epoch": 2.41, + "learning_rate": 2.5289382905327713e-05, + "loss": 0.9721, + "step": 6300 + }, + { + "epoch": 2.42, + "learning_rate": 2.5276606618116775e-05, + "loss": 1.047, + "step": 6310 + }, + { + "epoch": 2.42, + "learning_rate": 2.5263830330905837e-05, + "loss": 1.1411, + "step": 6320 + }, + { + "epoch": 2.43, + "learning_rate": 2.5251054043694903e-05, + "loss": 1.0874, + "step": 6330 + }, + { + "epoch": 2.43, + "learning_rate": 2.5238277756483965e-05, + "loss": 1.14, + "step": 6340 + }, + { + "epoch": 2.43, + "learning_rate": 2.522550146927303e-05, + "loss": 1.1002, + "step": 6350 + }, + { + "epoch": 2.44, + "learning_rate": 2.5212725182062094e-05, + "loss": 1.0454, + "step": 6360 + }, + { + "epoch": 2.44, + "learning_rate": 2.5199948894851156e-05, + "loss": 1.1183, + "step": 6370 + }, + { + "epoch": 2.45, + "learning_rate": 2.518717260764022e-05, + "loss": 1.1313, + "step": 6380 + }, + { + "epoch": 2.45, + "learning_rate": 2.5174396320429284e-05, + "loss": 1.2169, + "step": 6390 + }, + { + "epoch": 2.45, + "learning_rate": 2.516162003321835e-05, + "loss": 1.0862, + "step": 6400 + }, + { + "epoch": 2.46, + "learning_rate": 2.5148843746007412e-05, + "loss": 1.0721, + "step": 6410 + }, + { + "epoch": 2.46, + "learning_rate": 2.5136067458796475e-05, + "loss": 1.0286, + "step": 6420 + }, + { + "epoch": 2.46, + "learning_rate": 2.5123291171585537e-05, + "loss": 1.073, + "step": 6430 + }, + { + "epoch": 2.47, + "learning_rate": 2.5110514884374603e-05, + "loss": 1.0554, + "step": 6440 + }, + { + "epoch": 2.47, + "learning_rate": 2.5097738597163665e-05, + "loss": 1.004, + "step": 6450 + }, + { + "epoch": 2.48, + "learning_rate": 2.508496230995273e-05, + "loss": 1.1401, + "step": 6460 + }, + { + "epoch": 2.48, + "learning_rate": 2.507218602274179e-05, + "loss": 1.0613, + "step": 6470 + }, + { + "epoch": 2.48, + "learning_rate": 2.5059409735530855e-05, + "loss": 1.0989, + "step": 6480 + }, + { + "epoch": 2.49, + "learning_rate": 2.5046633448319918e-05, + "loss": 1.1485, + "step": 6490 + }, + { + "epoch": 2.49, + "learning_rate": 2.5033857161108984e-05, + "loss": 1.0263, + "step": 6500 + }, + { + "epoch": 2.5, + "learning_rate": 2.5021080873898046e-05, + "loss": 1.0634, + "step": 6510 + }, + { + "epoch": 2.5, + "learning_rate": 2.5008304586687108e-05, + "loss": 1.1238, + "step": 6520 + }, + { + "epoch": 2.5, + "learning_rate": 2.499552829947617e-05, + "loss": 1.176, + "step": 6530 + }, + { + "epoch": 2.51, + "learning_rate": 2.4982752012265236e-05, + "loss": 1.1438, + "step": 6540 + }, + { + "epoch": 2.51, + "learning_rate": 2.4969975725054302e-05, + "loss": 1.0184, + "step": 6550 + }, + { + "epoch": 2.51, + "learning_rate": 2.4957199437843365e-05, + "loss": 1.1115, + "step": 6560 + }, + { + "epoch": 2.52, + "learning_rate": 2.4944423150632427e-05, + "loss": 1.0265, + "step": 6570 + }, + { + "epoch": 2.52, + "learning_rate": 2.493164686342149e-05, + "loss": 1.1034, + "step": 6580 + }, + { + "epoch": 2.53, + "learning_rate": 2.4918870576210555e-05, + "loss": 1.0778, + "step": 6590 + }, + { + "epoch": 2.53, + "learning_rate": 2.4906094288999617e-05, + "loss": 1.1707, + "step": 6600 + }, + { + "epoch": 2.53, + "learning_rate": 2.4893318001788683e-05, + "loss": 1.0163, + "step": 6610 + }, + { + "epoch": 2.54, + "learning_rate": 2.4880541714577742e-05, + "loss": 1.1181, + "step": 6620 + }, + { + "epoch": 2.54, + "learning_rate": 2.4867765427366808e-05, + "loss": 1.0864, + "step": 6630 + }, + { + "epoch": 2.55, + "learning_rate": 2.485498914015587e-05, + "loss": 1.0583, + "step": 6640 + }, + { + "epoch": 2.55, + "learning_rate": 2.4842212852944936e-05, + "loss": 1.0578, + "step": 6650 + }, + { + "epoch": 2.55, + "learning_rate": 2.4829436565734e-05, + "loss": 1.173, + "step": 6660 + }, + { + "epoch": 2.56, + "learning_rate": 2.481666027852306e-05, + "loss": 1.0768, + "step": 6670 + }, + { + "epoch": 2.56, + "learning_rate": 2.4803883991312123e-05, + "loss": 1.0241, + "step": 6680 + }, + { + "epoch": 2.56, + "learning_rate": 2.479110770410119e-05, + "loss": 1.1056, + "step": 6690 + }, + { + "epoch": 2.57, + "learning_rate": 2.477833141689025e-05, + "loss": 1.2032, + "step": 6700 + }, + { + "epoch": 2.57, + "learning_rate": 2.4765555129679317e-05, + "loss": 1.0829, + "step": 6710 + }, + { + "epoch": 2.58, + "learning_rate": 2.4752778842468383e-05, + "loss": 1.1422, + "step": 6720 + }, + { + "epoch": 2.58, + "learning_rate": 2.474000255525744e-05, + "loss": 1.0895, + "step": 6730 + }, + { + "epoch": 2.58, + "learning_rate": 2.4727226268046507e-05, + "loss": 0.9601, + "step": 6740 + }, + { + "epoch": 2.59, + "learning_rate": 2.471444998083557e-05, + "loss": 1.0642, + "step": 6750 + }, + { + "epoch": 2.59, + "learning_rate": 2.4701673693624635e-05, + "loss": 1.1043, + "step": 6760 + }, + { + "epoch": 2.59, + "learning_rate": 2.4688897406413698e-05, + "loss": 1.0912, + "step": 6770 + }, + { + "epoch": 2.6, + "learning_rate": 2.467612111920276e-05, + "loss": 1.0568, + "step": 6780 + }, + { + "epoch": 2.6, + "learning_rate": 2.4663344831991823e-05, + "loss": 0.9949, + "step": 6790 + }, + { + "epoch": 2.61, + "learning_rate": 2.465056854478089e-05, + "loss": 1.1519, + "step": 6800 + }, + { + "epoch": 2.61, + "learning_rate": 2.463779225756995e-05, + "loss": 1.0177, + "step": 6810 + }, + { + "epoch": 2.61, + "learning_rate": 2.4625015970359016e-05, + "loss": 0.9917, + "step": 6820 + }, + { + "epoch": 2.62, + "learning_rate": 2.4612239683148075e-05, + "loss": 1.0757, + "step": 6830 + }, + { + "epoch": 2.62, + "learning_rate": 2.459946339593714e-05, + "loss": 1.0863, + "step": 6840 + }, + { + "epoch": 2.63, + "learning_rate": 2.4586687108726204e-05, + "loss": 1.0913, + "step": 6850 + }, + { + "epoch": 2.63, + "learning_rate": 2.457391082151527e-05, + "loss": 1.1372, + "step": 6860 + }, + { + "epoch": 2.63, + "learning_rate": 2.456113453430433e-05, + "loss": 0.9983, + "step": 6870 + }, + { + "epoch": 2.64, + "learning_rate": 2.4548358247093394e-05, + "loss": 1.0834, + "step": 6880 + }, + { + "epoch": 2.64, + "learning_rate": 2.4535581959882456e-05, + "loss": 1.0411, + "step": 6890 + }, + { + "epoch": 2.64, + "learning_rate": 2.4522805672671522e-05, + "loss": 1.0315, + "step": 6900 + }, + { + "epoch": 2.65, + "learning_rate": 2.4510029385460588e-05, + "loss": 1.0517, + "step": 6910 + }, + { + "epoch": 2.65, + "learning_rate": 2.449725309824965e-05, + "loss": 1.1103, + "step": 6920 + }, + { + "epoch": 2.66, + "learning_rate": 2.4484476811038713e-05, + "loss": 1.0671, + "step": 6930 + }, + { + "epoch": 2.66, + "learning_rate": 2.4471700523827775e-05, + "loss": 0.9529, + "step": 6940 + }, + { + "epoch": 2.66, + "learning_rate": 2.445892423661684e-05, + "loss": 1.017, + "step": 6950 + }, + { + "epoch": 2.67, + "learning_rate": 2.4446147949405903e-05, + "loss": 1.1169, + "step": 6960 + }, + { + "epoch": 2.67, + "learning_rate": 2.443337166219497e-05, + "loss": 0.9866, + "step": 6970 + }, + { + "epoch": 2.68, + "learning_rate": 2.4420595374984028e-05, + "loss": 1.0956, + "step": 6980 + }, + { + "epoch": 2.68, + "learning_rate": 2.4407819087773094e-05, + "loss": 1.0235, + "step": 6990 + }, + { + "epoch": 2.68, + "learning_rate": 2.4395042800562156e-05, + "loss": 1.0232, + "step": 7000 + }, + { + "epoch": 2.69, + "learning_rate": 2.438226651335122e-05, + "loss": 1.0942, + "step": 7010 + }, + { + "epoch": 2.69, + "learning_rate": 2.4369490226140284e-05, + "loss": 1.0338, + "step": 7020 + }, + { + "epoch": 2.69, + "learning_rate": 2.4356713938929346e-05, + "loss": 1.1155, + "step": 7030 + }, + { + "epoch": 2.7, + "learning_rate": 2.434393765171841e-05, + "loss": 1.0933, + "step": 7040 + }, + { + "epoch": 2.7, + "learning_rate": 2.4331161364507474e-05, + "loss": 1.0331, + "step": 7050 + }, + { + "epoch": 2.71, + "learning_rate": 2.4318385077296537e-05, + "loss": 1.0647, + "step": 7060 + }, + { + "epoch": 2.71, + "learning_rate": 2.4305608790085603e-05, + "loss": 1.0587, + "step": 7070 + }, + { + "epoch": 2.71, + "learning_rate": 2.429283250287467e-05, + "loss": 1.0124, + "step": 7080 + }, + { + "epoch": 2.72, + "learning_rate": 2.4280056215663727e-05, + "loss": 1.0816, + "step": 7090 + }, + { + "epoch": 2.72, + "learning_rate": 2.4267279928452793e-05, + "loss": 1.0868, + "step": 7100 + }, + { + "epoch": 2.73, + "learning_rate": 2.4254503641241855e-05, + "loss": 1.1505, + "step": 7110 + }, + { + "epoch": 2.73, + "learning_rate": 2.424172735403092e-05, + "loss": 1.077, + "step": 7120 + }, + { + "epoch": 2.73, + "learning_rate": 2.4228951066819984e-05, + "loss": 1.1635, + "step": 7130 + }, + { + "epoch": 2.74, + "learning_rate": 2.4216174779609046e-05, + "loss": 1.1038, + "step": 7140 + }, + { + "epoch": 2.74, + "learning_rate": 2.4203398492398108e-05, + "loss": 1.0356, + "step": 7150 + }, + { + "epoch": 2.74, + "learning_rate": 2.4190622205187174e-05, + "loss": 1.1225, + "step": 7160 + }, + { + "epoch": 2.75, + "learning_rate": 2.4177845917976236e-05, + "loss": 1.067, + "step": 7170 + }, + { + "epoch": 2.75, + "learning_rate": 2.4165069630765302e-05, + "loss": 1.0761, + "step": 7180 + }, + { + "epoch": 2.76, + "learning_rate": 2.415229334355436e-05, + "loss": 1.0828, + "step": 7190 + }, + { + "epoch": 2.76, + "learning_rate": 2.4139517056343427e-05, + "loss": 1.0658, + "step": 7200 + }, + { + "epoch": 2.76, + "learning_rate": 2.412674076913249e-05, + "loss": 1.0643, + "step": 7210 + }, + { + "epoch": 2.77, + "learning_rate": 2.4113964481921555e-05, + "loss": 1.0683, + "step": 7220 + }, + { + "epoch": 2.77, + "learning_rate": 2.4101188194710617e-05, + "loss": 1.0343, + "step": 7230 + }, + { + "epoch": 2.78, + "learning_rate": 2.408841190749968e-05, + "loss": 1.0698, + "step": 7240 + }, + { + "epoch": 2.78, + "learning_rate": 2.4075635620288745e-05, + "loss": 1.1194, + "step": 7250 + }, + { + "epoch": 2.78, + "learning_rate": 2.4062859333077808e-05, + "loss": 1.0786, + "step": 7260 + }, + { + "epoch": 2.79, + "learning_rate": 2.4050083045866874e-05, + "loss": 1.0877, + "step": 7270 + }, + { + "epoch": 2.79, + "learning_rate": 2.4037306758655936e-05, + "loss": 1.0789, + "step": 7280 + }, + { + "epoch": 2.79, + "learning_rate": 2.4024530471444998e-05, + "loss": 1.0631, + "step": 7290 + }, + { + "epoch": 2.8, + "learning_rate": 2.401175418423406e-05, + "loss": 1.0943, + "step": 7300 + }, + { + "epoch": 2.8, + "learning_rate": 2.3998977897023126e-05, + "loss": 1.082, + "step": 7310 + }, + { + "epoch": 2.81, + "learning_rate": 2.398620160981219e-05, + "loss": 1.1354, + "step": 7320 + }, + { + "epoch": 2.81, + "learning_rate": 2.3973425322601255e-05, + "loss": 1.0272, + "step": 7330 + }, + { + "epoch": 2.81, + "learning_rate": 2.3960649035390313e-05, + "loss": 1.0843, + "step": 7340 + }, + { + "epoch": 2.82, + "learning_rate": 2.394787274817938e-05, + "loss": 1.0707, + "step": 7350 + }, + { + "epoch": 2.82, + "learning_rate": 2.393509646096844e-05, + "loss": 1.1286, + "step": 7360 + }, + { + "epoch": 2.82, + "learning_rate": 2.3922320173757507e-05, + "loss": 1.0692, + "step": 7370 + }, + { + "epoch": 2.83, + "learning_rate": 2.390954388654657e-05, + "loss": 1.1927, + "step": 7380 + }, + { + "epoch": 2.83, + "learning_rate": 2.3896767599335635e-05, + "loss": 1.0788, + "step": 7390 + }, + { + "epoch": 2.84, + "learning_rate": 2.3883991312124694e-05, + "loss": 1.0983, + "step": 7400 + }, + { + "epoch": 2.84, + "learning_rate": 2.387121502491376e-05, + "loss": 1.135, + "step": 7410 + }, + { + "epoch": 2.84, + "learning_rate": 2.3858438737702826e-05, + "loss": 1.149, + "step": 7420 + }, + { + "epoch": 2.85, + "learning_rate": 2.384566245049189e-05, + "loss": 1.1294, + "step": 7430 + }, + { + "epoch": 2.85, + "learning_rate": 2.3832886163280954e-05, + "loss": 1.041, + "step": 7440 + }, + { + "epoch": 2.86, + "learning_rate": 2.3820109876070013e-05, + "loss": 1.0936, + "step": 7450 + }, + { + "epoch": 2.86, + "learning_rate": 2.380733358885908e-05, + "loss": 1.041, + "step": 7460 + }, + { + "epoch": 2.86, + "learning_rate": 2.379455730164814e-05, + "loss": 1.0955, + "step": 7470 + }, + { + "epoch": 2.87, + "learning_rate": 2.3781781014437207e-05, + "loss": 1.0294, + "step": 7480 + }, + { + "epoch": 2.87, + "learning_rate": 2.376900472722627e-05, + "loss": 1.1155, + "step": 7490 + }, + { + "epoch": 2.87, + "learning_rate": 2.375622844001533e-05, + "loss": 1.0319, + "step": 7500 + }, + { + "epoch": 2.88, + "learning_rate": 2.3743452152804394e-05, + "loss": 1.0478, + "step": 7510 + }, + { + "epoch": 2.88, + "learning_rate": 2.373067586559346e-05, + "loss": 1.0321, + "step": 7520 + }, + { + "epoch": 2.89, + "learning_rate": 2.3717899578382522e-05, + "loss": 1.1729, + "step": 7530 + }, + { + "epoch": 2.89, + "learning_rate": 2.3705123291171588e-05, + "loss": 1.0052, + "step": 7540 + }, + { + "epoch": 2.89, + "learning_rate": 2.3692347003960647e-05, + "loss": 1.0182, + "step": 7550 + }, + { + "epoch": 2.9, + "learning_rate": 2.3679570716749713e-05, + "loss": 0.9899, + "step": 7560 + }, + { + "epoch": 2.9, + "learning_rate": 2.3666794429538775e-05, + "loss": 1.042, + "step": 7570 + }, + { + "epoch": 2.91, + "learning_rate": 2.365401814232784e-05, + "loss": 1.017, + "step": 7580 + }, + { + "epoch": 2.91, + "learning_rate": 2.3641241855116906e-05, + "loss": 1.0711, + "step": 7590 + }, + { + "epoch": 2.91, + "learning_rate": 2.3628465567905965e-05, + "loss": 1.0517, + "step": 7600 + }, + { + "epoch": 2.92, + "learning_rate": 2.361568928069503e-05, + "loss": 1.1192, + "step": 7610 + }, + { + "epoch": 2.92, + "learning_rate": 2.3602912993484094e-05, + "loss": 0.9748, + "step": 7620 + }, + { + "epoch": 2.92, + "learning_rate": 2.359013670627316e-05, + "loss": 1.1557, + "step": 7630 + }, + { + "epoch": 2.93, + "learning_rate": 2.357736041906222e-05, + "loss": 1.0267, + "step": 7640 + }, + { + "epoch": 2.93, + "learning_rate": 2.3564584131851284e-05, + "loss": 1.023, + "step": 7650 + }, + { + "epoch": 2.94, + "learning_rate": 2.3551807844640346e-05, + "loss": 1.0621, + "step": 7660 + }, + { + "epoch": 2.94, + "learning_rate": 2.3539031557429412e-05, + "loss": 1.1223, + "step": 7670 + }, + { + "epoch": 2.94, + "learning_rate": 2.3526255270218474e-05, + "loss": 1.0851, + "step": 7680 + }, + { + "epoch": 2.95, + "learning_rate": 2.351347898300754e-05, + "loss": 1.002, + "step": 7690 + }, + { + "epoch": 2.95, + "learning_rate": 2.3500702695796603e-05, + "loss": 0.983, + "step": 7700 + }, + { + "epoch": 2.96, + "learning_rate": 2.3487926408585665e-05, + "loss": 1.1106, + "step": 7710 + }, + { + "epoch": 2.96, + "learning_rate": 2.3475150121374727e-05, + "loss": 1.0228, + "step": 7720 + }, + { + "epoch": 2.96, + "learning_rate": 2.3462373834163793e-05, + "loss": 1.0445, + "step": 7730 + }, + { + "epoch": 2.97, + "learning_rate": 2.3449597546952855e-05, + "loss": 1.0605, + "step": 7740 + }, + { + "epoch": 2.97, + "learning_rate": 2.343682125974192e-05, + "loss": 1.1248, + "step": 7750 + }, + { + "epoch": 2.97, + "learning_rate": 2.3424044972530984e-05, + "loss": 1.0091, + "step": 7760 + }, + { + "epoch": 2.98, + "learning_rate": 2.3411268685320046e-05, + "loss": 1.0451, + "step": 7770 + }, + { + "epoch": 2.98, + "learning_rate": 2.339849239810911e-05, + "loss": 1.0389, + "step": 7780 + }, + { + "epoch": 2.99, + "learning_rate": 2.3385716110898174e-05, + "loss": 0.9988, + "step": 7790 + }, + { + "epoch": 2.99, + "learning_rate": 2.337293982368724e-05, + "loss": 1.0669, + "step": 7800 + }, + { + "epoch": 2.99, + "learning_rate": 2.33601635364763e-05, + "loss": 1.0262, + "step": 7810 + }, + { + "epoch": 3.0, + "learning_rate": 2.3347387249265364e-05, + "loss": 1.0755, + "step": 7820 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.6825096423352418, + "eval_loss": 0.8937900066375732, + "eval_runtime": 301.1324, + "eval_samples_per_second": 138.62, + "eval_steps_per_second": 69.312, + "step": 7827 + }, + { + "epoch": 3.0, + "learning_rate": 2.3334610962054427e-05, + "loss": 1.0878, + "step": 7830 + }, + { + "epoch": 3.0, + "learning_rate": 2.3321834674843493e-05, + "loss": 1.025, + "step": 7840 + }, + { + "epoch": 3.01, + "learning_rate": 2.3309058387632555e-05, + "loss": 1.0359, + "step": 7850 + }, + { + "epoch": 3.01, + "learning_rate": 2.3296282100421617e-05, + "loss": 1.0187, + "step": 7860 + }, + { + "epoch": 3.02, + "learning_rate": 2.328350581321068e-05, + "loss": 1.0652, + "step": 7870 + }, + { + "epoch": 3.02, + "learning_rate": 2.3270729525999745e-05, + "loss": 1.018, + "step": 7880 + }, + { + "epoch": 3.02, + "learning_rate": 2.3257953238788808e-05, + "loss": 1.0593, + "step": 7890 + }, + { + "epoch": 3.03, + "learning_rate": 2.3245176951577874e-05, + "loss": 0.9571, + "step": 7900 + }, + { + "epoch": 3.03, + "learning_rate": 2.3232400664366933e-05, + "loss": 0.9714, + "step": 7910 + }, + { + "epoch": 3.04, + "learning_rate": 2.3219624377155998e-05, + "loss": 1.0267, + "step": 7920 + }, + { + "epoch": 3.04, + "learning_rate": 2.3206848089945064e-05, + "loss": 1.0624, + "step": 7930 + }, + { + "epoch": 3.04, + "learning_rate": 2.3194071802734126e-05, + "loss": 1.0104, + "step": 7940 + }, + { + "epoch": 3.05, + "learning_rate": 2.3181295515523192e-05, + "loss": 1.0656, + "step": 7950 + }, + { + "epoch": 3.05, + "learning_rate": 2.316851922831225e-05, + "loss": 0.9538, + "step": 7960 + }, + { + "epoch": 3.05, + "learning_rate": 2.3155742941101317e-05, + "loss": 1.0332, + "step": 7970 + }, + { + "epoch": 3.06, + "learning_rate": 2.314296665389038e-05, + "loss": 0.9867, + "step": 7980 + }, + { + "epoch": 3.06, + "learning_rate": 2.3130190366679445e-05, + "loss": 1.0243, + "step": 7990 + }, + { + "epoch": 3.07, + "learning_rate": 2.3117414079468507e-05, + "loss": 1.0271, + "step": 8000 + }, + { + "epoch": 3.07, + "learning_rate": 2.310463779225757e-05, + "loss": 1.013, + "step": 8010 + }, + { + "epoch": 3.07, + "learning_rate": 2.3091861505046632e-05, + "loss": 1.0212, + "step": 8020 + }, + { + "epoch": 3.08, + "learning_rate": 2.3079085217835698e-05, + "loss": 1.0156, + "step": 8030 + }, + { + "epoch": 3.08, + "learning_rate": 2.306630893062476e-05, + "loss": 1.0226, + "step": 8040 + }, + { + "epoch": 3.09, + "learning_rate": 2.3053532643413826e-05, + "loss": 0.954, + "step": 8050 + }, + { + "epoch": 3.09, + "learning_rate": 2.3040756356202888e-05, + "loss": 1.031, + "step": 8060 + }, + { + "epoch": 3.09, + "learning_rate": 2.302798006899195e-05, + "loss": 1.0905, + "step": 8070 + }, + { + "epoch": 3.1, + "learning_rate": 2.3015203781781013e-05, + "loss": 1.0685, + "step": 8080 + }, + { + "epoch": 3.1, + "learning_rate": 2.300242749457008e-05, + "loss": 1.0371, + "step": 8090 + }, + { + "epoch": 3.1, + "learning_rate": 2.2989651207359145e-05, + "loss": 1.1171, + "step": 8100 + }, + { + "epoch": 3.11, + "learning_rate": 2.2976874920148207e-05, + "loss": 1.0627, + "step": 8110 + }, + { + "epoch": 3.11, + "learning_rate": 2.296409863293727e-05, + "loss": 1.0545, + "step": 8120 + }, + { + "epoch": 3.12, + "learning_rate": 2.295132234572633e-05, + "loss": 1.0362, + "step": 8130 + }, + { + "epoch": 3.12, + "learning_rate": 2.2938546058515397e-05, + "loss": 1.0701, + "step": 8140 + }, + { + "epoch": 3.12, + "learning_rate": 2.292576977130446e-05, + "loss": 0.961, + "step": 8150 + }, + { + "epoch": 3.13, + "learning_rate": 2.2912993484093525e-05, + "loss": 1.0286, + "step": 8160 + }, + { + "epoch": 3.13, + "learning_rate": 2.2900217196882584e-05, + "loss": 1.0491, + "step": 8170 + }, + { + "epoch": 3.14, + "learning_rate": 2.288744090967165e-05, + "loss": 1.1052, + "step": 8180 + }, + { + "epoch": 3.14, + "learning_rate": 2.2874664622460713e-05, + "loss": 1.0902, + "step": 8190 + }, + { + "epoch": 3.14, + "learning_rate": 2.286188833524978e-05, + "loss": 1.0374, + "step": 8200 + }, + { + "epoch": 3.15, + "learning_rate": 2.284911204803884e-05, + "loss": 0.9469, + "step": 8210 + }, + { + "epoch": 3.15, + "learning_rate": 2.2836335760827903e-05, + "loss": 1.0683, + "step": 8220 + }, + { + "epoch": 3.15, + "learning_rate": 2.2823559473616965e-05, + "loss": 1.0952, + "step": 8230 + }, + { + "epoch": 3.16, + "learning_rate": 2.281078318640603e-05, + "loss": 1.0811, + "step": 8240 + }, + { + "epoch": 3.16, + "learning_rate": 2.2798006899195094e-05, + "loss": 1.0438, + "step": 8250 + }, + { + "epoch": 3.17, + "learning_rate": 2.278523061198416e-05, + "loss": 0.983, + "step": 8260 + }, + { + "epoch": 3.17, + "learning_rate": 2.277245432477322e-05, + "loss": 1.0498, + "step": 8270 + }, + { + "epoch": 3.17, + "learning_rate": 2.2759678037562284e-05, + "loss": 0.9876, + "step": 8280 + }, + { + "epoch": 3.18, + "learning_rate": 2.274690175035135e-05, + "loss": 1.0483, + "step": 8290 + }, + { + "epoch": 3.18, + "learning_rate": 2.2734125463140412e-05, + "loss": 1.0201, + "step": 8300 + }, + { + "epoch": 3.19, + "learning_rate": 2.2721349175929478e-05, + "loss": 1.0603, + "step": 8310 + }, + { + "epoch": 3.19, + "learning_rate": 2.2708572888718537e-05, + "loss": 0.98, + "step": 8320 + }, + { + "epoch": 3.19, + "learning_rate": 2.2695796601507603e-05, + "loss": 1.0186, + "step": 8330 + }, + { + "epoch": 3.2, + "learning_rate": 2.2683020314296665e-05, + "loss": 1.0364, + "step": 8340 + }, + { + "epoch": 3.2, + "learning_rate": 2.267024402708573e-05, + "loss": 0.9915, + "step": 8350 + }, + { + "epoch": 3.2, + "learning_rate": 2.2657467739874793e-05, + "loss": 0.9961, + "step": 8360 + }, + { + "epoch": 3.21, + "learning_rate": 2.2644691452663855e-05, + "loss": 1.0486, + "step": 8370 + }, + { + "epoch": 3.21, + "learning_rate": 2.2631915165452918e-05, + "loss": 1.1112, + "step": 8380 + }, + { + "epoch": 3.22, + "learning_rate": 2.2619138878241984e-05, + "loss": 1.0118, + "step": 8390 + }, + { + "epoch": 3.22, + "learning_rate": 2.2606362591031046e-05, + "loss": 1.0305, + "step": 8400 + }, + { + "epoch": 3.22, + "learning_rate": 2.259358630382011e-05, + "loss": 1.0391, + "step": 8410 + }, + { + "epoch": 3.23, + "learning_rate": 2.2580810016609174e-05, + "loss": 1.0419, + "step": 8420 + }, + { + "epoch": 3.23, + "learning_rate": 2.2568033729398236e-05, + "loss": 1.0441, + "step": 8430 + }, + { + "epoch": 3.23, + "learning_rate": 2.2555257442187302e-05, + "loss": 1.014, + "step": 8440 + }, + { + "epoch": 3.24, + "learning_rate": 2.2542481154976364e-05, + "loss": 0.9706, + "step": 8450 + }, + { + "epoch": 3.24, + "learning_rate": 2.252970486776543e-05, + "loss": 1.125, + "step": 8460 + }, + { + "epoch": 3.25, + "learning_rate": 2.2516928580554493e-05, + "loss": 1.0316, + "step": 8470 + }, + { + "epoch": 3.25, + "learning_rate": 2.2504152293343555e-05, + "loss": 1.0275, + "step": 8480 + }, + { + "epoch": 3.25, + "learning_rate": 2.2491376006132617e-05, + "loss": 1.1037, + "step": 8490 + }, + { + "epoch": 3.26, + "learning_rate": 2.2478599718921683e-05, + "loss": 1.0732, + "step": 8500 + }, + { + "epoch": 3.26, + "learning_rate": 2.2465823431710745e-05, + "loss": 1.1062, + "step": 8510 + }, + { + "epoch": 3.27, + "learning_rate": 2.245304714449981e-05, + "loss": 1.0378, + "step": 8520 + }, + { + "epoch": 3.27, + "learning_rate": 2.244027085728887e-05, + "loss": 1.1277, + "step": 8530 + }, + { + "epoch": 3.27, + "learning_rate": 2.2427494570077936e-05, + "loss": 0.9765, + "step": 8540 + }, + { + "epoch": 3.28, + "learning_rate": 2.2414718282866998e-05, + "loss": 1.0515, + "step": 8550 + }, + { + "epoch": 3.28, + "learning_rate": 2.2401941995656064e-05, + "loss": 1.0798, + "step": 8560 + }, + { + "epoch": 3.28, + "learning_rate": 2.2389165708445126e-05, + "loss": 0.9839, + "step": 8570 + }, + { + "epoch": 3.29, + "learning_rate": 2.237638942123419e-05, + "loss": 1.0819, + "step": 8580 + }, + { + "epoch": 3.29, + "learning_rate": 2.236361313402325e-05, + "loss": 0.9844, + "step": 8590 + }, + { + "epoch": 3.3, + "learning_rate": 2.2350836846812317e-05, + "loss": 1.0126, + "step": 8600 + }, + { + "epoch": 3.3, + "learning_rate": 2.2338060559601383e-05, + "loss": 1.0232, + "step": 8610 + }, + { + "epoch": 3.3, + "learning_rate": 2.2325284272390445e-05, + "loss": 1.0272, + "step": 8620 + }, + { + "epoch": 3.31, + "learning_rate": 2.2312507985179507e-05, + "loss": 1.0461, + "step": 8630 + }, + { + "epoch": 3.31, + "learning_rate": 2.229973169796857e-05, + "loss": 1.064, + "step": 8640 + }, + { + "epoch": 3.32, + "learning_rate": 2.2286955410757635e-05, + "loss": 1.0858, + "step": 8650 + }, + { + "epoch": 3.32, + "learning_rate": 2.2274179123546698e-05, + "loss": 1.0288, + "step": 8660 + }, + { + "epoch": 3.32, + "learning_rate": 2.2261402836335764e-05, + "loss": 0.9986, + "step": 8670 + }, + { + "epoch": 3.33, + "learning_rate": 2.2248626549124823e-05, + "loss": 1.0781, + "step": 8680 + }, + { + "epoch": 3.33, + "learning_rate": 2.2235850261913888e-05, + "loss": 1.0184, + "step": 8690 + }, + { + "epoch": 3.33, + "learning_rate": 2.222307397470295e-05, + "loss": 0.9895, + "step": 8700 + }, + { + "epoch": 3.34, + "learning_rate": 2.2210297687492016e-05, + "loss": 1.0041, + "step": 8710 + }, + { + "epoch": 3.34, + "learning_rate": 2.219752140028108e-05, + "loss": 1.0071, + "step": 8720 + }, + { + "epoch": 3.35, + "learning_rate": 2.2184745113070145e-05, + "loss": 1.0364, + "step": 8730 + }, + { + "epoch": 3.35, + "learning_rate": 2.2171968825859203e-05, + "loss": 1.0183, + "step": 8740 + }, + { + "epoch": 3.35, + "learning_rate": 2.215919253864827e-05, + "loss": 0.9923, + "step": 8750 + }, + { + "epoch": 3.36, + "learning_rate": 2.214641625143733e-05, + "loss": 1.0698, + "step": 8760 + }, + { + "epoch": 3.36, + "learning_rate": 2.2133639964226397e-05, + "loss": 1.0158, + "step": 8770 + }, + { + "epoch": 3.37, + "learning_rate": 2.2120863677015463e-05, + "loss": 0.9953, + "step": 8780 + }, + { + "epoch": 3.37, + "learning_rate": 2.2108087389804522e-05, + "loss": 1.0502, + "step": 8790 + }, + { + "epoch": 3.37, + "learning_rate": 2.2095311102593588e-05, + "loss": 0.9623, + "step": 8800 + }, + { + "epoch": 3.38, + "learning_rate": 2.208253481538265e-05, + "loss": 0.924, + "step": 8810 + }, + { + "epoch": 3.38, + "learning_rate": 2.2069758528171716e-05, + "loss": 1.0329, + "step": 8820 + }, + { + "epoch": 3.38, + "learning_rate": 2.205698224096078e-05, + "loss": 1.0636, + "step": 8830 + }, + { + "epoch": 3.39, + "learning_rate": 2.204420595374984e-05, + "loss": 1.025, + "step": 8840 + }, + { + "epoch": 3.39, + "learning_rate": 2.2031429666538903e-05, + "loss": 1.0375, + "step": 8850 + }, + { + "epoch": 3.4, + "learning_rate": 2.201865337932797e-05, + "loss": 1.0329, + "step": 8860 + }, + { + "epoch": 3.4, + "learning_rate": 2.200587709211703e-05, + "loss": 1.0646, + "step": 8870 + }, + { + "epoch": 3.4, + "learning_rate": 2.1993100804906097e-05, + "loss": 1.0791, + "step": 8880 + }, + { + "epoch": 3.41, + "learning_rate": 2.1980324517695156e-05, + "loss": 1.0558, + "step": 8890 + }, + { + "epoch": 3.41, + "learning_rate": 2.196754823048422e-05, + "loss": 0.9135, + "step": 8900 + }, + { + "epoch": 3.42, + "learning_rate": 2.1954771943273284e-05, + "loss": 1.0741, + "step": 8910 + }, + { + "epoch": 3.42, + "learning_rate": 2.194199565606235e-05, + "loss": 1.0717, + "step": 8920 + }, + { + "epoch": 3.42, + "learning_rate": 2.1929219368851412e-05, + "loss": 1.0419, + "step": 8930 + }, + { + "epoch": 3.43, + "learning_rate": 2.1916443081640474e-05, + "loss": 1.0776, + "step": 8940 + }, + { + "epoch": 3.43, + "learning_rate": 2.1903666794429537e-05, + "loss": 1.0317, + "step": 8950 + }, + { + "epoch": 3.43, + "learning_rate": 2.1890890507218603e-05, + "loss": 1.097, + "step": 8960 + }, + { + "epoch": 3.44, + "learning_rate": 2.187811422000767e-05, + "loss": 1.0727, + "step": 8970 + }, + { + "epoch": 3.44, + "learning_rate": 2.186533793279673e-05, + "loss": 0.9845, + "step": 8980 + }, + { + "epoch": 3.45, + "learning_rate": 2.1852561645585793e-05, + "loss": 0.9579, + "step": 8990 + }, + { + "epoch": 3.45, + "learning_rate": 2.1839785358374855e-05, + "loss": 0.9434, + "step": 9000 + }, + { + "epoch": 3.45, + "learning_rate": 2.182700907116392e-05, + "loss": 1.0293, + "step": 9010 + }, + { + "epoch": 3.46, + "learning_rate": 2.1814232783952984e-05, + "loss": 1.0813, + "step": 9020 + }, + { + "epoch": 3.46, + "learning_rate": 2.180145649674205e-05, + "loss": 1.0201, + "step": 9030 + }, + { + "epoch": 3.46, + "learning_rate": 2.178868020953111e-05, + "loss": 1.0174, + "step": 9040 + }, + { + "epoch": 3.47, + "learning_rate": 2.1775903922320174e-05, + "loss": 1.0147, + "step": 9050 + }, + { + "epoch": 3.47, + "learning_rate": 2.1763127635109236e-05, + "loss": 1.078, + "step": 9060 + }, + { + "epoch": 3.48, + "learning_rate": 2.1750351347898302e-05, + "loss": 1.0205, + "step": 9070 + }, + { + "epoch": 3.48, + "learning_rate": 2.1737575060687364e-05, + "loss": 1.1099, + "step": 9080 + }, + { + "epoch": 3.48, + "learning_rate": 2.172479877347643e-05, + "loss": 1.0465, + "step": 9090 + }, + { + "epoch": 3.49, + "learning_rate": 2.171202248626549e-05, + "loss": 1.0107, + "step": 9100 + }, + { + "epoch": 3.49, + "learning_rate": 2.1699246199054555e-05, + "loss": 0.99, + "step": 9110 + }, + { + "epoch": 3.5, + "learning_rate": 2.1686469911843617e-05, + "loss": 1.0071, + "step": 9120 + }, + { + "epoch": 3.5, + "learning_rate": 2.1673693624632683e-05, + "loss": 0.997, + "step": 9130 + }, + { + "epoch": 3.5, + "learning_rate": 2.166091733742175e-05, + "loss": 1.0344, + "step": 9140 + }, + { + "epoch": 3.51, + "learning_rate": 2.1648141050210808e-05, + "loss": 1.0395, + "step": 9150 + }, + { + "epoch": 3.51, + "learning_rate": 2.1635364762999874e-05, + "loss": 0.9957, + "step": 9160 + }, + { + "epoch": 3.51, + "learning_rate": 2.1622588475788936e-05, + "loss": 0.9928, + "step": 9170 + }, + { + "epoch": 3.52, + "learning_rate": 2.1609812188578e-05, + "loss": 1.0512, + "step": 9180 + }, + { + "epoch": 3.52, + "learning_rate": 2.1597035901367064e-05, + "loss": 1.0085, + "step": 9190 + }, + { + "epoch": 3.53, + "learning_rate": 2.1584259614156126e-05, + "loss": 1.021, + "step": 9200 + }, + { + "epoch": 3.53, + "learning_rate": 2.157148332694519e-05, + "loss": 1.1114, + "step": 9210 + }, + { + "epoch": 3.53, + "learning_rate": 2.1558707039734254e-05, + "loss": 1.053, + "step": 9220 + }, + { + "epoch": 3.54, + "learning_rate": 2.1545930752523317e-05, + "loss": 0.959, + "step": 9230 + }, + { + "epoch": 3.54, + "learning_rate": 2.1533154465312383e-05, + "loss": 1.0537, + "step": 9240 + }, + { + "epoch": 3.55, + "learning_rate": 2.152037817810144e-05, + "loss": 1.1059, + "step": 9250 + }, + { + "epoch": 3.55, + "learning_rate": 2.1507601890890507e-05, + "loss": 1.0334, + "step": 9260 + }, + { + "epoch": 3.55, + "learning_rate": 2.149482560367957e-05, + "loss": 0.945, + "step": 9270 + }, + { + "epoch": 3.56, + "learning_rate": 2.1482049316468635e-05, + "loss": 1.0205, + "step": 9280 + }, + { + "epoch": 3.56, + "learning_rate": 2.1469273029257698e-05, + "loss": 0.8946, + "step": 9290 + }, + { + "epoch": 3.56, + "learning_rate": 2.145649674204676e-05, + "loss": 1.1158, + "step": 9300 + }, + { + "epoch": 3.57, + "learning_rate": 2.1443720454835826e-05, + "loss": 1.0037, + "step": 9310 + }, + { + "epoch": 3.57, + "learning_rate": 2.1430944167624888e-05, + "loss": 1.0036, + "step": 9320 + }, + { + "epoch": 3.58, + "learning_rate": 2.1418167880413954e-05, + "loss": 1.0294, + "step": 9330 + }, + { + "epoch": 3.58, + "learning_rate": 2.1405391593203016e-05, + "loss": 0.9538, + "step": 9340 + }, + { + "epoch": 3.58, + "learning_rate": 2.139261530599208e-05, + "loss": 1.0533, + "step": 9350 + }, + { + "epoch": 3.59, + "learning_rate": 2.137983901878114e-05, + "loss": 1.0851, + "step": 9360 + }, + { + "epoch": 3.59, + "learning_rate": 2.1367062731570207e-05, + "loss": 1.0317, + "step": 9370 + }, + { + "epoch": 3.6, + "learning_rate": 2.135428644435927e-05, + "loss": 1.0627, + "step": 9380 + }, + { + "epoch": 3.6, + "learning_rate": 2.1341510157148335e-05, + "loss": 1.0275, + "step": 9390 + }, + { + "epoch": 3.6, + "learning_rate": 2.1328733869937397e-05, + "loss": 1.034, + "step": 9400 + }, + { + "epoch": 3.61, + "learning_rate": 2.131595758272646e-05, + "loss": 1.0159, + "step": 9410 + }, + { + "epoch": 3.61, + "learning_rate": 2.1303181295515522e-05, + "loss": 0.9994, + "step": 9420 + }, + { + "epoch": 3.61, + "learning_rate": 2.1290405008304588e-05, + "loss": 1.0029, + "step": 9430 + }, + { + "epoch": 3.62, + "learning_rate": 2.127762872109365e-05, + "loss": 1.054, + "step": 9440 + }, + { + "epoch": 3.62, + "learning_rate": 2.1264852433882716e-05, + "loss": 1.0247, + "step": 9450 + }, + { + "epoch": 3.63, + "learning_rate": 2.1252076146671775e-05, + "loss": 1.101, + "step": 9460 + }, + { + "epoch": 3.63, + "learning_rate": 2.123929985946084e-05, + "loss": 1.0384, + "step": 9470 + }, + { + "epoch": 3.63, + "learning_rate": 2.1226523572249906e-05, + "loss": 1.0056, + "step": 9480 + }, + { + "epoch": 3.64, + "learning_rate": 2.121374728503897e-05, + "loss": 1.0253, + "step": 9490 + }, + { + "epoch": 3.64, + "learning_rate": 2.1200970997828035e-05, + "loss": 1.0022, + "step": 9500 + }, + { + "epoch": 3.65, + "learning_rate": 2.1188194710617094e-05, + "loss": 1.0184, + "step": 9510 + }, + { + "epoch": 3.65, + "learning_rate": 2.117541842340616e-05, + "loss": 1.084, + "step": 9520 + }, + { + "epoch": 3.65, + "learning_rate": 2.116264213619522e-05, + "loss": 1.0293, + "step": 9530 + }, + { + "epoch": 3.66, + "learning_rate": 2.1149865848984287e-05, + "loss": 1.0071, + "step": 9540 + }, + { + "epoch": 3.66, + "learning_rate": 2.113708956177335e-05, + "loss": 1.0419, + "step": 9550 + }, + { + "epoch": 3.66, + "learning_rate": 2.1124313274562412e-05, + "loss": 1.025, + "step": 9560 + }, + { + "epoch": 3.67, + "learning_rate": 2.1111536987351474e-05, + "loss": 1.064, + "step": 9570 + }, + { + "epoch": 3.67, + "learning_rate": 2.109876070014054e-05, + "loss": 1.077, + "step": 9580 + }, + { + "epoch": 3.68, + "learning_rate": 2.1085984412929603e-05, + "loss": 1.0143, + "step": 9590 + }, + { + "epoch": 3.68, + "learning_rate": 2.107320812571867e-05, + "loss": 1.0703, + "step": 9600 + }, + { + "epoch": 3.68, + "learning_rate": 2.1060431838507727e-05, + "loss": 1.0376, + "step": 9610 + }, + { + "epoch": 3.69, + "learning_rate": 2.1047655551296793e-05, + "loss": 1.0564, + "step": 9620 + }, + { + "epoch": 3.69, + "learning_rate": 2.1034879264085855e-05, + "loss": 1.0056, + "step": 9630 + }, + { + "epoch": 3.69, + "learning_rate": 2.102210297687492e-05, + "loss": 1.0423, + "step": 9640 + }, + { + "epoch": 3.7, + "learning_rate": 2.1010604318385078e-05, + "loss": 0.9948, + "step": 9650 + }, + { + "epoch": 3.7, + "learning_rate": 2.099782803117414e-05, + "loss": 0.9852, + "step": 9660 + }, + { + "epoch": 3.71, + "learning_rate": 2.0985051743963206e-05, + "loss": 0.9679, + "step": 9670 + }, + { + "epoch": 3.71, + "learning_rate": 2.097227545675227e-05, + "loss": 1.0069, + "step": 9680 + }, + { + "epoch": 3.71, + "learning_rate": 2.095949916954133e-05, + "loss": 1.0064, + "step": 9690 + }, + { + "epoch": 3.72, + "learning_rate": 2.0946722882330397e-05, + "loss": 1.0177, + "step": 9700 + }, + { + "epoch": 3.72, + "learning_rate": 2.093394659511946e-05, + "loss": 1.0382, + "step": 9710 + }, + { + "epoch": 3.73, + "learning_rate": 2.0921170307908525e-05, + "loss": 1.0895, + "step": 9720 + }, + { + "epoch": 3.73, + "learning_rate": 2.0908394020697587e-05, + "loss": 0.946, + "step": 9730 + }, + { + "epoch": 3.73, + "learning_rate": 2.089561773348665e-05, + "loss": 1.0327, + "step": 9740 + }, + { + "epoch": 3.74, + "learning_rate": 2.0882841446275712e-05, + "loss": 1.0571, + "step": 9750 + }, + { + "epoch": 3.74, + "learning_rate": 2.0870065159064777e-05, + "loss": 0.9581, + "step": 9760 + }, + { + "epoch": 3.74, + "learning_rate": 2.085728887185384e-05, + "loss": 0.9532, + "step": 9770 + }, + { + "epoch": 3.75, + "learning_rate": 2.0844512584642906e-05, + "loss": 1.0014, + "step": 9780 + }, + { + "epoch": 3.75, + "learning_rate": 2.0831736297431965e-05, + "loss": 0.9949, + "step": 9790 + }, + { + "epoch": 3.76, + "learning_rate": 2.081896001022103e-05, + "loss": 1.0254, + "step": 9800 + }, + { + "epoch": 3.76, + "learning_rate": 2.0806183723010093e-05, + "loss": 1.0585, + "step": 9810 + }, + { + "epoch": 3.76, + "learning_rate": 2.079340743579916e-05, + "loss": 1.0318, + "step": 9820 + }, + { + "epoch": 3.77, + "learning_rate": 2.078063114858822e-05, + "loss": 0.9836, + "step": 9830 + }, + { + "epoch": 3.77, + "learning_rate": 2.0767854861377283e-05, + "loss": 0.9845, + "step": 9840 + }, + { + "epoch": 3.78, + "learning_rate": 2.0755078574166346e-05, + "loss": 1.0269, + "step": 9850 + }, + { + "epoch": 3.78, + "learning_rate": 2.074230228695541e-05, + "loss": 0.9764, + "step": 9860 + }, + { + "epoch": 3.78, + "learning_rate": 2.0729525999744477e-05, + "loss": 1.004, + "step": 9870 + }, + { + "epoch": 3.79, + "learning_rate": 2.071674971253354e-05, + "loss": 1.0853, + "step": 9880 + }, + { + "epoch": 3.79, + "learning_rate": 2.0703973425322602e-05, + "loss": 1.0041, + "step": 9890 + }, + { + "epoch": 3.79, + "learning_rate": 2.0691197138111664e-05, + "loss": 1.076, + "step": 9900 + }, + { + "epoch": 3.8, + "learning_rate": 2.067842085090073e-05, + "loss": 1.0294, + "step": 9910 + }, + { + "epoch": 3.8, + "learning_rate": 2.0665644563689792e-05, + "loss": 1.0616, + "step": 9920 + }, + { + "epoch": 3.81, + "learning_rate": 2.0652868276478858e-05, + "loss": 1.0558, + "step": 9930 + }, + { + "epoch": 3.81, + "learning_rate": 2.0640091989267917e-05, + "loss": 1.0142, + "step": 9940 + }, + { + "epoch": 3.81, + "learning_rate": 2.0627315702056983e-05, + "loss": 0.965, + "step": 9950 + }, + { + "epoch": 3.82, + "learning_rate": 2.0614539414846045e-05, + "loss": 0.9732, + "step": 9960 + }, + { + "epoch": 3.82, + "learning_rate": 2.060176312763511e-05, + "loss": 1.0222, + "step": 9970 + }, + { + "epoch": 3.83, + "learning_rate": 2.0588986840424173e-05, + "loss": 1.0496, + "step": 9980 + }, + { + "epoch": 3.83, + "learning_rate": 2.0576210553213236e-05, + "loss": 0.9679, + "step": 9990 + }, + { + "epoch": 3.83, + "learning_rate": 2.0563434266002298e-05, + "loss": 1.0011, + "step": 10000 + }, + { + "epoch": 3.84, + "learning_rate": 2.0550657978791364e-05, + "loss": 1.0534, + "step": 10010 + }, + { + "epoch": 3.84, + "learning_rate": 2.0537881691580426e-05, + "loss": 0.972, + "step": 10020 + }, + { + "epoch": 3.84, + "learning_rate": 2.0525105404369492e-05, + "loss": 1.0555, + "step": 10030 + }, + { + "epoch": 3.85, + "learning_rate": 2.0512329117158558e-05, + "loss": 1.0325, + "step": 10040 + }, + { + "epoch": 3.85, + "learning_rate": 2.0499552829947617e-05, + "loss": 0.9442, + "step": 10050 + }, + { + "epoch": 3.86, + "learning_rate": 2.0486776542736682e-05, + "loss": 0.9743, + "step": 10060 + }, + { + "epoch": 3.86, + "learning_rate": 2.0474000255525745e-05, + "loss": 1.0768, + "step": 10070 + }, + { + "epoch": 3.86, + "learning_rate": 2.046122396831481e-05, + "loss": 0.943, + "step": 10080 + }, + { + "epoch": 3.87, + "learning_rate": 2.0448447681103873e-05, + "loss": 1.055, + "step": 10090 + }, + { + "epoch": 3.87, + "learning_rate": 2.0435671393892935e-05, + "loss": 0.968, + "step": 10100 + }, + { + "epoch": 3.88, + "learning_rate": 2.0422895106681997e-05, + "loss": 1.0523, + "step": 10110 + }, + { + "epoch": 3.88, + "learning_rate": 2.0410118819471063e-05, + "loss": 1.0725, + "step": 10120 + }, + { + "epoch": 3.88, + "learning_rate": 2.0397342532260126e-05, + "loss": 1.0593, + "step": 10130 + }, + { + "epoch": 3.89, + "learning_rate": 2.038456624504919e-05, + "loss": 0.8884, + "step": 10140 + }, + { + "epoch": 3.89, + "learning_rate": 2.037178995783825e-05, + "loss": 0.9884, + "step": 10150 + }, + { + "epoch": 3.89, + "learning_rate": 2.0359013670627316e-05, + "loss": 1.0388, + "step": 10160 + }, + { + "epoch": 3.9, + "learning_rate": 2.034623738341638e-05, + "loss": 1.0385, + "step": 10170 + }, + { + "epoch": 3.9, + "learning_rate": 2.0333461096205444e-05, + "loss": 0.9961, + "step": 10180 + }, + { + "epoch": 3.91, + "learning_rate": 2.0320684808994507e-05, + "loss": 0.986, + "step": 10190 + }, + { + "epoch": 3.91, + "learning_rate": 2.030790852178357e-05, + "loss": 1.0116, + "step": 10200 + }, + { + "epoch": 3.91, + "learning_rate": 2.0295132234572635e-05, + "loss": 0.9868, + "step": 10210 + }, + { + "epoch": 3.92, + "learning_rate": 2.0282355947361697e-05, + "loss": 1.0432, + "step": 10220 + }, + { + "epoch": 3.92, + "learning_rate": 2.0269579660150763e-05, + "loss": 0.9453, + "step": 10230 + }, + { + "epoch": 3.92, + "learning_rate": 2.0256803372939825e-05, + "loss": 1.0286, + "step": 10240 + }, + { + "epoch": 3.93, + "learning_rate": 2.0244027085728887e-05, + "loss": 1.0106, + "step": 10250 + }, + { + "epoch": 3.93, + "learning_rate": 2.023125079851795e-05, + "loss": 1.0025, + "step": 10260 + }, + { + "epoch": 3.94, + "learning_rate": 2.0218474511307016e-05, + "loss": 1.02, + "step": 10270 + }, + { + "epoch": 3.94, + "learning_rate": 2.0205698224096078e-05, + "loss": 0.9797, + "step": 10280 + }, + { + "epoch": 3.94, + "learning_rate": 2.0192921936885144e-05, + "loss": 1.0164, + "step": 10290 + }, + { + "epoch": 3.95, + "learning_rate": 2.0180145649674203e-05, + "loss": 1.0135, + "step": 10300 + }, + { + "epoch": 3.95, + "learning_rate": 2.016736936246327e-05, + "loss": 0.9658, + "step": 10310 + }, + { + "epoch": 3.96, + "learning_rate": 2.015459307525233e-05, + "loss": 0.9884, + "step": 10320 + }, + { + "epoch": 3.96, + "learning_rate": 2.0141816788041397e-05, + "loss": 0.9869, + "step": 10330 + }, + { + "epoch": 3.96, + "learning_rate": 2.012904050083046e-05, + "loss": 1.0391, + "step": 10340 + }, + { + "epoch": 3.97, + "learning_rate": 2.011626421361952e-05, + "loss": 0.9192, + "step": 10350 + }, + { + "epoch": 3.97, + "learning_rate": 2.0103487926408584e-05, + "loss": 1.0853, + "step": 10360 + }, + { + "epoch": 3.97, + "learning_rate": 2.009071163919765e-05, + "loss": 0.9712, + "step": 10370 + }, + { + "epoch": 3.98, + "learning_rate": 2.0077935351986715e-05, + "loss": 1.0299, + "step": 10380 + }, + { + "epoch": 3.98, + "learning_rate": 2.0065159064775777e-05, + "loss": 1.0221, + "step": 10390 + }, + { + "epoch": 3.99, + "learning_rate": 2.0052382777564843e-05, + "loss": 1.0338, + "step": 10400 + }, + { + "epoch": 3.99, + "learning_rate": 2.0039606490353902e-05, + "loss": 0.9901, + "step": 10410 + }, + { + "epoch": 3.99, + "learning_rate": 2.0026830203142968e-05, + "loss": 0.9988, + "step": 10420 + }, + { + "epoch": 4.0, + "learning_rate": 2.001405391593203e-05, + "loss": 1.0048, + "step": 10430 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7040461873847113, + "eval_loss": 0.8600494265556335, + "eval_runtime": 306.4466, + "eval_samples_per_second": 136.216, + "eval_steps_per_second": 68.11, + "step": 10436 + }, + { + "epoch": 4.0, + "learning_rate": 2.0001277628721096e-05, + "loss": 0.9459, + "step": 10440 + }, + { + "epoch": 4.01, + "learning_rate": 1.998850134151016e-05, + "loss": 0.9764, + "step": 10450 + }, + { + "epoch": 4.01, + "learning_rate": 1.997572505429922e-05, + "loss": 1.0754, + "step": 10460 + }, + { + "epoch": 4.01, + "learning_rate": 1.9962948767088283e-05, + "loss": 1.0515, + "step": 10470 + }, + { + "epoch": 4.02, + "learning_rate": 1.995017247987735e-05, + "loss": 1.0065, + "step": 10480 + }, + { + "epoch": 4.02, + "learning_rate": 1.993739619266641e-05, + "loss": 1.0253, + "step": 10490 + }, + { + "epoch": 4.02, + "learning_rate": 1.9924619905455477e-05, + "loss": 0.9243, + "step": 10500 + }, + { + "epoch": 4.03, + "learning_rate": 1.9911843618244536e-05, + "loss": 0.9527, + "step": 10510 + }, + { + "epoch": 4.03, + "learning_rate": 1.9899067331033602e-05, + "loss": 1.0599, + "step": 10520 + }, + { + "epoch": 4.04, + "learning_rate": 1.9886291043822664e-05, + "loss": 0.9384, + "step": 10530 + }, + { + "epoch": 4.04, + "learning_rate": 1.987351475661173e-05, + "loss": 1.0183, + "step": 10540 + }, + { + "epoch": 4.04, + "learning_rate": 1.9860738469400796e-05, + "loss": 1.0682, + "step": 10550 + }, + { + "epoch": 4.05, + "learning_rate": 1.9847962182189855e-05, + "loss": 1.0485, + "step": 10560 + }, + { + "epoch": 4.05, + "learning_rate": 1.983518589497892e-05, + "loss": 1.0225, + "step": 10570 + }, + { + "epoch": 4.06, + "learning_rate": 1.9822409607767983e-05, + "loss": 0.9636, + "step": 10580 + }, + { + "epoch": 4.06, + "learning_rate": 1.980963332055705e-05, + "loss": 0.9476, + "step": 10590 + }, + { + "epoch": 4.06, + "learning_rate": 1.979685703334611e-05, + "loss": 0.9997, + "step": 10600 + }, + { + "epoch": 4.07, + "learning_rate": 1.9784080746135173e-05, + "loss": 1.0073, + "step": 10610 + }, + { + "epoch": 4.07, + "learning_rate": 1.9771304458924236e-05, + "loss": 0.9891, + "step": 10620 + }, + { + "epoch": 4.07, + "learning_rate": 1.97585281717133e-05, + "loss": 1.0127, + "step": 10630 + }, + { + "epoch": 4.08, + "learning_rate": 1.9745751884502364e-05, + "loss": 0.9135, + "step": 10640 + }, + { + "epoch": 4.08, + "learning_rate": 1.973297559729143e-05, + "loss": 0.9873, + "step": 10650 + }, + { + "epoch": 4.09, + "learning_rate": 1.972019931008049e-05, + "loss": 1.0475, + "step": 10660 + }, + { + "epoch": 4.09, + "learning_rate": 1.9707423022869554e-05, + "loss": 1.0128, + "step": 10670 + }, + { + "epoch": 4.09, + "learning_rate": 1.9694646735658616e-05, + "loss": 0.9845, + "step": 10680 + }, + { + "epoch": 4.1, + "learning_rate": 1.9681870448447682e-05, + "loss": 0.9993, + "step": 10690 + }, + { + "epoch": 4.1, + "learning_rate": 1.9669094161236745e-05, + "loss": 0.9962, + "step": 10700 + }, + { + "epoch": 4.11, + "learning_rate": 1.965631787402581e-05, + "loss": 1.017, + "step": 10710 + }, + { + "epoch": 4.11, + "learning_rate": 1.964354158681487e-05, + "loss": 0.9382, + "step": 10720 + }, + { + "epoch": 4.11, + "learning_rate": 1.9630765299603935e-05, + "loss": 1.0317, + "step": 10730 + }, + { + "epoch": 4.12, + "learning_rate": 1.9617989012393e-05, + "loss": 1.0399, + "step": 10740 + }, + { + "epoch": 4.12, + "learning_rate": 1.9605212725182063e-05, + "loss": 1.0362, + "step": 10750 + }, + { + "epoch": 4.12, + "learning_rate": 1.959243643797113e-05, + "loss": 0.9808, + "step": 10760 + }, + { + "epoch": 4.13, + "learning_rate": 1.9579660150760188e-05, + "loss": 0.9382, + "step": 10770 + }, + { + "epoch": 4.13, + "learning_rate": 1.9566883863549254e-05, + "loss": 0.983, + "step": 10780 + }, + { + "epoch": 4.14, + "learning_rate": 1.9554107576338316e-05, + "loss": 0.9385, + "step": 10790 + }, + { + "epoch": 4.14, + "learning_rate": 1.9541331289127382e-05, + "loss": 0.9472, + "step": 10800 + }, + { + "epoch": 4.14, + "learning_rate": 1.9528555001916444e-05, + "loss": 1.0646, + "step": 10810 + }, + { + "epoch": 4.15, + "learning_rate": 1.9515778714705507e-05, + "loss": 0.9611, + "step": 10820 + }, + { + "epoch": 4.15, + "learning_rate": 1.950300242749457e-05, + "loss": 1.0051, + "step": 10830 + }, + { + "epoch": 4.15, + "learning_rate": 1.9490226140283635e-05, + "loss": 0.9689, + "step": 10840 + }, + { + "epoch": 4.16, + "learning_rate": 1.9477449853072697e-05, + "loss": 1.0728, + "step": 10850 + }, + { + "epoch": 4.16, + "learning_rate": 1.9464673565861763e-05, + "loss": 0.9823, + "step": 10860 + }, + { + "epoch": 4.17, + "learning_rate": 1.9451897278650822e-05, + "loss": 0.9576, + "step": 10870 + }, + { + "epoch": 4.17, + "learning_rate": 1.9439120991439887e-05, + "loss": 1.0259, + "step": 10880 + }, + { + "epoch": 4.17, + "learning_rate": 1.942634470422895e-05, + "loss": 1.0078, + "step": 10890 + }, + { + "epoch": 4.18, + "learning_rate": 1.9413568417018016e-05, + "loss": 0.9973, + "step": 10900 + }, + { + "epoch": 4.18, + "learning_rate": 1.940079212980708e-05, + "loss": 1.0395, + "step": 10910 + }, + { + "epoch": 4.19, + "learning_rate": 1.938801584259614e-05, + "loss": 1.0008, + "step": 10920 + }, + { + "epoch": 4.19, + "learning_rate": 1.9375239555385206e-05, + "loss": 1.0114, + "step": 10930 + }, + { + "epoch": 4.19, + "learning_rate": 1.936246326817427e-05, + "loss": 0.9611, + "step": 10940 + }, + { + "epoch": 4.2, + "learning_rate": 1.9349686980963334e-05, + "loss": 0.9867, + "step": 10950 + }, + { + "epoch": 4.2, + "learning_rate": 1.9336910693752397e-05, + "loss": 0.9812, + "step": 10960 + }, + { + "epoch": 4.2, + "learning_rate": 1.932413440654146e-05, + "loss": 1.0514, + "step": 10970 + }, + { + "epoch": 4.21, + "learning_rate": 1.931135811933052e-05, + "loss": 1.0008, + "step": 10980 + }, + { + "epoch": 4.21, + "learning_rate": 1.9298581832119587e-05, + "loss": 1.1416, + "step": 10990 + }, + { + "epoch": 4.22, + "learning_rate": 1.928580554490865e-05, + "loss": 0.9546, + "step": 11000 + }, + { + "epoch": 4.22, + "learning_rate": 1.9273029257697715e-05, + "loss": 0.9935, + "step": 11010 + }, + { + "epoch": 4.22, + "learning_rate": 1.9260252970486777e-05, + "loss": 1.0199, + "step": 11020 + }, + { + "epoch": 4.23, + "learning_rate": 1.924747668327584e-05, + "loss": 0.982, + "step": 11030 + }, + { + "epoch": 4.23, + "learning_rate": 1.9234700396064902e-05, + "loss": 0.9565, + "step": 11040 + }, + { + "epoch": 4.24, + "learning_rate": 1.9221924108853968e-05, + "loss": 1.0392, + "step": 11050 + }, + { + "epoch": 4.24, + "learning_rate": 1.920914782164303e-05, + "loss": 1.0471, + "step": 11060 + }, + { + "epoch": 4.24, + "learning_rate": 1.9196371534432096e-05, + "loss": 0.994, + "step": 11070 + }, + { + "epoch": 4.25, + "learning_rate": 1.918359524722116e-05, + "loss": 1.0374, + "step": 11080 + }, + { + "epoch": 4.25, + "learning_rate": 1.917081896001022e-05, + "loss": 0.9268, + "step": 11090 + }, + { + "epoch": 4.25, + "learning_rate": 1.9158042672799287e-05, + "loss": 0.9702, + "step": 11100 + }, + { + "epoch": 4.26, + "learning_rate": 1.914526638558835e-05, + "loss": 0.9774, + "step": 11110 + }, + { + "epoch": 4.26, + "learning_rate": 1.9132490098377415e-05, + "loss": 0.9614, + "step": 11120 + }, + { + "epoch": 4.27, + "learning_rate": 1.9119713811166474e-05, + "loss": 1.0095, + "step": 11130 + }, + { + "epoch": 4.27, + "learning_rate": 1.910693752395554e-05, + "loss": 1.0305, + "step": 11140 + }, + { + "epoch": 4.27, + "learning_rate": 1.9094161236744602e-05, + "loss": 0.9512, + "step": 11150 + }, + { + "epoch": 4.28, + "learning_rate": 1.9081384949533667e-05, + "loss": 1.0214, + "step": 11160 + }, + { + "epoch": 4.28, + "learning_rate": 1.906860866232273e-05, + "loss": 0.9975, + "step": 11170 + }, + { + "epoch": 4.29, + "learning_rate": 1.9055832375111792e-05, + "loss": 0.9814, + "step": 11180 + }, + { + "epoch": 4.29, + "learning_rate": 1.9043056087900855e-05, + "loss": 0.9298, + "step": 11190 + }, + { + "epoch": 4.29, + "learning_rate": 1.903027980068992e-05, + "loss": 0.9918, + "step": 11200 + }, + { + "epoch": 4.3, + "learning_rate": 1.9017503513478983e-05, + "loss": 1.0058, + "step": 11210 + }, + { + "epoch": 4.3, + "learning_rate": 1.900472722626805e-05, + "loss": 0.9774, + "step": 11220 + }, + { + "epoch": 4.3, + "learning_rate": 1.8991950939057107e-05, + "loss": 0.9492, + "step": 11230 + }, + { + "epoch": 4.31, + "learning_rate": 1.8979174651846173e-05, + "loss": 1.0274, + "step": 11240 + }, + { + "epoch": 4.31, + "learning_rate": 1.896639836463524e-05, + "loss": 0.8601, + "step": 11250 + }, + { + "epoch": 4.32, + "learning_rate": 1.89536220774243e-05, + "loss": 1.1159, + "step": 11260 + }, + { + "epoch": 4.32, + "learning_rate": 1.8940845790213367e-05, + "loss": 1.0081, + "step": 11270 + }, + { + "epoch": 4.32, + "learning_rate": 1.8928069503002426e-05, + "loss": 1.0421, + "step": 11280 + }, + { + "epoch": 4.33, + "learning_rate": 1.8915293215791492e-05, + "loss": 1.0195, + "step": 11290 + }, + { + "epoch": 4.33, + "learning_rate": 1.8902516928580554e-05, + "loss": 0.9817, + "step": 11300 + }, + { + "epoch": 4.33, + "learning_rate": 1.888974064136962e-05, + "loss": 0.9258, + "step": 11310 + }, + { + "epoch": 4.34, + "learning_rate": 1.8876964354158682e-05, + "loss": 1.0313, + "step": 11320 + }, + { + "epoch": 4.34, + "learning_rate": 1.8864188066947745e-05, + "loss": 1.0741, + "step": 11330 + }, + { + "epoch": 4.35, + "learning_rate": 1.8851411779736807e-05, + "loss": 0.9507, + "step": 11340 + }, + { + "epoch": 4.35, + "learning_rate": 1.8838635492525873e-05, + "loss": 1.004, + "step": 11350 + }, + { + "epoch": 4.35, + "learning_rate": 1.8825859205314935e-05, + "loss": 0.9851, + "step": 11360 + }, + { + "epoch": 4.36, + "learning_rate": 1.8813082918104e-05, + "loss": 0.9581, + "step": 11370 + }, + { + "epoch": 4.36, + "learning_rate": 1.8800306630893063e-05, + "loss": 1.0172, + "step": 11380 + }, + { + "epoch": 4.37, + "learning_rate": 1.8787530343682126e-05, + "loss": 1.0631, + "step": 11390 + }, + { + "epoch": 4.37, + "learning_rate": 1.8774754056471188e-05, + "loss": 0.9673, + "step": 11400 + }, + { + "epoch": 4.37, + "learning_rate": 1.8761977769260254e-05, + "loss": 0.9853, + "step": 11410 + }, + { + "epoch": 4.38, + "learning_rate": 1.874920148204932e-05, + "loss": 0.9911, + "step": 11420 + }, + { + "epoch": 4.38, + "learning_rate": 1.8736425194838382e-05, + "loss": 1.0252, + "step": 11430 + }, + { + "epoch": 4.38, + "learning_rate": 1.8723648907627444e-05, + "loss": 0.9863, + "step": 11440 + }, + { + "epoch": 4.39, + "learning_rate": 1.8710872620416507e-05, + "loss": 0.9982, + "step": 11450 + }, + { + "epoch": 4.39, + "learning_rate": 1.8698096333205572e-05, + "loss": 0.9994, + "step": 11460 + }, + { + "epoch": 4.4, + "learning_rate": 1.8685320045994635e-05, + "loss": 0.9207, + "step": 11470 + }, + { + "epoch": 4.4, + "learning_rate": 1.86725437587837e-05, + "loss": 1.0277, + "step": 11480 + }, + { + "epoch": 4.4, + "learning_rate": 1.865976747157276e-05, + "loss": 1.0076, + "step": 11490 + }, + { + "epoch": 4.41, + "learning_rate": 1.8646991184361825e-05, + "loss": 0.9431, + "step": 11500 + }, + { + "epoch": 4.41, + "learning_rate": 1.8634214897150887e-05, + "loss": 1.0096, + "step": 11510 + }, + { + "epoch": 4.42, + "learning_rate": 1.8621438609939953e-05, + "loss": 0.9848, + "step": 11520 + }, + { + "epoch": 4.42, + "learning_rate": 1.8608662322729016e-05, + "loss": 1.0283, + "step": 11530 + }, + { + "epoch": 4.42, + "learning_rate": 1.8595886035518078e-05, + "loss": 0.9438, + "step": 11540 + }, + { + "epoch": 4.43, + "learning_rate": 1.858310974830714e-05, + "loss": 0.954, + "step": 11550 + }, + { + "epoch": 4.43, + "learning_rate": 1.8570333461096206e-05, + "loss": 0.9268, + "step": 11560 + }, + { + "epoch": 4.43, + "learning_rate": 1.855755717388527e-05, + "loss": 0.9719, + "step": 11570 + }, + { + "epoch": 4.44, + "learning_rate": 1.8544780886674334e-05, + "loss": 1.0252, + "step": 11580 + }, + { + "epoch": 4.44, + "learning_rate": 1.8532004599463397e-05, + "loss": 0.9273, + "step": 11590 + }, + { + "epoch": 4.45, + "learning_rate": 1.851922831225246e-05, + "loss": 1.0104, + "step": 11600 + }, + { + "epoch": 4.45, + "learning_rate": 1.8506452025041525e-05, + "loss": 1.0399, + "step": 11610 + }, + { + "epoch": 4.45, + "learning_rate": 1.8493675737830587e-05, + "loss": 1.0342, + "step": 11620 + }, + { + "epoch": 4.46, + "learning_rate": 1.8480899450619653e-05, + "loss": 0.9441, + "step": 11630 + }, + { + "epoch": 4.46, + "learning_rate": 1.8468123163408712e-05, + "loss": 0.949, + "step": 11640 + }, + { + "epoch": 4.47, + "learning_rate": 1.8455346876197777e-05, + "loss": 1.0071, + "step": 11650 + }, + { + "epoch": 4.47, + "learning_rate": 1.844257058898684e-05, + "loss": 1.0028, + "step": 11660 + }, + { + "epoch": 4.47, + "learning_rate": 1.8429794301775906e-05, + "loss": 1.013, + "step": 11670 + }, + { + "epoch": 4.48, + "learning_rate": 1.8417018014564968e-05, + "loss": 0.983, + "step": 11680 + }, + { + "epoch": 4.48, + "learning_rate": 1.840424172735403e-05, + "loss": 1.0092, + "step": 11690 + }, + { + "epoch": 4.48, + "learning_rate": 1.8391465440143093e-05, + "loss": 0.9959, + "step": 11700 + }, + { + "epoch": 4.49, + "learning_rate": 1.837868915293216e-05, + "loss": 0.981, + "step": 11710 + }, + { + "epoch": 4.49, + "learning_rate": 1.836591286572122e-05, + "loss": 1.0389, + "step": 11720 + }, + { + "epoch": 4.5, + "learning_rate": 1.8353136578510287e-05, + "loss": 0.972, + "step": 11730 + }, + { + "epoch": 4.5, + "learning_rate": 1.834036029129935e-05, + "loss": 0.975, + "step": 11740 + }, + { + "epoch": 4.5, + "learning_rate": 1.832758400408841e-05, + "loss": 0.9072, + "step": 11750 + }, + { + "epoch": 4.51, + "learning_rate": 1.8314807716877477e-05, + "loss": 1.013, + "step": 11760 + }, + { + "epoch": 4.51, + "learning_rate": 1.830203142966654e-05, + "loss": 0.9246, + "step": 11770 + }, + { + "epoch": 4.52, + "learning_rate": 1.8289255142455605e-05, + "loss": 1.0591, + "step": 11780 + }, + { + "epoch": 4.52, + "learning_rate": 1.827775648396576e-05, + "loss": 0.9849, + "step": 11790 + }, + { + "epoch": 4.52, + "learning_rate": 1.8264980196754824e-05, + "loss": 1.026, + "step": 11800 + }, + { + "epoch": 4.53, + "learning_rate": 1.825220390954389e-05, + "loss": 1.061, + "step": 11810 + }, + { + "epoch": 4.53, + "learning_rate": 1.823942762233295e-05, + "loss": 1.0452, + "step": 11820 + }, + { + "epoch": 4.53, + "learning_rate": 1.8226651335122015e-05, + "loss": 0.9118, + "step": 11830 + }, + { + "epoch": 4.54, + "learning_rate": 1.8213875047911077e-05, + "loss": 0.9805, + "step": 11840 + }, + { + "epoch": 4.54, + "learning_rate": 1.8201098760700143e-05, + "loss": 0.9626, + "step": 11850 + }, + { + "epoch": 4.55, + "learning_rate": 1.8188322473489205e-05, + "loss": 1.053, + "step": 11860 + }, + { + "epoch": 4.55, + "learning_rate": 1.8175546186278268e-05, + "loss": 1.0135, + "step": 11870 + }, + { + "epoch": 4.55, + "learning_rate": 1.816276989906733e-05, + "loss": 0.9814, + "step": 11880 + }, + { + "epoch": 4.56, + "learning_rate": 1.8149993611856396e-05, + "loss": 0.9771, + "step": 11890 + }, + { + "epoch": 4.56, + "learning_rate": 1.8137217324645458e-05, + "loss": 1.0723, + "step": 11900 + }, + { + "epoch": 4.56, + "learning_rate": 1.8124441037434524e-05, + "loss": 1.0822, + "step": 11910 + }, + { + "epoch": 4.57, + "learning_rate": 1.8111664750223583e-05, + "loss": 0.9721, + "step": 11920 + }, + { + "epoch": 4.57, + "learning_rate": 1.809888846301265e-05, + "loss": 1.0491, + "step": 11930 + }, + { + "epoch": 4.58, + "learning_rate": 1.808611217580171e-05, + "loss": 1.035, + "step": 11940 + }, + { + "epoch": 4.58, + "learning_rate": 1.8073335888590777e-05, + "loss": 0.9814, + "step": 11950 + }, + { + "epoch": 4.58, + "learning_rate": 1.806055960137984e-05, + "loss": 1.0619, + "step": 11960 + }, + { + "epoch": 4.59, + "learning_rate": 1.80477833141689e-05, + "loss": 1.0584, + "step": 11970 + }, + { + "epoch": 4.59, + "learning_rate": 1.8035007026957967e-05, + "loss": 0.8946, + "step": 11980 + }, + { + "epoch": 4.6, + "learning_rate": 1.802223073974703e-05, + "loss": 0.9485, + "step": 11990 + }, + { + "epoch": 4.6, + "learning_rate": 1.8009454452536095e-05, + "loss": 0.9647, + "step": 12000 + }, + { + "epoch": 4.6, + "learning_rate": 1.7996678165325158e-05, + "loss": 0.9895, + "step": 12010 + }, + { + "epoch": 4.61, + "learning_rate": 1.7983901878114223e-05, + "loss": 0.982, + "step": 12020 + }, + { + "epoch": 4.61, + "learning_rate": 1.7971125590903282e-05, + "loss": 0.9681, + "step": 12030 + }, + { + "epoch": 4.61, + "learning_rate": 1.7958349303692348e-05, + "loss": 1.0043, + "step": 12040 + }, + { + "epoch": 4.62, + "learning_rate": 1.794557301648141e-05, + "loss": 1.0314, + "step": 12050 + }, + { + "epoch": 4.62, + "learning_rate": 1.7932796729270476e-05, + "loss": 1.0258, + "step": 12060 + }, + { + "epoch": 4.63, + "learning_rate": 1.792002044205954e-05, + "loss": 1.0732, + "step": 12070 + }, + { + "epoch": 4.63, + "learning_rate": 1.79072441548486e-05, + "loss": 1.0242, + "step": 12080 + }, + { + "epoch": 4.63, + "learning_rate": 1.7894467867637663e-05, + "loss": 0.9567, + "step": 12090 + }, + { + "epoch": 4.64, + "learning_rate": 1.788169158042673e-05, + "loss": 0.9396, + "step": 12100 + }, + { + "epoch": 4.64, + "learning_rate": 1.786891529321579e-05, + "loss": 0.9696, + "step": 12110 + }, + { + "epoch": 4.65, + "learning_rate": 1.7856139006004857e-05, + "loss": 1.0943, + "step": 12120 + }, + { + "epoch": 4.65, + "learning_rate": 1.7843362718793916e-05, + "loss": 0.9693, + "step": 12130 + }, + { + "epoch": 4.65, + "learning_rate": 1.7830586431582982e-05, + "loss": 0.9157, + "step": 12140 + }, + { + "epoch": 4.66, + "learning_rate": 1.7817810144372048e-05, + "loss": 0.9401, + "step": 12150 + }, + { + "epoch": 4.66, + "learning_rate": 1.780503385716111e-05, + "loss": 0.9495, + "step": 12160 + }, + { + "epoch": 4.66, + "learning_rate": 1.7792257569950176e-05, + "loss": 0.936, + "step": 12170 + }, + { + "epoch": 4.67, + "learning_rate": 1.7779481282739235e-05, + "loss": 0.9385, + "step": 12180 + }, + { + "epoch": 4.67, + "learning_rate": 1.77667049955283e-05, + "loss": 0.9262, + "step": 12190 + }, + { + "epoch": 4.68, + "learning_rate": 1.7753928708317363e-05, + "loss": 1.0073, + "step": 12200 + }, + { + "epoch": 4.68, + "learning_rate": 1.774115242110643e-05, + "loss": 1.0286, + "step": 12210 + }, + { + "epoch": 4.68, + "learning_rate": 1.772837613389549e-05, + "loss": 0.9921, + "step": 12220 + }, + { + "epoch": 4.69, + "learning_rate": 1.7715599846684553e-05, + "loss": 0.9697, + "step": 12230 + }, + { + "epoch": 4.69, + "learning_rate": 1.7702823559473616e-05, + "loss": 1.0053, + "step": 12240 + }, + { + "epoch": 4.7, + "learning_rate": 1.769004727226268e-05, + "loss": 1.0966, + "step": 12250 + }, + { + "epoch": 4.7, + "learning_rate": 1.7677270985051744e-05, + "loss": 0.9925, + "step": 12260 + }, + { + "epoch": 4.7, + "learning_rate": 1.766449469784081e-05, + "loss": 1.0647, + "step": 12270 + }, + { + "epoch": 4.71, + "learning_rate": 1.765171841062987e-05, + "loss": 1.0401, + "step": 12280 + }, + { + "epoch": 4.71, + "learning_rate": 1.7638942123418934e-05, + "loss": 0.9786, + "step": 12290 + }, + { + "epoch": 4.71, + "learning_rate": 1.7626165836207997e-05, + "loss": 1.0068, + "step": 12300 + }, + { + "epoch": 4.72, + "learning_rate": 1.7613389548997062e-05, + "loss": 0.999, + "step": 12310 + }, + { + "epoch": 4.72, + "learning_rate": 1.7600613261786125e-05, + "loss": 1.0131, + "step": 12320 + }, + { + "epoch": 4.73, + "learning_rate": 1.758783697457519e-05, + "loss": 0.9801, + "step": 12330 + }, + { + "epoch": 4.73, + "learning_rate": 1.7575060687364253e-05, + "loss": 0.998, + "step": 12340 + }, + { + "epoch": 4.73, + "learning_rate": 1.7562284400153315e-05, + "loss": 0.9733, + "step": 12350 + }, + { + "epoch": 4.74, + "learning_rate": 1.754950811294238e-05, + "loss": 1.005, + "step": 12360 + }, + { + "epoch": 4.74, + "learning_rate": 1.7536731825731443e-05, + "loss": 1.0983, + "step": 12370 + }, + { + "epoch": 4.75, + "learning_rate": 1.752395553852051e-05, + "loss": 0.9449, + "step": 12380 + }, + { + "epoch": 4.75, + "learning_rate": 1.7511179251309568e-05, + "loss": 0.8714, + "step": 12390 + }, + { + "epoch": 4.75, + "learning_rate": 1.7498402964098634e-05, + "loss": 1.0294, + "step": 12400 + }, + { + "epoch": 4.76, + "learning_rate": 1.7485626676887696e-05, + "loss": 0.9287, + "step": 12410 + }, + { + "epoch": 4.76, + "learning_rate": 1.7472850389676762e-05, + "loss": 0.9364, + "step": 12420 + }, + { + "epoch": 4.76, + "learning_rate": 1.7460074102465824e-05, + "loss": 0.9208, + "step": 12430 + }, + { + "epoch": 4.77, + "learning_rate": 1.7447297815254887e-05, + "loss": 0.9774, + "step": 12440 + }, + { + "epoch": 4.77, + "learning_rate": 1.743452152804395e-05, + "loss": 1.0821, + "step": 12450 + }, + { + "epoch": 4.78, + "learning_rate": 1.7421745240833015e-05, + "loss": 0.9437, + "step": 12460 + }, + { + "epoch": 4.78, + "learning_rate": 1.7408968953622077e-05, + "loss": 0.9584, + "step": 12470 + }, + { + "epoch": 4.78, + "learning_rate": 1.7396192666411143e-05, + "loss": 1.0863, + "step": 12480 + }, + { + "epoch": 4.79, + "learning_rate": 1.7383416379200202e-05, + "loss": 0.9668, + "step": 12490 + }, + { + "epoch": 4.79, + "learning_rate": 1.7370640091989268e-05, + "loss": 0.9899, + "step": 12500 + }, + { + "epoch": 4.79, + "learning_rate": 1.7357863804778333e-05, + "loss": 1.0286, + "step": 12510 + }, + { + "epoch": 4.8, + "learning_rate": 1.7345087517567396e-05, + "loss": 1.0362, + "step": 12520 + }, + { + "epoch": 4.8, + "learning_rate": 1.733231123035646e-05, + "loss": 0.9126, + "step": 12530 + }, + { + "epoch": 4.81, + "learning_rate": 1.731953494314552e-05, + "loss": 0.9714, + "step": 12540 + }, + { + "epoch": 4.81, + "learning_rate": 1.7306758655934586e-05, + "loss": 1.002, + "step": 12550 + }, + { + "epoch": 4.81, + "learning_rate": 1.729398236872365e-05, + "loss": 0.9519, + "step": 12560 + }, + { + "epoch": 4.82, + "learning_rate": 1.7281206081512714e-05, + "loss": 0.9881, + "step": 12570 + }, + { + "epoch": 4.82, + "learning_rate": 1.7268429794301777e-05, + "loss": 1.0048, + "step": 12580 + }, + { + "epoch": 4.83, + "learning_rate": 1.725565350709084e-05, + "loss": 0.991, + "step": 12590 + }, + { + "epoch": 4.83, + "learning_rate": 1.72428772198799e-05, + "loss": 1.0719, + "step": 12600 + }, + { + "epoch": 4.83, + "learning_rate": 1.7230100932668967e-05, + "loss": 0.9645, + "step": 12610 + }, + { + "epoch": 4.84, + "learning_rate": 1.721732464545803e-05, + "loss": 0.9867, + "step": 12620 + }, + { + "epoch": 4.84, + "learning_rate": 1.7204548358247095e-05, + "loss": 0.9421, + "step": 12630 + }, + { + "epoch": 4.84, + "learning_rate": 1.7191772071036154e-05, + "loss": 0.8979, + "step": 12640 + }, + { + "epoch": 4.85, + "learning_rate": 1.717899578382522e-05, + "loss": 1.0321, + "step": 12650 + }, + { + "epoch": 4.85, + "learning_rate": 1.7166219496614282e-05, + "loss": 0.993, + "step": 12660 + }, + { + "epoch": 4.86, + "learning_rate": 1.7153443209403348e-05, + "loss": 0.989, + "step": 12670 + }, + { + "epoch": 4.86, + "learning_rate": 1.7140666922192414e-05, + "loss": 0.9692, + "step": 12680 + }, + { + "epoch": 4.86, + "learning_rate": 1.7127890634981476e-05, + "loss": 1.0294, + "step": 12690 + }, + { + "epoch": 4.87, + "learning_rate": 1.711511434777054e-05, + "loss": 0.8949, + "step": 12700 + }, + { + "epoch": 4.87, + "learning_rate": 1.71023380605596e-05, + "loss": 0.9204, + "step": 12710 + }, + { + "epoch": 4.88, + "learning_rate": 1.7089561773348667e-05, + "loss": 0.9863, + "step": 12720 + }, + { + "epoch": 4.88, + "learning_rate": 1.707678548613773e-05, + "loss": 0.9261, + "step": 12730 + }, + { + "epoch": 4.88, + "learning_rate": 1.7064009198926795e-05, + "loss": 1.0644, + "step": 12740 + }, + { + "epoch": 4.89, + "learning_rate": 1.7051232911715854e-05, + "loss": 0.9793, + "step": 12750 + }, + { + "epoch": 4.89, + "learning_rate": 1.703845662450492e-05, + "loss": 0.9588, + "step": 12760 + }, + { + "epoch": 4.89, + "learning_rate": 1.7025680337293982e-05, + "loss": 1.019, + "step": 12770 + }, + { + "epoch": 4.9, + "learning_rate": 1.7012904050083048e-05, + "loss": 1.0621, + "step": 12780 + }, + { + "epoch": 4.9, + "learning_rate": 1.700012776287211e-05, + "loss": 0.9755, + "step": 12790 + }, + { + "epoch": 4.91, + "learning_rate": 1.6987351475661172e-05, + "loss": 0.9292, + "step": 12800 + }, + { + "epoch": 4.91, + "learning_rate": 1.6974575188450235e-05, + "loss": 1.0005, + "step": 12810 + }, + { + "epoch": 4.91, + "learning_rate": 1.69617989012393e-05, + "loss": 1.0226, + "step": 12820 + }, + { + "epoch": 4.92, + "learning_rate": 1.6949022614028363e-05, + "loss": 1.0131, + "step": 12830 + }, + { + "epoch": 4.92, + "learning_rate": 1.693624632681743e-05, + "loss": 0.963, + "step": 12840 + }, + { + "epoch": 4.93, + "learning_rate": 1.692347003960649e-05, + "loss": 1.0655, + "step": 12850 + }, + { + "epoch": 4.93, + "learning_rate": 1.6910693752395553e-05, + "loss": 0.996, + "step": 12860 + }, + { + "epoch": 4.93, + "learning_rate": 1.689791746518462e-05, + "loss": 0.996, + "step": 12870 + }, + { + "epoch": 4.94, + "learning_rate": 1.688514117797368e-05, + "loss": 1.0607, + "step": 12880 + }, + { + "epoch": 4.94, + "learning_rate": 1.6872364890762747e-05, + "loss": 0.9656, + "step": 12890 + }, + { + "epoch": 4.94, + "learning_rate": 1.6859588603551806e-05, + "loss": 0.9606, + "step": 12900 + }, + { + "epoch": 4.95, + "learning_rate": 1.6846812316340872e-05, + "loss": 0.9681, + "step": 12910 + }, + { + "epoch": 4.95, + "learning_rate": 1.6834036029129934e-05, + "loss": 0.9653, + "step": 12920 + }, + { + "epoch": 4.96, + "learning_rate": 1.6821259741919e-05, + "loss": 1.0569, + "step": 12930 + }, + { + "epoch": 4.96, + "learning_rate": 1.6808483454708062e-05, + "loss": 0.9666, + "step": 12940 + }, + { + "epoch": 4.96, + "learning_rate": 1.6795707167497125e-05, + "loss": 1.0277, + "step": 12950 + }, + { + "epoch": 4.97, + "learning_rate": 1.6782930880286187e-05, + "loss": 0.9886, + "step": 12960 + }, + { + "epoch": 4.97, + "learning_rate": 1.6770154593075253e-05, + "loss": 0.9212, + "step": 12970 + }, + { + "epoch": 4.98, + "learning_rate": 1.6757378305864315e-05, + "loss": 0.9798, + "step": 12980 + }, + { + "epoch": 4.98, + "learning_rate": 1.674460201865338e-05, + "loss": 1.0217, + "step": 12990 + }, + { + "epoch": 4.98, + "learning_rate": 1.6731825731442443e-05, + "loss": 1.0463, + "step": 13000 + }, + { + "epoch": 4.99, + "learning_rate": 1.6719049444231506e-05, + "loss": 1.0125, + "step": 13010 + }, + { + "epoch": 4.99, + "learning_rate": 1.670627315702057e-05, + "loss": 0.9281, + "step": 13020 + }, + { + "epoch": 4.99, + "learning_rate": 1.6693496869809634e-05, + "loss": 1.02, + "step": 13030 + }, + { + "epoch": 5.0, + "learning_rate": 1.66807205825987e-05, + "loss": 0.9933, + "step": 13040 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.7321706633447524, + "eval_loss": 0.7696112990379333, + "eval_runtime": 295.2461, + "eval_samples_per_second": 141.384, + "eval_steps_per_second": 70.694, + "step": 13045 + }, + { + "epoch": 5.0, + "learning_rate": 1.6667944295387762e-05, + "loss": 0.9511, + "step": 13050 + }, + { + "epoch": 5.01, + "learning_rate": 1.6655168008176824e-05, + "loss": 0.8864, + "step": 13060 + }, + { + "epoch": 5.01, + "learning_rate": 1.6642391720965887e-05, + "loss": 1.041, + "step": 13070 + }, + { + "epoch": 5.01, + "learning_rate": 1.6629615433754952e-05, + "loss": 0.9998, + "step": 13080 + }, + { + "epoch": 5.02, + "learning_rate": 1.6616839146544015e-05, + "loss": 0.9828, + "step": 13090 + }, + { + "epoch": 5.02, + "learning_rate": 1.660406285933308e-05, + "loss": 0.985, + "step": 13100 + }, + { + "epoch": 5.02, + "learning_rate": 1.659128657212214e-05, + "loss": 1.005, + "step": 13110 + }, + { + "epoch": 5.03, + "learning_rate": 1.6578510284911205e-05, + "loss": 0.8982, + "step": 13120 + }, + { + "epoch": 5.03, + "learning_rate": 1.6565733997700268e-05, + "loss": 0.9311, + "step": 13130 + }, + { + "epoch": 5.04, + "learning_rate": 1.6552957710489333e-05, + "loss": 0.9662, + "step": 13140 + }, + { + "epoch": 5.04, + "learning_rate": 1.6540181423278396e-05, + "loss": 0.8836, + "step": 13150 + }, + { + "epoch": 5.04, + "learning_rate": 1.6527405136067458e-05, + "loss": 0.9816, + "step": 13160 + }, + { + "epoch": 5.05, + "learning_rate": 1.651462884885652e-05, + "loss": 0.9692, + "step": 13170 + }, + { + "epoch": 5.05, + "learning_rate": 1.6501852561645586e-05, + "loss": 0.9834, + "step": 13180 + }, + { + "epoch": 5.06, + "learning_rate": 1.6489076274434652e-05, + "loss": 0.962, + "step": 13190 + }, + { + "epoch": 5.06, + "learning_rate": 1.6476299987223714e-05, + "loss": 0.8939, + "step": 13200 + }, + { + "epoch": 5.06, + "learning_rate": 1.6463523700012777e-05, + "loss": 1.0154, + "step": 13210 + }, + { + "epoch": 5.07, + "learning_rate": 1.645074741280184e-05, + "loss": 1.0056, + "step": 13220 + }, + { + "epoch": 5.07, + "learning_rate": 1.6437971125590905e-05, + "loss": 0.9138, + "step": 13230 + }, + { + "epoch": 5.07, + "learning_rate": 1.6425194838379967e-05, + "loss": 0.8636, + "step": 13240 + }, + { + "epoch": 5.08, + "learning_rate": 1.6412418551169033e-05, + "loss": 0.9425, + "step": 13250 + }, + { + "epoch": 5.08, + "learning_rate": 1.6399642263958092e-05, + "loss": 0.9244, + "step": 13260 + }, + { + "epoch": 5.09, + "learning_rate": 1.6386865976747158e-05, + "loss": 1.0511, + "step": 13270 + }, + { + "epoch": 5.09, + "learning_rate": 1.637408968953622e-05, + "loss": 0.9437, + "step": 13280 + }, + { + "epoch": 5.09, + "learning_rate": 1.6361313402325286e-05, + "loss": 0.9603, + "step": 13290 + }, + { + "epoch": 5.1, + "learning_rate": 1.6348537115114348e-05, + "loss": 0.9806, + "step": 13300 + }, + { + "epoch": 5.1, + "learning_rate": 1.633576082790341e-05, + "loss": 1.0418, + "step": 13310 + }, + { + "epoch": 5.11, + "learning_rate": 1.6322984540692473e-05, + "loss": 0.9977, + "step": 13320 + }, + { + "epoch": 5.11, + "learning_rate": 1.631020825348154e-05, + "loss": 0.9562, + "step": 13330 + }, + { + "epoch": 5.11, + "learning_rate": 1.62974319662706e-05, + "loss": 0.9789, + "step": 13340 + }, + { + "epoch": 5.12, + "learning_rate": 1.6284655679059667e-05, + "loss": 0.981, + "step": 13350 + }, + { + "epoch": 5.12, + "learning_rate": 1.6271879391848732e-05, + "loss": 0.9924, + "step": 13360 + }, + { + "epoch": 5.12, + "learning_rate": 1.625910310463779e-05, + "loss": 0.9103, + "step": 13370 + }, + { + "epoch": 5.13, + "learning_rate": 1.6246326817426857e-05, + "loss": 0.9105, + "step": 13380 + }, + { + "epoch": 5.13, + "learning_rate": 1.623355053021592e-05, + "loss": 0.9018, + "step": 13390 + }, + { + "epoch": 5.14, + "learning_rate": 1.6220774243004985e-05, + "loss": 1.008, + "step": 13400 + }, + { + "epoch": 5.14, + "learning_rate": 1.6207997955794048e-05, + "loss": 0.9163, + "step": 13410 + }, + { + "epoch": 5.14, + "learning_rate": 1.619522166858311e-05, + "loss": 0.9285, + "step": 13420 + }, + { + "epoch": 5.15, + "learning_rate": 1.6182445381372172e-05, + "loss": 1.0056, + "step": 13430 + }, + { + "epoch": 5.15, + "learning_rate": 1.6169669094161238e-05, + "loss": 1.0056, + "step": 13440 + }, + { + "epoch": 5.16, + "learning_rate": 1.61568928069503e-05, + "loss": 1.0101, + "step": 13450 + }, + { + "epoch": 5.16, + "learning_rate": 1.6144116519739366e-05, + "loss": 0.916, + "step": 13460 + }, + { + "epoch": 5.16, + "learning_rate": 1.6131340232528425e-05, + "loss": 0.8751, + "step": 13470 + }, + { + "epoch": 5.17, + "learning_rate": 1.611856394531749e-05, + "loss": 0.9678, + "step": 13480 + }, + { + "epoch": 5.17, + "learning_rate": 1.6105787658106553e-05, + "loss": 0.9592, + "step": 13490 + }, + { + "epoch": 5.17, + "learning_rate": 1.609301137089562e-05, + "loss": 0.9084, + "step": 13500 + }, + { + "epoch": 5.18, + "learning_rate": 1.608023508368468e-05, + "loss": 1.0476, + "step": 13510 + }, + { + "epoch": 5.18, + "learning_rate": 1.6067458796473744e-05, + "loss": 0.9773, + "step": 13520 + }, + { + "epoch": 5.19, + "learning_rate": 1.605468250926281e-05, + "loss": 0.9905, + "step": 13530 + }, + { + "epoch": 5.19, + "learning_rate": 1.6041906222051872e-05, + "loss": 1.0131, + "step": 13540 + }, + { + "epoch": 5.19, + "learning_rate": 1.6029129934840938e-05, + "loss": 0.9017, + "step": 13550 + }, + { + "epoch": 5.2, + "learning_rate": 1.601635364763e-05, + "loss": 0.9386, + "step": 13560 + }, + { + "epoch": 5.2, + "learning_rate": 1.6003577360419062e-05, + "loss": 0.8702, + "step": 13570 + }, + { + "epoch": 5.21, + "learning_rate": 1.5990801073208125e-05, + "loss": 0.9276, + "step": 13580 + }, + { + "epoch": 5.21, + "learning_rate": 1.597802478599719e-05, + "loss": 0.9717, + "step": 13590 + }, + { + "epoch": 5.21, + "learning_rate": 1.5965248498786253e-05, + "loss": 0.9509, + "step": 13600 + }, + { + "epoch": 5.22, + "learning_rate": 1.595247221157532e-05, + "loss": 0.9955, + "step": 13610 + }, + { + "epoch": 5.22, + "learning_rate": 1.5939695924364378e-05, + "loss": 0.9675, + "step": 13620 + }, + { + "epoch": 5.22, + "learning_rate": 1.5926919637153443e-05, + "loss": 0.9306, + "step": 13630 + }, + { + "epoch": 5.23, + "learning_rate": 1.5914143349942506e-05, + "loss": 1.0027, + "step": 13640 + }, + { + "epoch": 5.23, + "learning_rate": 1.590136706273157e-05, + "loss": 0.9495, + "step": 13650 + }, + { + "epoch": 5.24, + "learning_rate": 1.5888590775520634e-05, + "loss": 0.9397, + "step": 13660 + }, + { + "epoch": 5.24, + "learning_rate": 1.5875814488309696e-05, + "loss": 0.9794, + "step": 13670 + }, + { + "epoch": 5.24, + "learning_rate": 1.586303820109876e-05, + "loss": 0.9686, + "step": 13680 + }, + { + "epoch": 5.25, + "learning_rate": 1.5850261913887824e-05, + "loss": 1.0132, + "step": 13690 + }, + { + "epoch": 5.25, + "learning_rate": 1.583748562667689e-05, + "loss": 0.9909, + "step": 13700 + }, + { + "epoch": 5.25, + "learning_rate": 1.5824709339465952e-05, + "loss": 0.9189, + "step": 13710 + }, + { + "epoch": 5.26, + "learning_rate": 1.5811933052255018e-05, + "loss": 0.9516, + "step": 13720 + }, + { + "epoch": 5.26, + "learning_rate": 1.5799156765044077e-05, + "loss": 0.8799, + "step": 13730 + }, + { + "epoch": 5.27, + "learning_rate": 1.5786380477833143e-05, + "loss": 0.9896, + "step": 13740 + }, + { + "epoch": 5.27, + "learning_rate": 1.5773604190622205e-05, + "loss": 0.9109, + "step": 13750 + }, + { + "epoch": 5.27, + "learning_rate": 1.576082790341127e-05, + "loss": 1.0067, + "step": 13760 + }, + { + "epoch": 5.28, + "learning_rate": 1.5748051616200333e-05, + "loss": 0.9225, + "step": 13770 + }, + { + "epoch": 5.28, + "learning_rate": 1.5735275328989396e-05, + "loss": 0.9435, + "step": 13780 + }, + { + "epoch": 5.29, + "learning_rate": 1.5722499041778458e-05, + "loss": 0.9736, + "step": 13790 + }, + { + "epoch": 5.29, + "learning_rate": 1.5709722754567524e-05, + "loss": 0.8851, + "step": 13800 + }, + { + "epoch": 5.29, + "learning_rate": 1.5696946467356586e-05, + "loss": 0.9621, + "step": 13810 + }, + { + "epoch": 5.3, + "learning_rate": 1.5684170180145652e-05, + "loss": 0.9986, + "step": 13820 + }, + { + "epoch": 5.3, + "learning_rate": 1.567139389293471e-05, + "loss": 1.0393, + "step": 13830 + }, + { + "epoch": 5.3, + "learning_rate": 1.5658617605723777e-05, + "loss": 0.9755, + "step": 13840 + }, + { + "epoch": 5.31, + "learning_rate": 1.564584131851284e-05, + "loss": 0.9263, + "step": 13850 + }, + { + "epoch": 5.31, + "learning_rate": 1.5633065031301905e-05, + "loss": 0.9422, + "step": 13860 + }, + { + "epoch": 5.32, + "learning_rate": 1.562028874409097e-05, + "loss": 0.9224, + "step": 13870 + }, + { + "epoch": 5.32, + "learning_rate": 1.560751245688003e-05, + "loss": 0.9424, + "step": 13880 + }, + { + "epoch": 5.32, + "learning_rate": 1.5594736169669095e-05, + "loss": 1.0011, + "step": 13890 + }, + { + "epoch": 5.33, + "learning_rate": 1.5581959882458158e-05, + "loss": 0.9618, + "step": 13900 + }, + { + "epoch": 5.33, + "learning_rate": 1.5569183595247223e-05, + "loss": 0.9749, + "step": 13910 + }, + { + "epoch": 5.34, + "learning_rate": 1.5556407308036286e-05, + "loss": 0.9849, + "step": 13920 + }, + { + "epoch": 5.34, + "learning_rate": 1.5543631020825348e-05, + "loss": 0.9788, + "step": 13930 + }, + { + "epoch": 5.34, + "learning_rate": 1.553085473361441e-05, + "loss": 1.0272, + "step": 13940 + }, + { + "epoch": 5.35, + "learning_rate": 1.5518078446403476e-05, + "loss": 0.9311, + "step": 13950 + }, + { + "epoch": 5.35, + "learning_rate": 1.550530215919254e-05, + "loss": 0.9522, + "step": 13960 + }, + { + "epoch": 5.35, + "learning_rate": 1.5492525871981604e-05, + "loss": 0.9477, + "step": 13970 + }, + { + "epoch": 5.36, + "learning_rate": 1.5479749584770663e-05, + "loss": 1.0389, + "step": 13980 + }, + { + "epoch": 5.36, + "learning_rate": 1.546697329755973e-05, + "loss": 1.0002, + "step": 13990 + }, + { + "epoch": 5.37, + "learning_rate": 1.545419701034879e-05, + "loss": 0.9685, + "step": 14000 + }, + { + "epoch": 5.37, + "learning_rate": 1.5441420723137857e-05, + "loss": 1.0261, + "step": 14010 + }, + { + "epoch": 5.37, + "learning_rate": 1.542864443592692e-05, + "loss": 0.9437, + "step": 14020 + }, + { + "epoch": 5.38, + "learning_rate": 1.5415868148715985e-05, + "loss": 0.9554, + "step": 14030 + }, + { + "epoch": 5.38, + "learning_rate": 1.5403091861505048e-05, + "loss": 0.9672, + "step": 14040 + }, + { + "epoch": 5.39, + "learning_rate": 1.539031557429411e-05, + "loss": 1.0075, + "step": 14050 + }, + { + "epoch": 5.39, + "learning_rate": 1.5377539287083176e-05, + "loss": 0.9677, + "step": 14060 + }, + { + "epoch": 5.39, + "learning_rate": 1.5364762999872238e-05, + "loss": 0.9479, + "step": 14070 + }, + { + "epoch": 5.4, + "learning_rate": 1.5351986712661304e-05, + "loss": 0.8863, + "step": 14080 + }, + { + "epoch": 5.4, + "learning_rate": 1.5339210425450363e-05, + "loss": 1.02, + "step": 14090 + }, + { + "epoch": 5.4, + "learning_rate": 1.532643413823943e-05, + "loss": 1.0428, + "step": 14100 + }, + { + "epoch": 5.41, + "learning_rate": 1.531365785102849e-05, + "loss": 1.0251, + "step": 14110 + }, + { + "epoch": 5.41, + "learning_rate": 1.5300881563817557e-05, + "loss": 0.9373, + "step": 14120 + }, + { + "epoch": 5.42, + "learning_rate": 1.528810527660662e-05, + "loss": 0.9771, + "step": 14130 + }, + { + "epoch": 5.42, + "learning_rate": 1.527532898939568e-05, + "loss": 0.8552, + "step": 14140 + }, + { + "epoch": 5.42, + "learning_rate": 1.5262552702184744e-05, + "loss": 1.0306, + "step": 14150 + }, + { + "epoch": 5.43, + "learning_rate": 1.524977641497381e-05, + "loss": 1.0745, + "step": 14160 + }, + { + "epoch": 5.43, + "learning_rate": 1.5237000127762872e-05, + "loss": 0.9924, + "step": 14170 + }, + { + "epoch": 5.44, + "learning_rate": 1.5224223840551936e-05, + "loss": 1.0457, + "step": 14180 + }, + { + "epoch": 5.44, + "learning_rate": 1.5211447553340998e-05, + "loss": 0.9553, + "step": 14190 + }, + { + "epoch": 5.44, + "learning_rate": 1.5198671266130062e-05, + "loss": 0.9672, + "step": 14200 + }, + { + "epoch": 5.45, + "learning_rate": 1.5185894978919128e-05, + "loss": 0.9946, + "step": 14210 + }, + { + "epoch": 5.45, + "learning_rate": 1.517311869170819e-05, + "loss": 0.9928, + "step": 14220 + }, + { + "epoch": 5.45, + "learning_rate": 1.5160342404497255e-05, + "loss": 0.9947, + "step": 14230 + }, + { + "epoch": 5.46, + "learning_rate": 1.5147566117286317e-05, + "loss": 1.0306, + "step": 14240 + }, + { + "epoch": 5.46, + "learning_rate": 1.5134789830075381e-05, + "loss": 1.0141, + "step": 14250 + }, + { + "epoch": 5.47, + "learning_rate": 1.5122013542864443e-05, + "loss": 0.9512, + "step": 14260 + }, + { + "epoch": 5.47, + "learning_rate": 1.5109237255653509e-05, + "loss": 0.9386, + "step": 14270 + }, + { + "epoch": 5.47, + "learning_rate": 1.509646096844257e-05, + "loss": 0.9933, + "step": 14280 + }, + { + "epoch": 5.48, + "learning_rate": 1.5083684681231635e-05, + "loss": 0.9377, + "step": 14290 + }, + { + "epoch": 5.48, + "learning_rate": 1.5070908394020698e-05, + "loss": 0.9007, + "step": 14300 + }, + { + "epoch": 5.48, + "learning_rate": 1.5058132106809762e-05, + "loss": 0.9382, + "step": 14310 + }, + { + "epoch": 5.49, + "learning_rate": 1.5045355819598824e-05, + "loss": 0.8972, + "step": 14320 + }, + { + "epoch": 5.49, + "learning_rate": 1.5032579532387888e-05, + "loss": 1.0583, + "step": 14330 + }, + { + "epoch": 5.5, + "learning_rate": 1.501980324517695e-05, + "loss": 0.992, + "step": 14340 + }, + { + "epoch": 5.5, + "learning_rate": 1.5007026957966016e-05, + "loss": 0.9516, + "step": 14350 + }, + { + "epoch": 5.5, + "learning_rate": 1.4994250670755079e-05, + "loss": 0.9898, + "step": 14360 + }, + { + "epoch": 5.51, + "learning_rate": 1.4981474383544143e-05, + "loss": 0.9197, + "step": 14370 + }, + { + "epoch": 5.51, + "learning_rate": 1.4968698096333205e-05, + "loss": 0.9178, + "step": 14380 + }, + { + "epoch": 5.52, + "learning_rate": 1.495592180912227e-05, + "loss": 0.9039, + "step": 14390 + }, + { + "epoch": 5.52, + "learning_rate": 1.4943145521911333e-05, + "loss": 0.9795, + "step": 14400 + }, + { + "epoch": 5.52, + "learning_rate": 1.4930369234700396e-05, + "loss": 1.0887, + "step": 14410 + }, + { + "epoch": 5.53, + "learning_rate": 1.491759294748946e-05, + "loss": 0.9682, + "step": 14420 + }, + { + "epoch": 5.53, + "learning_rate": 1.4904816660278524e-05, + "loss": 1.0227, + "step": 14430 + }, + { + "epoch": 5.53, + "learning_rate": 1.4892040373067586e-05, + "loss": 0.9106, + "step": 14440 + }, + { + "epoch": 5.54, + "learning_rate": 1.487926408585665e-05, + "loss": 0.9762, + "step": 14450 + }, + { + "epoch": 5.54, + "learning_rate": 1.4866487798645713e-05, + "loss": 0.8884, + "step": 14460 + }, + { + "epoch": 5.55, + "learning_rate": 1.4853711511434778e-05, + "loss": 0.9572, + "step": 14470 + }, + { + "epoch": 5.55, + "learning_rate": 1.4840935224223842e-05, + "loss": 0.965, + "step": 14480 + }, + { + "epoch": 5.55, + "learning_rate": 1.4828158937012905e-05, + "loss": 0.9821, + "step": 14490 + }, + { + "epoch": 5.56, + "learning_rate": 1.4815382649801969e-05, + "loss": 1.0218, + "step": 14500 + }, + { + "epoch": 5.56, + "learning_rate": 1.4802606362591031e-05, + "loss": 0.9829, + "step": 14510 + }, + { + "epoch": 5.57, + "learning_rate": 1.4789830075380095e-05, + "loss": 0.9546, + "step": 14520 + }, + { + "epoch": 5.57, + "learning_rate": 1.477705378816916e-05, + "loss": 0.9789, + "step": 14530 + }, + { + "epoch": 5.57, + "learning_rate": 1.4764277500958222e-05, + "loss": 0.9226, + "step": 14540 + }, + { + "epoch": 5.58, + "learning_rate": 1.4751501213747286e-05, + "loss": 0.9627, + "step": 14550 + }, + { + "epoch": 5.58, + "learning_rate": 1.4738724926536348e-05, + "loss": 0.9271, + "step": 14560 + }, + { + "epoch": 5.58, + "learning_rate": 1.4725948639325412e-05, + "loss": 0.9789, + "step": 14570 + }, + { + "epoch": 5.59, + "learning_rate": 1.4713172352114476e-05, + "loss": 0.962, + "step": 14580 + }, + { + "epoch": 5.59, + "learning_rate": 1.4700396064903539e-05, + "loss": 0.9724, + "step": 14590 + }, + { + "epoch": 5.6, + "learning_rate": 1.4687619777692603e-05, + "loss": 0.9596, + "step": 14600 + }, + { + "epoch": 5.6, + "learning_rate": 1.4674843490481667e-05, + "loss": 0.9355, + "step": 14610 + }, + { + "epoch": 5.6, + "learning_rate": 1.4662067203270729e-05, + "loss": 0.9744, + "step": 14620 + }, + { + "epoch": 5.61, + "learning_rate": 1.4649290916059793e-05, + "loss": 0.9599, + "step": 14630 + }, + { + "epoch": 5.61, + "learning_rate": 1.4636514628848857e-05, + "loss": 0.9202, + "step": 14640 + }, + { + "epoch": 5.62, + "learning_rate": 1.4623738341637921e-05, + "loss": 0.9353, + "step": 14650 + }, + { + "epoch": 5.62, + "learning_rate": 1.4610962054426985e-05, + "loss": 1.0016, + "step": 14660 + }, + { + "epoch": 5.62, + "learning_rate": 1.4598185767216048e-05, + "loss": 0.9177, + "step": 14670 + }, + { + "epoch": 5.63, + "learning_rate": 1.4585409480005112e-05, + "loss": 0.8621, + "step": 14680 + }, + { + "epoch": 5.63, + "learning_rate": 1.4572633192794174e-05, + "loss": 0.9553, + "step": 14690 + }, + { + "epoch": 5.63, + "learning_rate": 1.4559856905583238e-05, + "loss": 0.9857, + "step": 14700 + }, + { + "epoch": 5.64, + "learning_rate": 1.4547080618372302e-05, + "loss": 0.9332, + "step": 14710 + }, + { + "epoch": 5.64, + "learning_rate": 1.4534304331161364e-05, + "loss": 0.9719, + "step": 14720 + }, + { + "epoch": 5.65, + "learning_rate": 1.4521528043950429e-05, + "loss": 0.9366, + "step": 14730 + }, + { + "epoch": 5.65, + "learning_rate": 1.4508751756739491e-05, + "loss": 0.8713, + "step": 14740 + }, + { + "epoch": 5.65, + "learning_rate": 1.4495975469528555e-05, + "loss": 0.925, + "step": 14750 + }, + { + "epoch": 5.66, + "learning_rate": 1.4483199182317619e-05, + "loss": 0.9503, + "step": 14760 + }, + { + "epoch": 5.66, + "learning_rate": 1.4470422895106681e-05, + "loss": 0.9484, + "step": 14770 + }, + { + "epoch": 5.67, + "learning_rate": 1.4457646607895745e-05, + "loss": 0.9774, + "step": 14780 + }, + { + "epoch": 5.67, + "learning_rate": 1.444487032068481e-05, + "loss": 0.9409, + "step": 14790 + }, + { + "epoch": 5.67, + "learning_rate": 1.4432094033473872e-05, + "loss": 0.9403, + "step": 14800 + }, + { + "epoch": 5.68, + "learning_rate": 1.4419317746262936e-05, + "loss": 0.8935, + "step": 14810 + }, + { + "epoch": 5.68, + "learning_rate": 1.4406541459052e-05, + "loss": 0.9192, + "step": 14820 + }, + { + "epoch": 5.68, + "learning_rate": 1.4393765171841064e-05, + "loss": 0.9793, + "step": 14830 + }, + { + "epoch": 5.69, + "learning_rate": 1.4380988884630128e-05, + "loss": 0.9864, + "step": 14840 + }, + { + "epoch": 5.69, + "learning_rate": 1.436821259741919e-05, + "loss": 0.947, + "step": 14850 + }, + { + "epoch": 5.7, + "learning_rate": 1.4355436310208255e-05, + "loss": 0.8894, + "step": 14860 + }, + { + "epoch": 5.7, + "learning_rate": 1.4342660022997317e-05, + "loss": 0.9636, + "step": 14870 + }, + { + "epoch": 5.7, + "learning_rate": 1.4329883735786381e-05, + "loss": 1.0282, + "step": 14880 + }, + { + "epoch": 5.71, + "learning_rate": 1.4317107448575445e-05, + "loss": 0.8897, + "step": 14890 + }, + { + "epoch": 5.71, + "learning_rate": 1.4304331161364507e-05, + "loss": 0.9213, + "step": 14900 + }, + { + "epoch": 5.71, + "learning_rate": 1.4291554874153571e-05, + "loss": 0.984, + "step": 14910 + }, + { + "epoch": 5.72, + "learning_rate": 1.4278778586942634e-05, + "loss": 0.9425, + "step": 14920 + }, + { + "epoch": 5.72, + "learning_rate": 1.4266002299731698e-05, + "loss": 0.8759, + "step": 14930 + }, + { + "epoch": 5.73, + "learning_rate": 1.4253226012520762e-05, + "loss": 1.0491, + "step": 14940 + }, + { + "epoch": 5.73, + "learning_rate": 1.4240449725309824e-05, + "loss": 0.9523, + "step": 14950 + }, + { + "epoch": 5.73, + "learning_rate": 1.4227673438098888e-05, + "loss": 0.9773, + "step": 14960 + }, + { + "epoch": 5.74, + "learning_rate": 1.4214897150887952e-05, + "loss": 1.0238, + "step": 14970 + }, + { + "epoch": 5.74, + "learning_rate": 1.4202120863677015e-05, + "loss": 0.8875, + "step": 14980 + }, + { + "epoch": 5.75, + "learning_rate": 1.418934457646608e-05, + "loss": 0.9249, + "step": 14990 + }, + { + "epoch": 5.75, + "learning_rate": 1.4176568289255143e-05, + "loss": 0.931, + "step": 15000 + }, + { + "epoch": 5.75, + "learning_rate": 1.4163792002044207e-05, + "loss": 0.9782, + "step": 15010 + }, + { + "epoch": 5.76, + "learning_rate": 1.4151015714833271e-05, + "loss": 0.9454, + "step": 15020 + }, + { + "epoch": 5.76, + "learning_rate": 1.4138239427622333e-05, + "loss": 0.985, + "step": 15030 + }, + { + "epoch": 5.76, + "learning_rate": 1.4125463140411397e-05, + "loss": 0.9814, + "step": 15040 + }, + { + "epoch": 5.77, + "learning_rate": 1.411268685320046e-05, + "loss": 0.994, + "step": 15050 + }, + { + "epoch": 5.77, + "learning_rate": 1.4099910565989524e-05, + "loss": 0.981, + "step": 15060 + }, + { + "epoch": 5.78, + "learning_rate": 1.4087134278778588e-05, + "loss": 0.8869, + "step": 15070 + }, + { + "epoch": 5.78, + "learning_rate": 1.407435799156765e-05, + "loss": 0.8948, + "step": 15080 + }, + { + "epoch": 5.78, + "learning_rate": 1.4061581704356714e-05, + "loss": 1.0393, + "step": 15090 + }, + { + "epoch": 5.79, + "learning_rate": 1.4048805417145778e-05, + "loss": 0.9359, + "step": 15100 + }, + { + "epoch": 5.79, + "learning_rate": 1.403602912993484e-05, + "loss": 0.9872, + "step": 15110 + }, + { + "epoch": 5.8, + "learning_rate": 1.4023252842723905e-05, + "loss": 0.9779, + "step": 15120 + }, + { + "epoch": 5.8, + "learning_rate": 1.4010476555512967e-05, + "loss": 1.022, + "step": 15130 + }, + { + "epoch": 5.8, + "learning_rate": 1.3997700268302031e-05, + "loss": 0.9735, + "step": 15140 + }, + { + "epoch": 5.81, + "learning_rate": 1.3984923981091095e-05, + "loss": 0.9278, + "step": 15150 + }, + { + "epoch": 5.81, + "learning_rate": 1.397214769388016e-05, + "loss": 0.9582, + "step": 15160 + }, + { + "epoch": 5.81, + "learning_rate": 1.3959371406669223e-05, + "loss": 0.9715, + "step": 15170 + }, + { + "epoch": 5.82, + "learning_rate": 1.3946595119458286e-05, + "loss": 0.9954, + "step": 15180 + }, + { + "epoch": 5.82, + "learning_rate": 1.393381883224735e-05, + "loss": 0.9101, + "step": 15190 + }, + { + "epoch": 5.83, + "learning_rate": 1.3921042545036414e-05, + "loss": 0.8907, + "step": 15200 + }, + { + "epoch": 5.83, + "learning_rate": 1.3908266257825476e-05, + "loss": 0.896, + "step": 15210 + }, + { + "epoch": 5.83, + "learning_rate": 1.389548997061454e-05, + "loss": 1.0151, + "step": 15220 + }, + { + "epoch": 5.84, + "learning_rate": 1.3882713683403603e-05, + "loss": 0.9799, + "step": 15230 + }, + { + "epoch": 5.84, + "learning_rate": 1.3869937396192667e-05, + "loss": 0.9436, + "step": 15240 + }, + { + "epoch": 5.85, + "learning_rate": 1.385716110898173e-05, + "loss": 0.9756, + "step": 15250 + }, + { + "epoch": 5.85, + "learning_rate": 1.3844384821770793e-05, + "loss": 0.9419, + "step": 15260 + }, + { + "epoch": 5.85, + "learning_rate": 1.3831608534559857e-05, + "loss": 0.9821, + "step": 15270 + }, + { + "epoch": 5.86, + "learning_rate": 1.3818832247348921e-05, + "loss": 0.9894, + "step": 15280 + }, + { + "epoch": 5.86, + "learning_rate": 1.3806055960137984e-05, + "loss": 0.9639, + "step": 15290 + }, + { + "epoch": 5.86, + "learning_rate": 1.3793279672927048e-05, + "loss": 0.9347, + "step": 15300 + }, + { + "epoch": 5.87, + "learning_rate": 1.378050338571611e-05, + "loss": 0.9394, + "step": 15310 + }, + { + "epoch": 5.87, + "learning_rate": 1.3767727098505174e-05, + "loss": 0.9341, + "step": 15320 + }, + { + "epoch": 5.88, + "learning_rate": 1.375495081129424e-05, + "loss": 0.9435, + "step": 15330 + }, + { + "epoch": 5.88, + "learning_rate": 1.3742174524083302e-05, + "loss": 0.9809, + "step": 15340 + }, + { + "epoch": 5.88, + "learning_rate": 1.3729398236872366e-05, + "loss": 0.9137, + "step": 15350 + }, + { + "epoch": 5.89, + "learning_rate": 1.3716621949661429e-05, + "loss": 0.9425, + "step": 15360 + }, + { + "epoch": 5.89, + "learning_rate": 1.3703845662450493e-05, + "loss": 0.9033, + "step": 15370 + }, + { + "epoch": 5.89, + "learning_rate": 1.3691069375239557e-05, + "loss": 0.9462, + "step": 15380 + }, + { + "epoch": 5.9, + "learning_rate": 1.3678293088028619e-05, + "loss": 0.9076, + "step": 15390 + }, + { + "epoch": 5.9, + "learning_rate": 1.3665516800817683e-05, + "loss": 0.9702, + "step": 15400 + }, + { + "epoch": 5.91, + "learning_rate": 1.3652740513606745e-05, + "loss": 0.9613, + "step": 15410 + }, + { + "epoch": 5.91, + "learning_rate": 1.363996422639581e-05, + "loss": 0.9325, + "step": 15420 + }, + { + "epoch": 5.91, + "learning_rate": 1.3627187939184874e-05, + "loss": 0.9128, + "step": 15430 + }, + { + "epoch": 5.92, + "learning_rate": 1.3614411651973936e-05, + "loss": 0.9716, + "step": 15440 + }, + { + "epoch": 5.92, + "learning_rate": 1.3601635364763e-05, + "loss": 0.912, + "step": 15450 + }, + { + "epoch": 5.93, + "learning_rate": 1.3588859077552064e-05, + "loss": 0.8953, + "step": 15460 + }, + { + "epoch": 5.93, + "learning_rate": 1.3576082790341126e-05, + "loss": 0.9225, + "step": 15470 + }, + { + "epoch": 5.93, + "learning_rate": 1.356330650313019e-05, + "loss": 1.0659, + "step": 15480 + }, + { + "epoch": 5.94, + "learning_rate": 1.3550530215919253e-05, + "loss": 0.9815, + "step": 15490 + }, + { + "epoch": 5.94, + "learning_rate": 1.3537753928708319e-05, + "loss": 0.9231, + "step": 15500 + }, + { + "epoch": 5.94, + "learning_rate": 1.3524977641497383e-05, + "loss": 0.9542, + "step": 15510 + }, + { + "epoch": 5.95, + "learning_rate": 1.3512201354286445e-05, + "loss": 0.8962, + "step": 15520 + }, + { + "epoch": 5.95, + "learning_rate": 1.3499425067075509e-05, + "loss": 0.976, + "step": 15530 + }, + { + "epoch": 5.96, + "learning_rate": 1.3486648779864571e-05, + "loss": 0.9271, + "step": 15540 + }, + { + "epoch": 5.96, + "learning_rate": 1.3473872492653635e-05, + "loss": 0.9489, + "step": 15550 + }, + { + "epoch": 5.96, + "learning_rate": 1.34610962054427e-05, + "loss": 0.9426, + "step": 15560 + }, + { + "epoch": 5.97, + "learning_rate": 1.3448319918231762e-05, + "loss": 0.983, + "step": 15570 + }, + { + "epoch": 5.97, + "learning_rate": 1.3435543631020826e-05, + "loss": 0.9935, + "step": 15580 + }, + { + "epoch": 5.98, + "learning_rate": 1.3422767343809888e-05, + "loss": 0.9129, + "step": 15590 + }, + { + "epoch": 5.98, + "learning_rate": 1.3409991056598952e-05, + "loss": 0.949, + "step": 15600 + }, + { + "epoch": 5.98, + "learning_rate": 1.3397214769388016e-05, + "loss": 1.095, + "step": 15610 + }, + { + "epoch": 5.99, + "learning_rate": 1.3384438482177079e-05, + "loss": 0.8744, + "step": 15620 + }, + { + "epoch": 5.99, + "learning_rate": 1.3371662194966143e-05, + "loss": 0.9047, + "step": 15630 + }, + { + "epoch": 5.99, + "learning_rate": 1.3358885907755207e-05, + "loss": 0.9946, + "step": 15640 + }, + { + "epoch": 6.0, + "learning_rate": 1.334610962054427e-05, + "loss": 0.9818, + "step": 15650 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.7412021177203364, + "eval_loss": 0.7523965239524841, + "eval_runtime": 305.2278, + "eval_samples_per_second": 136.76, + "eval_steps_per_second": 68.382, + "step": 15654 + }, + { + "epoch": 6.0, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.9586, + "step": 15660 + }, + { + "epoch": 6.01, + "learning_rate": 1.3320557046122397e-05, + "loss": 0.8459, + "step": 15670 + }, + { + "epoch": 6.01, + "learning_rate": 1.3307780758911461e-05, + "loss": 0.8638, + "step": 15680 + }, + { + "epoch": 6.01, + "learning_rate": 1.3295004471700525e-05, + "loss": 0.8783, + "step": 15690 + }, + { + "epoch": 6.02, + "learning_rate": 1.3282228184489588e-05, + "loss": 0.9379, + "step": 15700 + }, + { + "epoch": 6.02, + "learning_rate": 1.3269451897278652e-05, + "loss": 0.8332, + "step": 15710 + }, + { + "epoch": 6.03, + "learning_rate": 1.3256675610067714e-05, + "loss": 1.028, + "step": 15720 + }, + { + "epoch": 6.03, + "learning_rate": 1.3243899322856778e-05, + "loss": 0.8896, + "step": 15730 + }, + { + "epoch": 6.03, + "learning_rate": 1.3231123035645842e-05, + "loss": 0.9274, + "step": 15740 + }, + { + "epoch": 6.04, + "learning_rate": 1.3218346748434905e-05, + "loss": 0.9472, + "step": 15750 + }, + { + "epoch": 6.04, + "learning_rate": 1.3205570461223969e-05, + "loss": 0.9594, + "step": 15760 + }, + { + "epoch": 6.04, + "learning_rate": 1.3192794174013033e-05, + "loss": 0.9271, + "step": 15770 + }, + { + "epoch": 6.05, + "learning_rate": 1.3180017886802095e-05, + "loss": 0.943, + "step": 15780 + }, + { + "epoch": 6.05, + "learning_rate": 1.316724159959116e-05, + "loss": 0.9039, + "step": 15790 + }, + { + "epoch": 6.06, + "learning_rate": 1.3154465312380222e-05, + "loss": 1.0425, + "step": 15800 + }, + { + "epoch": 6.06, + "learning_rate": 1.3141689025169286e-05, + "loss": 1.0, + "step": 15810 + }, + { + "epoch": 6.06, + "learning_rate": 1.312891273795835e-05, + "loss": 0.9602, + "step": 15820 + }, + { + "epoch": 6.07, + "learning_rate": 1.3116136450747412e-05, + "loss": 0.955, + "step": 15830 + }, + { + "epoch": 6.07, + "learning_rate": 1.3103360163536476e-05, + "loss": 0.9675, + "step": 15840 + }, + { + "epoch": 6.08, + "learning_rate": 1.309058387632554e-05, + "loss": 0.882, + "step": 15850 + }, + { + "epoch": 6.08, + "learning_rate": 1.3077807589114604e-05, + "loss": 0.9206, + "step": 15860 + }, + { + "epoch": 6.08, + "learning_rate": 1.306630893062476e-05, + "loss": 0.9227, + "step": 15870 + }, + { + "epoch": 6.09, + "learning_rate": 1.3053532643413823e-05, + "loss": 0.9688, + "step": 15880 + }, + { + "epoch": 6.09, + "learning_rate": 1.3040756356202887e-05, + "loss": 0.914, + "step": 15890 + }, + { + "epoch": 6.09, + "learning_rate": 1.3027980068991952e-05, + "loss": 0.9487, + "step": 15900 + }, + { + "epoch": 6.1, + "learning_rate": 1.3015203781781016e-05, + "loss": 0.928, + "step": 15910 + }, + { + "epoch": 6.1, + "learning_rate": 1.300242749457008e-05, + "loss": 1.0184, + "step": 15920 + }, + { + "epoch": 6.11, + "learning_rate": 1.2989651207359142e-05, + "loss": 0.9388, + "step": 15930 + }, + { + "epoch": 6.11, + "learning_rate": 1.2976874920148206e-05, + "loss": 0.9807, + "step": 15940 + }, + { + "epoch": 6.11, + "learning_rate": 1.2964098632937268e-05, + "loss": 0.9869, + "step": 15950 + }, + { + "epoch": 6.12, + "learning_rate": 1.2951322345726332e-05, + "loss": 0.9385, + "step": 15960 + }, + { + "epoch": 6.12, + "learning_rate": 1.2938546058515397e-05, + "loss": 0.9272, + "step": 15970 + }, + { + "epoch": 6.12, + "learning_rate": 1.2925769771304459e-05, + "loss": 0.915, + "step": 15980 + }, + { + "epoch": 6.13, + "learning_rate": 1.2912993484093523e-05, + "loss": 0.9132, + "step": 15990 + }, + { + "epoch": 6.13, + "learning_rate": 1.2900217196882585e-05, + "loss": 0.8885, + "step": 16000 + }, + { + "epoch": 6.14, + "learning_rate": 1.288744090967165e-05, + "loss": 0.9054, + "step": 16010 + }, + { + "epoch": 6.14, + "learning_rate": 1.2874664622460713e-05, + "loss": 0.8998, + "step": 16020 + }, + { + "epoch": 6.14, + "learning_rate": 1.2861888335249776e-05, + "loss": 0.8836, + "step": 16030 + }, + { + "epoch": 6.15, + "learning_rate": 1.284911204803884e-05, + "loss": 0.9405, + "step": 16040 + }, + { + "epoch": 6.15, + "learning_rate": 1.2836335760827902e-05, + "loss": 0.9278, + "step": 16050 + }, + { + "epoch": 6.16, + "learning_rate": 1.2823559473616966e-05, + "loss": 0.9781, + "step": 16060 + }, + { + "epoch": 6.16, + "learning_rate": 1.2810783186406032e-05, + "loss": 0.976, + "step": 16070 + }, + { + "epoch": 6.16, + "learning_rate": 1.2798006899195094e-05, + "loss": 0.9477, + "step": 16080 + }, + { + "epoch": 6.17, + "learning_rate": 1.2785230611984158e-05, + "loss": 0.9721, + "step": 16090 + }, + { + "epoch": 6.17, + "learning_rate": 1.2772454324773223e-05, + "loss": 0.9329, + "step": 16100 + }, + { + "epoch": 6.17, + "learning_rate": 1.2759678037562285e-05, + "loss": 0.9914, + "step": 16110 + }, + { + "epoch": 6.18, + "learning_rate": 1.2746901750351349e-05, + "loss": 1.0077, + "step": 16120 + }, + { + "epoch": 6.18, + "learning_rate": 1.2734125463140411e-05, + "loss": 0.9494, + "step": 16130 + }, + { + "epoch": 6.19, + "learning_rate": 1.2721349175929475e-05, + "loss": 0.8976, + "step": 16140 + }, + { + "epoch": 6.19, + "learning_rate": 1.270857288871854e-05, + "loss": 0.9463, + "step": 16150 + }, + { + "epoch": 6.19, + "learning_rate": 1.2695796601507602e-05, + "loss": 0.9427, + "step": 16160 + }, + { + "epoch": 6.2, + "learning_rate": 1.2683020314296666e-05, + "loss": 0.9242, + "step": 16170 + }, + { + "epoch": 6.2, + "learning_rate": 1.2670244027085728e-05, + "loss": 0.9318, + "step": 16180 + }, + { + "epoch": 6.21, + "learning_rate": 1.2657467739874792e-05, + "loss": 0.8956, + "step": 16190 + }, + { + "epoch": 6.21, + "learning_rate": 1.2644691452663856e-05, + "loss": 0.9115, + "step": 16200 + }, + { + "epoch": 6.21, + "learning_rate": 1.2631915165452919e-05, + "loss": 0.9006, + "step": 16210 + }, + { + "epoch": 6.22, + "learning_rate": 1.2619138878241983e-05, + "loss": 0.9181, + "step": 16220 + }, + { + "epoch": 6.22, + "learning_rate": 1.2606362591031047e-05, + "loss": 0.9392, + "step": 16230 + }, + { + "epoch": 6.22, + "learning_rate": 1.259358630382011e-05, + "loss": 0.9432, + "step": 16240 + }, + { + "epoch": 6.23, + "learning_rate": 1.2580810016609175e-05, + "loss": 0.9381, + "step": 16250 + }, + { + "epoch": 6.23, + "learning_rate": 1.2568033729398237e-05, + "loss": 0.8903, + "step": 16260 + }, + { + "epoch": 6.24, + "learning_rate": 1.2555257442187301e-05, + "loss": 0.8658, + "step": 16270 + }, + { + "epoch": 6.24, + "learning_rate": 1.2542481154976365e-05, + "loss": 0.9013, + "step": 16280 + }, + { + "epoch": 6.24, + "learning_rate": 1.2529704867765428e-05, + "loss": 0.8659, + "step": 16290 + }, + { + "epoch": 6.25, + "learning_rate": 1.2516928580554492e-05, + "loss": 0.8926, + "step": 16300 + }, + { + "epoch": 6.25, + "learning_rate": 1.2504152293343554e-05, + "loss": 0.9089, + "step": 16310 + }, + { + "epoch": 6.26, + "learning_rate": 1.2491376006132618e-05, + "loss": 0.9359, + "step": 16320 + }, + { + "epoch": 6.26, + "learning_rate": 1.2478599718921682e-05, + "loss": 0.8845, + "step": 16330 + }, + { + "epoch": 6.26, + "learning_rate": 1.2465823431710745e-05, + "loss": 0.9725, + "step": 16340 + }, + { + "epoch": 6.27, + "learning_rate": 1.2453047144499809e-05, + "loss": 0.9351, + "step": 16350 + }, + { + "epoch": 6.27, + "learning_rate": 1.2440270857288871e-05, + "loss": 0.8831, + "step": 16360 + }, + { + "epoch": 6.27, + "learning_rate": 1.2427494570077935e-05, + "loss": 0.9873, + "step": 16370 + }, + { + "epoch": 6.28, + "learning_rate": 1.2414718282867e-05, + "loss": 0.9707, + "step": 16380 + }, + { + "epoch": 6.28, + "learning_rate": 1.2401941995656062e-05, + "loss": 0.8314, + "step": 16390 + }, + { + "epoch": 6.29, + "learning_rate": 1.2389165708445126e-05, + "loss": 0.9669, + "step": 16400 + }, + { + "epoch": 6.29, + "learning_rate": 1.2376389421234191e-05, + "loss": 0.8932, + "step": 16410 + }, + { + "epoch": 6.29, + "learning_rate": 1.2363613134023254e-05, + "loss": 0.9126, + "step": 16420 + }, + { + "epoch": 6.3, + "learning_rate": 1.2350836846812318e-05, + "loss": 0.9265, + "step": 16430 + }, + { + "epoch": 6.3, + "learning_rate": 1.233806055960138e-05, + "loss": 1.0193, + "step": 16440 + }, + { + "epoch": 6.31, + "learning_rate": 1.2325284272390444e-05, + "loss": 0.9645, + "step": 16450 + }, + { + "epoch": 6.31, + "learning_rate": 1.2312507985179508e-05, + "loss": 0.9286, + "step": 16460 + }, + { + "epoch": 6.31, + "learning_rate": 1.229973169796857e-05, + "loss": 0.9656, + "step": 16470 + }, + { + "epoch": 6.32, + "learning_rate": 1.2286955410757635e-05, + "loss": 0.9616, + "step": 16480 + }, + { + "epoch": 6.32, + "learning_rate": 1.2274179123546697e-05, + "loss": 0.9177, + "step": 16490 + }, + { + "epoch": 6.32, + "learning_rate": 1.2261402836335761e-05, + "loss": 0.9946, + "step": 16500 + }, + { + "epoch": 6.33, + "learning_rate": 1.2248626549124825e-05, + "loss": 0.9088, + "step": 16510 + }, + { + "epoch": 6.33, + "learning_rate": 1.2235850261913887e-05, + "loss": 1.0018, + "step": 16520 + }, + { + "epoch": 6.34, + "learning_rate": 1.2223073974702952e-05, + "loss": 0.9384, + "step": 16530 + }, + { + "epoch": 6.34, + "learning_rate": 1.2210297687492014e-05, + "loss": 0.9727, + "step": 16540 + }, + { + "epoch": 6.34, + "learning_rate": 1.2197521400281078e-05, + "loss": 1.0048, + "step": 16550 + }, + { + "epoch": 6.35, + "learning_rate": 1.2184745113070142e-05, + "loss": 0.9819, + "step": 16560 + }, + { + "epoch": 6.35, + "learning_rate": 1.2171968825859204e-05, + "loss": 0.8762, + "step": 16570 + }, + { + "epoch": 6.35, + "learning_rate": 1.2159192538648268e-05, + "loss": 0.9546, + "step": 16580 + }, + { + "epoch": 6.36, + "learning_rate": 1.2146416251437334e-05, + "loss": 0.9259, + "step": 16590 + }, + { + "epoch": 6.36, + "learning_rate": 1.2133639964226397e-05, + "loss": 0.9361, + "step": 16600 + }, + { + "epoch": 6.37, + "learning_rate": 1.212086367701546e-05, + "loss": 0.9629, + "step": 16610 + }, + { + "epoch": 6.37, + "learning_rate": 1.2108087389804523e-05, + "loss": 0.9916, + "step": 16620 + }, + { + "epoch": 6.37, + "learning_rate": 1.2095311102593587e-05, + "loss": 0.9547, + "step": 16630 + }, + { + "epoch": 6.38, + "learning_rate": 1.2082534815382651e-05, + "loss": 0.9804, + "step": 16640 + }, + { + "epoch": 6.38, + "learning_rate": 1.2069758528171713e-05, + "loss": 0.9321, + "step": 16650 + }, + { + "epoch": 6.39, + "learning_rate": 1.2056982240960777e-05, + "loss": 0.9202, + "step": 16660 + }, + { + "epoch": 6.39, + "learning_rate": 1.204420595374984e-05, + "loss": 0.9242, + "step": 16670 + }, + { + "epoch": 6.39, + "learning_rate": 1.2031429666538904e-05, + "loss": 0.8881, + "step": 16680 + }, + { + "epoch": 6.4, + "learning_rate": 1.2018653379327968e-05, + "loss": 0.9281, + "step": 16690 + }, + { + "epoch": 6.4, + "learning_rate": 1.200587709211703e-05, + "loss": 0.9828, + "step": 16700 + }, + { + "epoch": 6.4, + "learning_rate": 1.1993100804906094e-05, + "loss": 0.9089, + "step": 16710 + }, + { + "epoch": 6.41, + "learning_rate": 1.1980324517695157e-05, + "loss": 0.9634, + "step": 16720 + }, + { + "epoch": 6.41, + "learning_rate": 1.196754823048422e-05, + "loss": 0.9364, + "step": 16730 + }, + { + "epoch": 6.42, + "learning_rate": 1.1954771943273285e-05, + "loss": 0.929, + "step": 16740 + }, + { + "epoch": 6.42, + "learning_rate": 1.1941995656062347e-05, + "loss": 0.8833, + "step": 16750 + }, + { + "epoch": 6.42, + "learning_rate": 1.1929219368851413e-05, + "loss": 0.9461, + "step": 16760 + }, + { + "epoch": 6.43, + "learning_rate": 1.1916443081640477e-05, + "loss": 0.9085, + "step": 16770 + }, + { + "epoch": 6.43, + "learning_rate": 1.190366679442954e-05, + "loss": 0.9154, + "step": 16780 + }, + { + "epoch": 6.44, + "learning_rate": 1.1890890507218603e-05, + "loss": 0.9294, + "step": 16790 + }, + { + "epoch": 6.44, + "learning_rate": 1.1878114220007666e-05, + "loss": 0.9265, + "step": 16800 + }, + { + "epoch": 6.44, + "learning_rate": 1.186533793279673e-05, + "loss": 0.9582, + "step": 16810 + }, + { + "epoch": 6.45, + "learning_rate": 1.1852561645585794e-05, + "loss": 0.9213, + "step": 16820 + }, + { + "epoch": 6.45, + "learning_rate": 1.1839785358374856e-05, + "loss": 0.9956, + "step": 16830 + }, + { + "epoch": 6.45, + "learning_rate": 1.182700907116392e-05, + "loss": 0.8833, + "step": 16840 + }, + { + "epoch": 6.46, + "learning_rate": 1.1814232783952983e-05, + "loss": 0.8304, + "step": 16850 + }, + { + "epoch": 6.46, + "learning_rate": 1.1801456496742047e-05, + "loss": 0.9825, + "step": 16860 + }, + { + "epoch": 6.47, + "learning_rate": 1.178868020953111e-05, + "loss": 0.8428, + "step": 16870 + }, + { + "epoch": 6.47, + "learning_rate": 1.1775903922320173e-05, + "loss": 0.8903, + "step": 16880 + }, + { + "epoch": 6.47, + "learning_rate": 1.1763127635109237e-05, + "loss": 1.0443, + "step": 16890 + }, + { + "epoch": 6.48, + "learning_rate": 1.1750351347898301e-05, + "loss": 0.8667, + "step": 16900 + }, + { + "epoch": 6.48, + "learning_rate": 1.1737575060687364e-05, + "loss": 0.919, + "step": 16910 + }, + { + "epoch": 6.49, + "learning_rate": 1.1724798773476428e-05, + "loss": 0.9725, + "step": 16920 + }, + { + "epoch": 6.49, + "learning_rate": 1.1712022486265492e-05, + "loss": 1.0142, + "step": 16930 + }, + { + "epoch": 6.49, + "learning_rate": 1.1699246199054556e-05, + "loss": 0.8658, + "step": 16940 + }, + { + "epoch": 6.5, + "learning_rate": 1.168646991184362e-05, + "loss": 0.9889, + "step": 16950 + }, + { + "epoch": 6.5, + "learning_rate": 1.1673693624632682e-05, + "loss": 0.9083, + "step": 16960 + }, + { + "epoch": 6.5, + "learning_rate": 1.1662194966142839e-05, + "loss": 1.0192, + "step": 16970 + }, + { + "epoch": 6.51, + "learning_rate": 1.1649418678931903e-05, + "loss": 0.9716, + "step": 16980 + }, + { + "epoch": 6.51, + "learning_rate": 1.1636642391720967e-05, + "loss": 0.9514, + "step": 16990 + }, + { + "epoch": 6.52, + "learning_rate": 1.162386610451003e-05, + "loss": 0.9483, + "step": 17000 + }, + { + "epoch": 6.52, + "learning_rate": 1.1611089817299094e-05, + "loss": 0.9177, + "step": 17010 + }, + { + "epoch": 6.52, + "learning_rate": 1.1598313530088158e-05, + "loss": 0.9618, + "step": 17020 + }, + { + "epoch": 6.53, + "learning_rate": 1.158553724287722e-05, + "loss": 0.925, + "step": 17030 + }, + { + "epoch": 6.53, + "learning_rate": 1.1572760955666284e-05, + "loss": 0.9186, + "step": 17040 + }, + { + "epoch": 6.54, + "learning_rate": 1.1559984668455348e-05, + "loss": 0.9548, + "step": 17050 + }, + { + "epoch": 6.54, + "learning_rate": 1.154720838124441e-05, + "loss": 0.8696, + "step": 17060 + }, + { + "epoch": 6.54, + "learning_rate": 1.1534432094033475e-05, + "loss": 0.9062, + "step": 17070 + }, + { + "epoch": 6.55, + "learning_rate": 1.1521655806822537e-05, + "loss": 0.911, + "step": 17080 + }, + { + "epoch": 6.55, + "learning_rate": 1.1508879519611601e-05, + "loss": 0.901, + "step": 17090 + }, + { + "epoch": 6.55, + "learning_rate": 1.1496103232400665e-05, + "loss": 0.8725, + "step": 17100 + }, + { + "epoch": 6.56, + "learning_rate": 1.1483326945189727e-05, + "loss": 0.9769, + "step": 17110 + }, + { + "epoch": 6.56, + "learning_rate": 1.1470550657978791e-05, + "loss": 0.9614, + "step": 17120 + }, + { + "epoch": 6.57, + "learning_rate": 1.1457774370767854e-05, + "loss": 0.8504, + "step": 17130 + }, + { + "epoch": 6.57, + "learning_rate": 1.1444998083556918e-05, + "loss": 0.8765, + "step": 17140 + }, + { + "epoch": 6.57, + "learning_rate": 1.1432221796345984e-05, + "loss": 1.0312, + "step": 17150 + }, + { + "epoch": 6.58, + "learning_rate": 1.1419445509135046e-05, + "loss": 0.94, + "step": 17160 + }, + { + "epoch": 6.58, + "learning_rate": 1.140666922192411e-05, + "loss": 0.9744, + "step": 17170 + }, + { + "epoch": 6.58, + "learning_rate": 1.1393892934713172e-05, + "loss": 0.9472, + "step": 17180 + }, + { + "epoch": 6.59, + "learning_rate": 1.1381116647502236e-05, + "loss": 0.9806, + "step": 17190 + }, + { + "epoch": 6.59, + "learning_rate": 1.13683403602913e-05, + "loss": 0.9444, + "step": 17200 + }, + { + "epoch": 6.6, + "learning_rate": 1.1355564073080363e-05, + "loss": 0.9197, + "step": 17210 + }, + { + "epoch": 6.6, + "learning_rate": 1.1342787785869427e-05, + "loss": 1.0164, + "step": 17220 + }, + { + "epoch": 6.6, + "learning_rate": 1.1330011498658491e-05, + "loss": 0.886, + "step": 17230 + }, + { + "epoch": 6.61, + "learning_rate": 1.1317235211447553e-05, + "loss": 0.9585, + "step": 17240 + }, + { + "epoch": 6.61, + "learning_rate": 1.1304458924236617e-05, + "loss": 0.9107, + "step": 17250 + }, + { + "epoch": 6.62, + "learning_rate": 1.129168263702568e-05, + "loss": 0.9869, + "step": 17260 + }, + { + "epoch": 6.62, + "learning_rate": 1.1278906349814744e-05, + "loss": 0.9038, + "step": 17270 + }, + { + "epoch": 6.62, + "learning_rate": 1.1266130062603808e-05, + "loss": 0.9483, + "step": 17280 + }, + { + "epoch": 6.63, + "learning_rate": 1.125335377539287e-05, + "loss": 0.9691, + "step": 17290 + }, + { + "epoch": 6.63, + "learning_rate": 1.1240577488181934e-05, + "loss": 0.8956, + "step": 17300 + }, + { + "epoch": 6.63, + "learning_rate": 1.1227801200970997e-05, + "loss": 0.9302, + "step": 17310 + }, + { + "epoch": 6.64, + "learning_rate": 1.121502491376006e-05, + "loss": 0.9601, + "step": 17320 + }, + { + "epoch": 6.64, + "learning_rate": 1.1202248626549126e-05, + "loss": 1.024, + "step": 17330 + }, + { + "epoch": 6.65, + "learning_rate": 1.1189472339338189e-05, + "loss": 0.8333, + "step": 17340 + }, + { + "epoch": 6.65, + "learning_rate": 1.1176696052127253e-05, + "loss": 0.9275, + "step": 17350 + }, + { + "epoch": 6.65, + "learning_rate": 1.1163919764916315e-05, + "loss": 0.9635, + "step": 17360 + }, + { + "epoch": 6.66, + "learning_rate": 1.115114347770538e-05, + "loss": 0.9281, + "step": 17370 + }, + { + "epoch": 6.66, + "learning_rate": 1.1138367190494443e-05, + "loss": 0.8926, + "step": 17380 + }, + { + "epoch": 6.67, + "learning_rate": 1.1125590903283506e-05, + "loss": 0.9328, + "step": 17390 + }, + { + "epoch": 6.67, + "learning_rate": 1.111281461607257e-05, + "loss": 0.9769, + "step": 17400 + }, + { + "epoch": 6.67, + "learning_rate": 1.1100038328861634e-05, + "loss": 0.9333, + "step": 17410 + }, + { + "epoch": 6.68, + "learning_rate": 1.1087262041650696e-05, + "loss": 0.9036, + "step": 17420 + }, + { + "epoch": 6.68, + "learning_rate": 1.107448575443976e-05, + "loss": 0.9757, + "step": 17430 + }, + { + "epoch": 6.68, + "learning_rate": 1.1061709467228823e-05, + "loss": 0.9172, + "step": 17440 + }, + { + "epoch": 6.69, + "learning_rate": 1.1048933180017887e-05, + "loss": 0.9398, + "step": 17450 + }, + { + "epoch": 6.69, + "learning_rate": 1.103615689280695e-05, + "loss": 0.9642, + "step": 17460 + }, + { + "epoch": 6.7, + "learning_rate": 1.1023380605596013e-05, + "loss": 0.9532, + "step": 17470 + }, + { + "epoch": 6.7, + "learning_rate": 1.1010604318385077e-05, + "loss": 1.044, + "step": 17480 + }, + { + "epoch": 6.7, + "learning_rate": 1.099782803117414e-05, + "loss": 0.8964, + "step": 17490 + }, + { + "epoch": 6.71, + "learning_rate": 1.0985051743963205e-05, + "loss": 0.9378, + "step": 17500 + }, + { + "epoch": 6.71, + "learning_rate": 1.097227545675227e-05, + "loss": 0.9711, + "step": 17510 + }, + { + "epoch": 6.72, + "learning_rate": 1.0959499169541332e-05, + "loss": 0.9112, + "step": 17520 + }, + { + "epoch": 6.72, + "learning_rate": 1.0946722882330396e-05, + "loss": 0.9576, + "step": 17530 + }, + { + "epoch": 6.72, + "learning_rate": 1.0933946595119458e-05, + "loss": 0.9392, + "step": 17540 + }, + { + "epoch": 6.73, + "learning_rate": 1.0921170307908522e-05, + "loss": 0.897, + "step": 17550 + }, + { + "epoch": 6.73, + "learning_rate": 1.0908394020697586e-05, + "loss": 0.9241, + "step": 17560 + }, + { + "epoch": 6.73, + "learning_rate": 1.0895617733486649e-05, + "loss": 0.9095, + "step": 17570 + }, + { + "epoch": 6.74, + "learning_rate": 1.0882841446275713e-05, + "loss": 0.8671, + "step": 17580 + }, + { + "epoch": 6.74, + "learning_rate": 1.0870065159064777e-05, + "loss": 0.97, + "step": 17590 + }, + { + "epoch": 6.75, + "learning_rate": 1.0857288871853839e-05, + "loss": 1.0, + "step": 17600 + }, + { + "epoch": 6.75, + "learning_rate": 1.0844512584642903e-05, + "loss": 1.016, + "step": 17610 + }, + { + "epoch": 6.75, + "learning_rate": 1.0831736297431965e-05, + "loss": 0.9341, + "step": 17620 + }, + { + "epoch": 6.76, + "learning_rate": 1.081896001022103e-05, + "loss": 0.9678, + "step": 17630 + }, + { + "epoch": 6.76, + "learning_rate": 1.0806183723010094e-05, + "loss": 0.9584, + "step": 17640 + }, + { + "epoch": 6.77, + "learning_rate": 1.0793407435799156e-05, + "loss": 0.9703, + "step": 17650 + }, + { + "epoch": 6.77, + "learning_rate": 1.078063114858822e-05, + "loss": 0.9487, + "step": 17660 + }, + { + "epoch": 6.77, + "learning_rate": 1.0767854861377284e-05, + "loss": 0.8318, + "step": 17670 + }, + { + "epoch": 6.78, + "learning_rate": 1.0755078574166348e-05, + "loss": 0.9905, + "step": 17680 + }, + { + "epoch": 6.78, + "learning_rate": 1.0742302286955412e-05, + "loss": 0.9442, + "step": 17690 + }, + { + "epoch": 6.78, + "learning_rate": 1.0729525999744475e-05, + "loss": 0.9329, + "step": 17700 + }, + { + "epoch": 6.79, + "learning_rate": 1.0716749712533539e-05, + "loss": 0.868, + "step": 17710 + }, + { + "epoch": 6.79, + "learning_rate": 1.0703973425322603e-05, + "loss": 0.9682, + "step": 17720 + }, + { + "epoch": 6.8, + "learning_rate": 1.0691197138111665e-05, + "loss": 0.9541, + "step": 17730 + }, + { + "epoch": 6.8, + "learning_rate": 1.0678420850900729e-05, + "loss": 0.9863, + "step": 17740 + }, + { + "epoch": 6.8, + "learning_rate": 1.0665644563689791e-05, + "loss": 0.9433, + "step": 17750 + }, + { + "epoch": 6.81, + "learning_rate": 1.0652868276478855e-05, + "loss": 0.9281, + "step": 17760 + }, + { + "epoch": 6.81, + "learning_rate": 1.064009198926792e-05, + "loss": 0.9478, + "step": 17770 + }, + { + "epoch": 6.81, + "learning_rate": 1.0627315702056982e-05, + "loss": 0.9242, + "step": 17780 + }, + { + "epoch": 6.82, + "learning_rate": 1.0614539414846046e-05, + "loss": 0.8586, + "step": 17790 + }, + { + "epoch": 6.82, + "learning_rate": 1.0601763127635108e-05, + "loss": 0.876, + "step": 17800 + }, + { + "epoch": 6.83, + "learning_rate": 1.0588986840424172e-05, + "loss": 0.9752, + "step": 17810 + }, + { + "epoch": 6.83, + "learning_rate": 1.0576210553213236e-05, + "loss": 0.9239, + "step": 17820 + }, + { + "epoch": 6.83, + "learning_rate": 1.0563434266002299e-05, + "loss": 0.8582, + "step": 17830 + }, + { + "epoch": 6.84, + "learning_rate": 1.0550657978791365e-05, + "loss": 1.0195, + "step": 17840 + }, + { + "epoch": 6.84, + "learning_rate": 1.0537881691580427e-05, + "loss": 0.864, + "step": 17850 + }, + { + "epoch": 6.85, + "learning_rate": 1.0525105404369491e-05, + "loss": 0.8776, + "step": 17860 + }, + { + "epoch": 6.85, + "learning_rate": 1.0512329117158555e-05, + "loss": 0.9705, + "step": 17870 + }, + { + "epoch": 6.85, + "learning_rate": 1.0499552829947617e-05, + "loss": 0.98, + "step": 17880 + }, + { + "epoch": 6.86, + "learning_rate": 1.0486776542736681e-05, + "loss": 0.9534, + "step": 17890 + }, + { + "epoch": 6.86, + "learning_rate": 1.0474000255525745e-05, + "loss": 0.939, + "step": 17900 + }, + { + "epoch": 6.86, + "learning_rate": 1.0461223968314808e-05, + "loss": 0.9252, + "step": 17910 + }, + { + "epoch": 6.87, + "learning_rate": 1.0448447681103872e-05, + "loss": 0.9684, + "step": 17920 + }, + { + "epoch": 6.87, + "learning_rate": 1.0435671393892934e-05, + "loss": 0.8427, + "step": 17930 + }, + { + "epoch": 6.88, + "learning_rate": 1.0422895106681998e-05, + "loss": 0.9144, + "step": 17940 + }, + { + "epoch": 6.88, + "learning_rate": 1.0410118819471062e-05, + "loss": 0.9737, + "step": 17950 + }, + { + "epoch": 6.88, + "learning_rate": 1.0397342532260125e-05, + "loss": 0.8941, + "step": 17960 + }, + { + "epoch": 6.89, + "learning_rate": 1.0384566245049189e-05, + "loss": 0.9312, + "step": 17970 + }, + { + "epoch": 6.89, + "learning_rate": 1.0371789957838251e-05, + "loss": 0.8658, + "step": 17980 + }, + { + "epoch": 6.9, + "learning_rate": 1.0359013670627315e-05, + "loss": 1.0223, + "step": 17990 + }, + { + "epoch": 6.9, + "learning_rate": 1.034623738341638e-05, + "loss": 0.9213, + "step": 18000 + }, + { + "epoch": 6.9, + "learning_rate": 1.0333461096205443e-05, + "loss": 0.973, + "step": 18010 + }, + { + "epoch": 6.91, + "learning_rate": 1.0320684808994507e-05, + "loss": 0.872, + "step": 18020 + }, + { + "epoch": 6.91, + "learning_rate": 1.030790852178357e-05, + "loss": 0.9214, + "step": 18030 + }, + { + "epoch": 6.91, + "learning_rate": 1.0295132234572634e-05, + "loss": 0.9037, + "step": 18040 + }, + { + "epoch": 6.92, + "learning_rate": 1.0282355947361698e-05, + "loss": 0.9465, + "step": 18050 + }, + { + "epoch": 6.92, + "learning_rate": 1.026957966015076e-05, + "loss": 1.0212, + "step": 18060 + }, + { + "epoch": 6.93, + "learning_rate": 1.0256803372939824e-05, + "loss": 0.9216, + "step": 18070 + }, + { + "epoch": 6.93, + "learning_rate": 1.0244027085728888e-05, + "loss": 0.8502, + "step": 18080 + }, + { + "epoch": 6.93, + "learning_rate": 1.023125079851795e-05, + "loss": 1.0534, + "step": 18090 + }, + { + "epoch": 6.94, + "learning_rate": 1.0218474511307015e-05, + "loss": 0.9301, + "step": 18100 + }, + { + "epoch": 6.94, + "learning_rate": 1.0205698224096077e-05, + "loss": 0.9158, + "step": 18110 + }, + { + "epoch": 6.95, + "learning_rate": 1.0192921936885141e-05, + "loss": 0.921, + "step": 18120 + }, + { + "epoch": 6.95, + "learning_rate": 1.0180145649674205e-05, + "loss": 0.961, + "step": 18130 + }, + { + "epoch": 6.95, + "learning_rate": 1.0167369362463268e-05, + "loss": 0.9089, + "step": 18140 + }, + { + "epoch": 6.96, + "learning_rate": 1.0154593075252332e-05, + "loss": 0.9501, + "step": 18150 + }, + { + "epoch": 6.96, + "learning_rate": 1.0141816788041394e-05, + "loss": 0.9391, + "step": 18160 + }, + { + "epoch": 6.96, + "learning_rate": 1.0129040500830458e-05, + "loss": 0.8947, + "step": 18170 + }, + { + "epoch": 6.97, + "learning_rate": 1.0116264213619524e-05, + "loss": 0.9362, + "step": 18180 + }, + { + "epoch": 6.97, + "learning_rate": 1.0103487926408586e-05, + "loss": 0.957, + "step": 18190 + }, + { + "epoch": 6.98, + "learning_rate": 1.009071163919765e-05, + "loss": 0.8636, + "step": 18200 + }, + { + "epoch": 6.98, + "learning_rate": 1.0077935351986713e-05, + "loss": 0.8835, + "step": 18210 + }, + { + "epoch": 6.98, + "learning_rate": 1.0065159064775777e-05, + "loss": 0.967, + "step": 18220 + }, + { + "epoch": 6.99, + "learning_rate": 1.005238277756484e-05, + "loss": 0.8585, + "step": 18230 + }, + { + "epoch": 6.99, + "learning_rate": 1.0039606490353903e-05, + "loss": 0.8728, + "step": 18240 + }, + { + "epoch": 7.0, + "learning_rate": 1.0026830203142967e-05, + "loss": 0.8082, + "step": 18250 + }, + { + "epoch": 7.0, + "learning_rate": 1.0014053915932031e-05, + "loss": 0.948, + "step": 18260 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.7396928826390053, + "eval_loss": 0.7853737473487854, + "eval_runtime": 306.6153, + "eval_samples_per_second": 136.141, + "eval_steps_per_second": 68.072, + "step": 18263 + }, + { + "epoch": 7.0, + "learning_rate": 1.0001277628721094e-05, + "loss": 0.917, + "step": 18270 + }, + { + "epoch": 7.01, + "learning_rate": 9.988501341510158e-06, + "loss": 0.9677, + "step": 18280 + }, + { + "epoch": 7.01, + "learning_rate": 9.97572505429922e-06, + "loss": 0.9087, + "step": 18290 + }, + { + "epoch": 7.01, + "learning_rate": 9.962948767088284e-06, + "loss": 0.9864, + "step": 18300 + }, + { + "epoch": 7.02, + "learning_rate": 9.950172479877348e-06, + "loss": 0.9212, + "step": 18310 + }, + { + "epoch": 7.02, + "learning_rate": 9.93739619266641e-06, + "loss": 0.9485, + "step": 18320 + }, + { + "epoch": 7.03, + "learning_rate": 9.924619905455475e-06, + "loss": 0.9506, + "step": 18330 + }, + { + "epoch": 7.03, + "learning_rate": 9.911843618244537e-06, + "loss": 0.9752, + "step": 18340 + }, + { + "epoch": 7.03, + "learning_rate": 9.899067331033601e-06, + "loss": 0.9582, + "step": 18350 + }, + { + "epoch": 7.04, + "learning_rate": 9.886291043822667e-06, + "loss": 0.9211, + "step": 18360 + }, + { + "epoch": 7.04, + "learning_rate": 9.873514756611729e-06, + "loss": 0.8882, + "step": 18370 + }, + { + "epoch": 7.04, + "learning_rate": 9.860738469400793e-06, + "loss": 0.9447, + "step": 18380 + }, + { + "epoch": 7.05, + "learning_rate": 9.847962182189857e-06, + "loss": 0.9005, + "step": 18390 + }, + { + "epoch": 7.05, + "learning_rate": 9.83518589497892e-06, + "loss": 0.8327, + "step": 18400 + }, + { + "epoch": 7.06, + "learning_rate": 9.822409607767984e-06, + "loss": 0.8961, + "step": 18410 + }, + { + "epoch": 7.06, + "learning_rate": 9.809633320557046e-06, + "loss": 0.8904, + "step": 18420 + }, + { + "epoch": 7.06, + "learning_rate": 9.79685703334611e-06, + "loss": 0.9239, + "step": 18430 + }, + { + "epoch": 7.07, + "learning_rate": 9.784080746135174e-06, + "loss": 0.9298, + "step": 18440 + }, + { + "epoch": 7.07, + "learning_rate": 9.771304458924236e-06, + "loss": 0.9099, + "step": 18450 + }, + { + "epoch": 7.08, + "learning_rate": 9.7585281717133e-06, + "loss": 0.9328, + "step": 18460 + }, + { + "epoch": 7.08, + "learning_rate": 9.745751884502363e-06, + "loss": 0.9255, + "step": 18470 + }, + { + "epoch": 7.08, + "learning_rate": 9.732975597291427e-06, + "loss": 0.8669, + "step": 18480 + }, + { + "epoch": 7.09, + "learning_rate": 9.720199310080491e-06, + "loss": 0.8689, + "step": 18490 + }, + { + "epoch": 7.09, + "learning_rate": 9.707423022869553e-06, + "loss": 0.9246, + "step": 18500 + }, + { + "epoch": 7.09, + "learning_rate": 9.694646735658617e-06, + "loss": 0.8907, + "step": 18510 + }, + { + "epoch": 7.1, + "learning_rate": 9.68187044844768e-06, + "loss": 0.9067, + "step": 18520 + }, + { + "epoch": 7.1, + "learning_rate": 9.669094161236745e-06, + "loss": 0.9925, + "step": 18530 + }, + { + "epoch": 7.11, + "learning_rate": 9.65631787402581e-06, + "loss": 0.9197, + "step": 18540 + }, + { + "epoch": 7.11, + "learning_rate": 9.643541586814872e-06, + "loss": 0.9109, + "step": 18550 + }, + { + "epoch": 7.11, + "learning_rate": 9.630765299603936e-06, + "loss": 0.9298, + "step": 18560 + }, + { + "epoch": 7.12, + "learning_rate": 9.617989012393e-06, + "loss": 0.966, + "step": 18570 + }, + { + "epoch": 7.12, + "learning_rate": 9.605212725182062e-06, + "loss": 0.965, + "step": 18580 + }, + { + "epoch": 7.13, + "learning_rate": 9.592436437971126e-06, + "loss": 0.8526, + "step": 18590 + }, + { + "epoch": 7.13, + "learning_rate": 9.579660150760189e-06, + "loss": 0.883, + "step": 18600 + }, + { + "epoch": 7.13, + "learning_rate": 9.566883863549253e-06, + "loss": 0.8712, + "step": 18610 + }, + { + "epoch": 7.14, + "learning_rate": 9.554107576338317e-06, + "loss": 0.9506, + "step": 18620 + }, + { + "epoch": 7.14, + "learning_rate": 9.54133128912738e-06, + "loss": 0.8746, + "step": 18630 + }, + { + "epoch": 7.14, + "learning_rate": 9.528555001916443e-06, + "loss": 0.938, + "step": 18640 + }, + { + "epoch": 7.15, + "learning_rate": 9.515778714705506e-06, + "loss": 0.9669, + "step": 18650 + }, + { + "epoch": 7.15, + "learning_rate": 9.50300242749457e-06, + "loss": 0.9333, + "step": 18660 + }, + { + "epoch": 7.16, + "learning_rate": 9.490226140283634e-06, + "loss": 0.8384, + "step": 18670 + }, + { + "epoch": 7.16, + "learning_rate": 9.477449853072696e-06, + "loss": 0.8996, + "step": 18680 + }, + { + "epoch": 7.16, + "learning_rate": 9.46467356586176e-06, + "loss": 0.8694, + "step": 18690 + }, + { + "epoch": 7.17, + "learning_rate": 9.451897278650824e-06, + "loss": 0.8928, + "step": 18700 + }, + { + "epoch": 7.17, + "learning_rate": 9.439120991439888e-06, + "loss": 0.9085, + "step": 18710 + }, + { + "epoch": 7.18, + "learning_rate": 9.426344704228952e-06, + "loss": 0.9642, + "step": 18720 + }, + { + "epoch": 7.18, + "learning_rate": 9.413568417018015e-06, + "loss": 0.9233, + "step": 18730 + }, + { + "epoch": 7.18, + "learning_rate": 9.400792129807079e-06, + "loss": 0.9378, + "step": 18740 + }, + { + "epoch": 7.19, + "learning_rate": 9.388015842596143e-06, + "loss": 0.929, + "step": 18750 + }, + { + "epoch": 7.19, + "learning_rate": 9.375239555385205e-06, + "loss": 0.8891, + "step": 18760 + }, + { + "epoch": 7.19, + "learning_rate": 9.36246326817427e-06, + "loss": 0.8708, + "step": 18770 + }, + { + "epoch": 7.2, + "learning_rate": 9.349686980963332e-06, + "loss": 0.9237, + "step": 18780 + }, + { + "epoch": 7.2, + "learning_rate": 9.336910693752396e-06, + "loss": 0.8177, + "step": 18790 + }, + { + "epoch": 7.21, + "learning_rate": 9.32413440654146e-06, + "loss": 0.8765, + "step": 18800 + }, + { + "epoch": 7.21, + "learning_rate": 9.311358119330522e-06, + "loss": 0.8823, + "step": 18810 + }, + { + "epoch": 7.21, + "learning_rate": 9.298581832119586e-06, + "loss": 0.936, + "step": 18820 + }, + { + "epoch": 7.22, + "learning_rate": 9.285805544908649e-06, + "loss": 0.9233, + "step": 18830 + }, + { + "epoch": 7.22, + "learning_rate": 9.273029257697713e-06, + "loss": 0.9215, + "step": 18840 + }, + { + "epoch": 7.22, + "learning_rate": 9.260252970486777e-06, + "loss": 0.955, + "step": 18850 + }, + { + "epoch": 7.23, + "learning_rate": 9.247476683275839e-06, + "loss": 0.9017, + "step": 18860 + }, + { + "epoch": 7.23, + "learning_rate": 9.234700396064905e-06, + "loss": 0.909, + "step": 18870 + }, + { + "epoch": 7.24, + "learning_rate": 9.221924108853967e-06, + "loss": 0.9449, + "step": 18880 + }, + { + "epoch": 7.24, + "learning_rate": 9.209147821643031e-06, + "loss": 0.8397, + "step": 18890 + }, + { + "epoch": 7.24, + "learning_rate": 9.196371534432095e-06, + "loss": 0.8534, + "step": 18900 + }, + { + "epoch": 7.25, + "learning_rate": 9.183595247221158e-06, + "loss": 0.8681, + "step": 18910 + }, + { + "epoch": 7.25, + "learning_rate": 9.170818960010222e-06, + "loss": 0.9161, + "step": 18920 + }, + { + "epoch": 7.26, + "learning_rate": 9.158042672799286e-06, + "loss": 0.9247, + "step": 18930 + }, + { + "epoch": 7.26, + "learning_rate": 9.145266385588348e-06, + "loss": 0.9136, + "step": 18940 + }, + { + "epoch": 7.26, + "learning_rate": 9.132490098377412e-06, + "loss": 0.8871, + "step": 18950 + }, + { + "epoch": 7.27, + "learning_rate": 9.119713811166475e-06, + "loss": 1.0471, + "step": 18960 + }, + { + "epoch": 7.27, + "learning_rate": 9.106937523955539e-06, + "loss": 0.8436, + "step": 18970 + }, + { + "epoch": 7.27, + "learning_rate": 9.094161236744603e-06, + "loss": 0.9205, + "step": 18980 + }, + { + "epoch": 7.28, + "learning_rate": 9.081384949533665e-06, + "loss": 0.9451, + "step": 18990 + }, + { + "epoch": 7.28, + "learning_rate": 9.068608662322729e-06, + "loss": 0.9911, + "step": 19000 + }, + { + "epoch": 7.29, + "learning_rate": 9.055832375111791e-06, + "loss": 0.9336, + "step": 19010 + }, + { + "epoch": 7.29, + "learning_rate": 9.043056087900855e-06, + "loss": 0.9144, + "step": 19020 + }, + { + "epoch": 7.29, + "learning_rate": 9.03027980068992e-06, + "loss": 0.9352, + "step": 19030 + }, + { + "epoch": 7.3, + "learning_rate": 9.017503513478984e-06, + "loss": 0.9402, + "step": 19040 + }, + { + "epoch": 7.3, + "learning_rate": 9.004727226268048e-06, + "loss": 0.942, + "step": 19050 + }, + { + "epoch": 7.31, + "learning_rate": 8.991950939057112e-06, + "loss": 0.8221, + "step": 19060 + }, + { + "epoch": 7.31, + "learning_rate": 8.979174651846174e-06, + "loss": 0.9079, + "step": 19070 + }, + { + "epoch": 7.31, + "learning_rate": 8.966398364635238e-06, + "loss": 0.9532, + "step": 19080 + }, + { + "epoch": 7.32, + "learning_rate": 8.9536220774243e-06, + "loss": 0.8565, + "step": 19090 + }, + { + "epoch": 7.32, + "learning_rate": 8.940845790213365e-06, + "loss": 0.9071, + "step": 19100 + }, + { + "epoch": 7.32, + "learning_rate": 8.928069503002429e-06, + "loss": 0.8813, + "step": 19110 + }, + { + "epoch": 7.33, + "learning_rate": 8.915293215791491e-06, + "loss": 0.9561, + "step": 19120 + }, + { + "epoch": 7.33, + "learning_rate": 8.902516928580555e-06, + "loss": 0.9081, + "step": 19130 + }, + { + "epoch": 7.34, + "learning_rate": 8.889740641369617e-06, + "loss": 0.9111, + "step": 19140 + }, + { + "epoch": 7.34, + "learning_rate": 8.876964354158681e-06, + "loss": 0.9259, + "step": 19150 + }, + { + "epoch": 7.34, + "learning_rate": 8.864188066947745e-06, + "loss": 0.924, + "step": 19160 + }, + { + "epoch": 7.35, + "learning_rate": 8.851411779736808e-06, + "loss": 0.881, + "step": 19170 + }, + { + "epoch": 7.35, + "learning_rate": 8.838635492525872e-06, + "loss": 0.9141, + "step": 19180 + }, + { + "epoch": 7.36, + "learning_rate": 8.825859205314934e-06, + "loss": 0.9058, + "step": 19190 + }, + { + "epoch": 7.36, + "learning_rate": 8.813082918103998e-06, + "loss": 0.8971, + "step": 19200 + }, + { + "epoch": 7.36, + "learning_rate": 8.800306630893062e-06, + "loss": 0.957, + "step": 19210 + }, + { + "epoch": 7.37, + "learning_rate": 8.787530343682126e-06, + "loss": 0.9251, + "step": 19220 + }, + { + "epoch": 7.37, + "learning_rate": 8.77475405647119e-06, + "loss": 0.9212, + "step": 19230 + }, + { + "epoch": 7.37, + "learning_rate": 8.761977769260255e-06, + "loss": 0.8916, + "step": 19240 + }, + { + "epoch": 7.38, + "learning_rate": 8.749201482049317e-06, + "loss": 0.9184, + "step": 19250 + }, + { + "epoch": 7.38, + "learning_rate": 8.736425194838381e-06, + "loss": 0.9677, + "step": 19260 + }, + { + "epoch": 7.39, + "learning_rate": 8.723648907627443e-06, + "loss": 0.8744, + "step": 19270 + }, + { + "epoch": 7.39, + "learning_rate": 8.710872620416507e-06, + "loss": 0.9614, + "step": 19280 + }, + { + "epoch": 7.39, + "learning_rate": 8.698096333205571e-06, + "loss": 0.8792, + "step": 19290 + }, + { + "epoch": 7.4, + "learning_rate": 8.685320045994634e-06, + "loss": 0.9592, + "step": 19300 + }, + { + "epoch": 7.4, + "learning_rate": 8.672543758783698e-06, + "loss": 0.8911, + "step": 19310 + }, + { + "epoch": 7.41, + "learning_rate": 8.65976747157276e-06, + "loss": 0.891, + "step": 19320 + }, + { + "epoch": 7.41, + "learning_rate": 8.646991184361824e-06, + "loss": 0.8937, + "step": 19330 + }, + { + "epoch": 7.41, + "learning_rate": 8.634214897150888e-06, + "loss": 0.9239, + "step": 19340 + }, + { + "epoch": 7.42, + "learning_rate": 8.62143860993995e-06, + "loss": 0.8465, + "step": 19350 + }, + { + "epoch": 7.42, + "learning_rate": 8.608662322729015e-06, + "loss": 0.9427, + "step": 19360 + }, + { + "epoch": 7.42, + "learning_rate": 8.595886035518077e-06, + "loss": 0.8904, + "step": 19370 + }, + { + "epoch": 7.43, + "learning_rate": 8.583109748307141e-06, + "loss": 1.0235, + "step": 19380 + }, + { + "epoch": 7.43, + "learning_rate": 8.570333461096207e-06, + "loss": 0.895, + "step": 19390 + }, + { + "epoch": 7.44, + "learning_rate": 8.55755717388527e-06, + "loss": 0.8767, + "step": 19400 + }, + { + "epoch": 7.44, + "learning_rate": 8.544780886674333e-06, + "loss": 0.9346, + "step": 19410 + }, + { + "epoch": 7.44, + "learning_rate": 8.532004599463397e-06, + "loss": 0.9869, + "step": 19420 + }, + { + "epoch": 7.45, + "learning_rate": 8.51922831225246e-06, + "loss": 0.9067, + "step": 19430 + }, + { + "epoch": 7.45, + "learning_rate": 8.506452025041524e-06, + "loss": 0.8218, + "step": 19440 + }, + { + "epoch": 7.45, + "learning_rate": 8.493675737830586e-06, + "loss": 0.8634, + "step": 19450 + }, + { + "epoch": 7.46, + "learning_rate": 8.48089945061965e-06, + "loss": 0.9779, + "step": 19460 + }, + { + "epoch": 7.46, + "learning_rate": 8.468123163408714e-06, + "loss": 0.9544, + "step": 19470 + }, + { + "epoch": 7.47, + "learning_rate": 8.455346876197777e-06, + "loss": 0.9239, + "step": 19480 + }, + { + "epoch": 7.47, + "learning_rate": 8.44257058898684e-06, + "loss": 0.921, + "step": 19490 + }, + { + "epoch": 7.47, + "learning_rate": 8.429794301775903e-06, + "loss": 0.894, + "step": 19500 + }, + { + "epoch": 7.48, + "learning_rate": 8.417018014564967e-06, + "loss": 0.9174, + "step": 19510 + }, + { + "epoch": 7.48, + "learning_rate": 8.404241727354031e-06, + "loss": 0.8581, + "step": 19520 + }, + { + "epoch": 7.49, + "learning_rate": 8.391465440143094e-06, + "loss": 0.9115, + "step": 19530 + }, + { + "epoch": 7.49, + "learning_rate": 8.378689152932158e-06, + "loss": 0.9198, + "step": 19540 + }, + { + "epoch": 7.49, + "learning_rate": 8.365912865721222e-06, + "loss": 0.9195, + "step": 19550 + }, + { + "epoch": 7.5, + "learning_rate": 8.353136578510286e-06, + "loss": 0.9593, + "step": 19560 + }, + { + "epoch": 7.5, + "learning_rate": 8.34036029129935e-06, + "loss": 0.9765, + "step": 19570 + }, + { + "epoch": 7.5, + "learning_rate": 8.327584004088412e-06, + "loss": 0.8636, + "step": 19580 + }, + { + "epoch": 7.51, + "learning_rate": 8.314807716877476e-06, + "loss": 0.8528, + "step": 19590 + }, + { + "epoch": 7.51, + "learning_rate": 8.30203142966654e-06, + "loss": 0.8767, + "step": 19600 + }, + { + "epoch": 7.52, + "learning_rate": 8.289255142455603e-06, + "loss": 0.8928, + "step": 19610 + }, + { + "epoch": 7.52, + "learning_rate": 8.276478855244667e-06, + "loss": 0.8565, + "step": 19620 + }, + { + "epoch": 7.52, + "learning_rate": 8.263702568033729e-06, + "loss": 0.996, + "step": 19630 + }, + { + "epoch": 7.53, + "learning_rate": 8.250926280822793e-06, + "loss": 0.9569, + "step": 19640 + }, + { + "epoch": 7.53, + "learning_rate": 8.238149993611857e-06, + "loss": 0.916, + "step": 19650 + }, + { + "epoch": 7.54, + "learning_rate": 8.22537370640092e-06, + "loss": 0.8914, + "step": 19660 + }, + { + "epoch": 7.54, + "learning_rate": 8.212597419189984e-06, + "loss": 0.8659, + "step": 19670 + }, + { + "epoch": 7.54, + "learning_rate": 8.199821131979046e-06, + "loss": 0.892, + "step": 19680 + }, + { + "epoch": 7.55, + "learning_rate": 8.18704484476811e-06, + "loss": 0.9377, + "step": 19690 + }, + { + "epoch": 7.55, + "learning_rate": 8.174268557557174e-06, + "loss": 0.9182, + "step": 19700 + }, + { + "epoch": 7.55, + "learning_rate": 8.161492270346236e-06, + "loss": 0.9041, + "step": 19710 + }, + { + "epoch": 7.56, + "learning_rate": 8.1487159831353e-06, + "loss": 0.8693, + "step": 19720 + }, + { + "epoch": 7.56, + "learning_rate": 8.135939695924366e-06, + "loss": 0.8469, + "step": 19730 + }, + { + "epoch": 7.57, + "learning_rate": 8.123163408713429e-06, + "loss": 0.9394, + "step": 19740 + }, + { + "epoch": 7.57, + "learning_rate": 8.110387121502493e-06, + "loss": 0.9496, + "step": 19750 + }, + { + "epoch": 7.57, + "learning_rate": 8.097610834291555e-06, + "loss": 0.8135, + "step": 19760 + }, + { + "epoch": 7.58, + "learning_rate": 8.084834547080619e-06, + "loss": 0.9006, + "step": 19770 + }, + { + "epoch": 7.58, + "learning_rate": 8.072058259869683e-06, + "loss": 0.8794, + "step": 19780 + }, + { + "epoch": 7.59, + "learning_rate": 8.059281972658745e-06, + "loss": 0.8807, + "step": 19790 + }, + { + "epoch": 7.59, + "learning_rate": 8.04650568544781e-06, + "loss": 0.9595, + "step": 19800 + }, + { + "epoch": 7.59, + "learning_rate": 8.033729398236872e-06, + "loss": 0.9101, + "step": 19810 + }, + { + "epoch": 7.6, + "learning_rate": 8.020953111025936e-06, + "loss": 0.8634, + "step": 19820 + }, + { + "epoch": 7.6, + "learning_rate": 8.008176823815e-06, + "loss": 1.003, + "step": 19830 + }, + { + "epoch": 7.6, + "learning_rate": 7.995400536604062e-06, + "loss": 0.9065, + "step": 19840 + }, + { + "epoch": 7.61, + "learning_rate": 7.982624249393126e-06, + "loss": 0.9402, + "step": 19850 + }, + { + "epoch": 7.61, + "learning_rate": 7.969847962182189e-06, + "loss": 0.9089, + "step": 19860 + }, + { + "epoch": 7.62, + "learning_rate": 7.957071674971253e-06, + "loss": 0.9288, + "step": 19870 + }, + { + "epoch": 7.62, + "learning_rate": 7.944295387760317e-06, + "loss": 0.9206, + "step": 19880 + }, + { + "epoch": 7.62, + "learning_rate": 7.93151910054938e-06, + "loss": 0.8987, + "step": 19890 + }, + { + "epoch": 7.63, + "learning_rate": 7.918742813338445e-06, + "loss": 0.8514, + "step": 19900 + }, + { + "epoch": 7.63, + "learning_rate": 7.905966526127509e-06, + "loss": 0.9646, + "step": 19910 + }, + { + "epoch": 7.64, + "learning_rate": 7.893190238916571e-06, + "loss": 0.8446, + "step": 19920 + }, + { + "epoch": 7.64, + "learning_rate": 7.880413951705635e-06, + "loss": 0.9699, + "step": 19930 + }, + { + "epoch": 7.64, + "learning_rate": 7.867637664494698e-06, + "loss": 0.9842, + "step": 19940 + }, + { + "epoch": 7.65, + "learning_rate": 7.854861377283762e-06, + "loss": 0.9289, + "step": 19950 + }, + { + "epoch": 7.65, + "learning_rate": 7.842085090072826e-06, + "loss": 0.8707, + "step": 19960 + }, + { + "epoch": 7.65, + "learning_rate": 7.829308802861888e-06, + "loss": 0.9045, + "step": 19970 + }, + { + "epoch": 7.66, + "learning_rate": 7.816532515650952e-06, + "loss": 0.8601, + "step": 19980 + }, + { + "epoch": 7.66, + "learning_rate": 7.803756228440015e-06, + "loss": 0.8528, + "step": 19990 + }, + { + "epoch": 7.67, + "learning_rate": 7.790979941229079e-06, + "loss": 0.896, + "step": 20000 + }, + { + "epoch": 7.67, + "learning_rate": 7.778203654018143e-06, + "loss": 0.9011, + "step": 20010 + }, + { + "epoch": 7.67, + "learning_rate": 7.765427366807205e-06, + "loss": 0.8921, + "step": 20020 + }, + { + "epoch": 7.68, + "learning_rate": 7.75265107959627e-06, + "loss": 0.8781, + "step": 20030 + }, + { + "epoch": 7.68, + "learning_rate": 7.739874792385332e-06, + "loss": 0.9161, + "step": 20040 + }, + { + "epoch": 7.68, + "learning_rate": 7.727098505174396e-06, + "loss": 0.951, + "step": 20050 + }, + { + "epoch": 7.69, + "learning_rate": 7.71432221796346e-06, + "loss": 0.8912, + "step": 20060 + }, + { + "epoch": 7.69, + "learning_rate": 7.701545930752524e-06, + "loss": 0.8727, + "step": 20070 + }, + { + "epoch": 7.7, + "learning_rate": 7.688769643541588e-06, + "loss": 0.8656, + "step": 20080 + }, + { + "epoch": 7.7, + "learning_rate": 7.675993356330652e-06, + "loss": 0.878, + "step": 20090 + }, + { + "epoch": 7.7, + "learning_rate": 7.663217069119714e-06, + "loss": 0.9238, + "step": 20100 + }, + { + "epoch": 7.71, + "learning_rate": 7.650440781908778e-06, + "loss": 0.8911, + "step": 20110 + }, + { + "epoch": 7.71, + "learning_rate": 7.63766449469784e-06, + "loss": 0.9215, + "step": 20120 + }, + { + "epoch": 7.72, + "learning_rate": 7.624888207486905e-06, + "loss": 0.8791, + "step": 20130 + }, + { + "epoch": 7.72, + "learning_rate": 7.612111920275968e-06, + "loss": 0.929, + "step": 20140 + }, + { + "epoch": 7.72, + "learning_rate": 7.599335633065031e-06, + "loss": 0.8388, + "step": 20150 + }, + { + "epoch": 7.73, + "learning_rate": 7.586559345854095e-06, + "loss": 0.9093, + "step": 20160 + }, + { + "epoch": 7.73, + "learning_rate": 7.5737830586431584e-06, + "loss": 0.8885, + "step": 20170 + }, + { + "epoch": 7.73, + "learning_rate": 7.561006771432222e-06, + "loss": 0.943, + "step": 20180 + }, + { + "epoch": 7.74, + "learning_rate": 7.548230484221285e-06, + "loss": 0.8925, + "step": 20190 + }, + { + "epoch": 7.74, + "learning_rate": 7.535454197010349e-06, + "loss": 0.9925, + "step": 20200 + }, + { + "epoch": 7.75, + "learning_rate": 7.522677909799412e-06, + "loss": 0.9013, + "step": 20210 + }, + { + "epoch": 7.75, + "learning_rate": 7.509901622588475e-06, + "loss": 0.9073, + "step": 20220 + }, + { + "epoch": 7.75, + "learning_rate": 7.497125335377539e-06, + "loss": 0.9233, + "step": 20230 + }, + { + "epoch": 7.76, + "learning_rate": 7.484349048166603e-06, + "loss": 0.9192, + "step": 20240 + }, + { + "epoch": 7.76, + "learning_rate": 7.471572760955667e-06, + "loss": 0.9543, + "step": 20250 + }, + { + "epoch": 7.77, + "learning_rate": 7.45879647374473e-06, + "loss": 0.9246, + "step": 20260 + }, + { + "epoch": 7.77, + "learning_rate": 7.446020186533793e-06, + "loss": 0.8648, + "step": 20270 + }, + { + "epoch": 7.77, + "learning_rate": 7.433243899322856e-06, + "loss": 0.9527, + "step": 20280 + }, + { + "epoch": 7.78, + "learning_rate": 7.420467612111921e-06, + "loss": 0.9113, + "step": 20290 + }, + { + "epoch": 7.78, + "learning_rate": 7.407691324900984e-06, + "loss": 0.9147, + "step": 20300 + }, + { + "epoch": 7.78, + "learning_rate": 7.394915037690048e-06, + "loss": 0.9232, + "step": 20310 + }, + { + "epoch": 7.79, + "learning_rate": 7.382138750479111e-06, + "loss": 0.8593, + "step": 20320 + }, + { + "epoch": 7.79, + "learning_rate": 7.369362463268174e-06, + "loss": 0.8959, + "step": 20330 + }, + { + "epoch": 7.8, + "learning_rate": 7.356586176057238e-06, + "loss": 0.9735, + "step": 20340 + }, + { + "epoch": 7.8, + "learning_rate": 7.343809888846301e-06, + "loss": 0.957, + "step": 20350 + }, + { + "epoch": 7.8, + "learning_rate": 7.3310336016353645e-06, + "loss": 0.8511, + "step": 20360 + }, + { + "epoch": 7.81, + "learning_rate": 7.3182573144244286e-06, + "loss": 0.9581, + "step": 20370 + }, + { + "epoch": 7.81, + "learning_rate": 7.305481027213493e-06, + "loss": 0.8906, + "step": 20380 + }, + { + "epoch": 7.82, + "learning_rate": 7.292704740002556e-06, + "loss": 0.8391, + "step": 20390 + }, + { + "epoch": 7.82, + "learning_rate": 7.279928452791619e-06, + "loss": 0.9566, + "step": 20400 + }, + { + "epoch": 7.82, + "learning_rate": 7.267152165580682e-06, + "loss": 0.9526, + "step": 20410 + }, + { + "epoch": 7.83, + "learning_rate": 7.2543758783697455e-06, + "loss": 0.9003, + "step": 20420 + }, + { + "epoch": 7.83, + "learning_rate": 7.2415995911588095e-06, + "loss": 0.882, + "step": 20430 + }, + { + "epoch": 7.83, + "learning_rate": 7.228823303947873e-06, + "loss": 0.9138, + "step": 20440 + }, + { + "epoch": 7.84, + "learning_rate": 7.216047016736936e-06, + "loss": 0.9135, + "step": 20450 + }, + { + "epoch": 7.84, + "learning_rate": 7.203270729526e-06, + "loss": 0.9206, + "step": 20460 + }, + { + "epoch": 7.85, + "learning_rate": 7.190494442315064e-06, + "loss": 0.8777, + "step": 20470 + }, + { + "epoch": 7.85, + "learning_rate": 7.177718155104127e-06, + "loss": 0.992, + "step": 20480 + }, + { + "epoch": 7.85, + "learning_rate": 7.1649418678931905e-06, + "loss": 0.8647, + "step": 20490 + }, + { + "epoch": 7.86, + "learning_rate": 7.152165580682254e-06, + "loss": 0.8841, + "step": 20500 + }, + { + "epoch": 7.86, + "learning_rate": 7.139389293471317e-06, + "loss": 0.8797, + "step": 20510 + }, + { + "epoch": 7.87, + "learning_rate": 7.126613006260381e-06, + "loss": 0.9484, + "step": 20520 + }, + { + "epoch": 7.87, + "learning_rate": 7.113836719049444e-06, + "loss": 0.857, + "step": 20530 + }, + { + "epoch": 7.87, + "learning_rate": 7.101060431838507e-06, + "loss": 0.8707, + "step": 20540 + }, + { + "epoch": 7.88, + "learning_rate": 7.088284144627571e-06, + "loss": 0.9466, + "step": 20550 + }, + { + "epoch": 7.88, + "learning_rate": 7.0755078574166355e-06, + "loss": 0.9228, + "step": 20560 + }, + { + "epoch": 7.88, + "learning_rate": 7.062731570205699e-06, + "loss": 0.8967, + "step": 20570 + }, + { + "epoch": 7.89, + "learning_rate": 7.049955282994762e-06, + "loss": 0.9361, + "step": 20580 + }, + { + "epoch": 7.89, + "learning_rate": 7.037178995783825e-06, + "loss": 0.9126, + "step": 20590 + }, + { + "epoch": 7.9, + "learning_rate": 7.024402708572889e-06, + "loss": 0.9717, + "step": 20600 + }, + { + "epoch": 7.9, + "learning_rate": 7.011626421361952e-06, + "loss": 0.9346, + "step": 20610 + }, + { + "epoch": 7.9, + "learning_rate": 6.9988501341510156e-06, + "loss": 0.9664, + "step": 20620 + }, + { + "epoch": 7.91, + "learning_rate": 6.98607384694008e-06, + "loss": 0.8919, + "step": 20630 + }, + { + "epoch": 7.91, + "learning_rate": 6.9745751884502364e-06, + "loss": 0.8816, + "step": 20640 + }, + { + "epoch": 7.91, + "learning_rate": 6.9617989012393e-06, + "loss": 1.0376, + "step": 20650 + }, + { + "epoch": 7.92, + "learning_rate": 6.949022614028364e-06, + "loss": 0.9102, + "step": 20660 + }, + { + "epoch": 7.92, + "learning_rate": 6.936246326817427e-06, + "loss": 0.9019, + "step": 20670 + }, + { + "epoch": 7.93, + "learning_rate": 6.923470039606491e-06, + "loss": 0.8672, + "step": 20680 + }, + { + "epoch": 7.93, + "learning_rate": 6.910693752395554e-06, + "loss": 0.846, + "step": 20690 + }, + { + "epoch": 7.93, + "learning_rate": 6.897917465184617e-06, + "loss": 0.9165, + "step": 20700 + }, + { + "epoch": 7.94, + "learning_rate": 6.8851411779736814e-06, + "loss": 0.927, + "step": 20710 + }, + { + "epoch": 7.94, + "learning_rate": 6.872364890762745e-06, + "loss": 0.831, + "step": 20720 + }, + { + "epoch": 7.95, + "learning_rate": 6.859588603551808e-06, + "loss": 0.8768, + "step": 20730 + }, + { + "epoch": 7.95, + "learning_rate": 6.846812316340871e-06, + "loss": 0.9396, + "step": 20740 + }, + { + "epoch": 7.95, + "learning_rate": 6.834036029129935e-06, + "loss": 0.9237, + "step": 20750 + }, + { + "epoch": 7.96, + "learning_rate": 6.821259741918998e-06, + "loss": 0.9281, + "step": 20760 + }, + { + "epoch": 7.96, + "learning_rate": 6.808483454708062e-06, + "loss": 0.9473, + "step": 20770 + }, + { + "epoch": 7.96, + "learning_rate": 6.795707167497126e-06, + "loss": 0.8429, + "step": 20780 + }, + { + "epoch": 7.97, + "learning_rate": 6.782930880286189e-06, + "loss": 0.9076, + "step": 20790 + }, + { + "epoch": 7.97, + "learning_rate": 6.770154593075253e-06, + "loss": 0.8291, + "step": 20800 + }, + { + "epoch": 7.98, + "learning_rate": 6.757378305864316e-06, + "loss": 0.9032, + "step": 20810 + }, + { + "epoch": 7.98, + "learning_rate": 6.744602018653379e-06, + "loss": 0.9355, + "step": 20820 + }, + { + "epoch": 7.98, + "learning_rate": 6.7318257314424425e-06, + "loss": 0.9126, + "step": 20830 + }, + { + "epoch": 7.99, + "learning_rate": 6.7190494442315066e-06, + "loss": 0.8687, + "step": 20840 + }, + { + "epoch": 7.99, + "learning_rate": 6.70627315702057e-06, + "loss": 0.8658, + "step": 20850 + }, + { + "epoch": 8.0, + "learning_rate": 6.693496869809634e-06, + "loss": 0.8725, + "step": 20860 + }, + { + "epoch": 8.0, + "learning_rate": 6.680720582598697e-06, + "loss": 0.9855, + "step": 20870 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.7598878853939582, + "eval_loss": 0.6982513070106506, + "eval_runtime": 298.5542, + "eval_samples_per_second": 139.817, + "eval_steps_per_second": 69.91, + "step": 20872 + }, + { + "epoch": 8.0, + "learning_rate": 6.66794429538776e-06, + "loss": 0.9312, + "step": 20880 + }, + { + "epoch": 8.01, + "learning_rate": 6.655168008176824e-06, + "loss": 0.8315, + "step": 20890 + }, + { + "epoch": 8.01, + "learning_rate": 6.6423917209658875e-06, + "loss": 0.904, + "step": 20900 + }, + { + "epoch": 8.01, + "learning_rate": 6.629615433754951e-06, + "loss": 0.9193, + "step": 20910 + }, + { + "epoch": 8.02, + "learning_rate": 6.616839146544014e-06, + "loss": 0.8572, + "step": 20920 + }, + { + "epoch": 8.02, + "learning_rate": 6.604062859333078e-06, + "loss": 0.9464, + "step": 20930 + }, + { + "epoch": 8.03, + "learning_rate": 6.591286572122142e-06, + "loss": 0.8234, + "step": 20940 + }, + { + "epoch": 8.03, + "learning_rate": 6.578510284911205e-06, + "loss": 0.8723, + "step": 20950 + }, + { + "epoch": 8.03, + "learning_rate": 6.5657339977002685e-06, + "loss": 0.9415, + "step": 20960 + }, + { + "epoch": 8.04, + "learning_rate": 6.552957710489332e-06, + "loss": 0.9802, + "step": 20970 + }, + { + "epoch": 8.04, + "learning_rate": 6.540181423278396e-06, + "loss": 0.8766, + "step": 20980 + }, + { + "epoch": 8.05, + "learning_rate": 6.527405136067459e-06, + "loss": 0.8674, + "step": 20990 + }, + { + "epoch": 8.05, + "learning_rate": 6.514628848856522e-06, + "loss": 0.9455, + "step": 21000 + }, + { + "epoch": 8.05, + "learning_rate": 6.501852561645585e-06, + "loss": 0.9649, + "step": 21010 + }, + { + "epoch": 8.06, + "learning_rate": 6.489076274434649e-06, + "loss": 0.9409, + "step": 21020 + }, + { + "epoch": 8.06, + "learning_rate": 6.4762999872237135e-06, + "loss": 0.9398, + "step": 21030 + }, + { + "epoch": 8.06, + "learning_rate": 6.463523700012777e-06, + "loss": 0.855, + "step": 21040 + }, + { + "epoch": 8.07, + "learning_rate": 6.45074741280184e-06, + "loss": 0.897, + "step": 21050 + }, + { + "epoch": 8.07, + "learning_rate": 6.437971125590903e-06, + "loss": 0.8957, + "step": 21060 + }, + { + "epoch": 8.08, + "learning_rate": 6.425194838379967e-06, + "loss": 0.9142, + "step": 21070 + }, + { + "epoch": 8.08, + "learning_rate": 6.41241855116903e-06, + "loss": 0.8697, + "step": 21080 + }, + { + "epoch": 8.08, + "learning_rate": 6.3996422639580936e-06, + "loss": 0.8454, + "step": 21090 + }, + { + "epoch": 8.09, + "learning_rate": 6.386865976747157e-06, + "loss": 0.906, + "step": 21100 + }, + { + "epoch": 8.09, + "learning_rate": 6.374089689536222e-06, + "loss": 0.9329, + "step": 21110 + }, + { + "epoch": 8.1, + "learning_rate": 6.361313402325285e-06, + "loss": 0.8734, + "step": 21120 + }, + { + "epoch": 8.1, + "learning_rate": 6.348537115114348e-06, + "loss": 0.9396, + "step": 21130 + }, + { + "epoch": 8.1, + "learning_rate": 6.335760827903411e-06, + "loss": 0.9262, + "step": 21140 + }, + { + "epoch": 8.11, + "learning_rate": 6.3229845406924745e-06, + "loss": 0.9217, + "step": 21150 + }, + { + "epoch": 8.11, + "learning_rate": 6.310208253481539e-06, + "loss": 0.8764, + "step": 21160 + }, + { + "epoch": 8.11, + "learning_rate": 6.297431966270602e-06, + "loss": 0.8846, + "step": 21170 + }, + { + "epoch": 8.12, + "learning_rate": 6.284655679059665e-06, + "loss": 0.8609, + "step": 21180 + }, + { + "epoch": 8.12, + "learning_rate": 6.271879391848728e-06, + "loss": 0.8395, + "step": 21190 + }, + { + "epoch": 8.13, + "learning_rate": 6.259103104637793e-06, + "loss": 0.8809, + "step": 21200 + }, + { + "epoch": 8.13, + "learning_rate": 6.246326817426856e-06, + "loss": 0.8261, + "step": 21210 + }, + { + "epoch": 8.13, + "learning_rate": 6.2335505302159195e-06, + "loss": 0.9381, + "step": 21220 + }, + { + "epoch": 8.14, + "learning_rate": 6.220774243004983e-06, + "loss": 0.9072, + "step": 21230 + }, + { + "epoch": 8.14, + "learning_rate": 6.207997955794047e-06, + "loss": 0.9348, + "step": 21240 + }, + { + "epoch": 8.14, + "learning_rate": 6.19522166858311e-06, + "loss": 0.8415, + "step": 21250 + }, + { + "epoch": 8.15, + "learning_rate": 6.182445381372173e-06, + "loss": 0.9064, + "step": 21260 + }, + { + "epoch": 8.15, + "learning_rate": 6.1696690941612364e-06, + "loss": 0.9454, + "step": 21270 + }, + { + "epoch": 8.16, + "learning_rate": 6.1568928069503e-06, + "loss": 0.9834, + "step": 21280 + }, + { + "epoch": 8.16, + "learning_rate": 6.1441165197393645e-06, + "loss": 0.8969, + "step": 21290 + }, + { + "epoch": 8.16, + "learning_rate": 6.131340232528428e-06, + "loss": 0.9067, + "step": 21300 + }, + { + "epoch": 8.17, + "learning_rate": 6.118563945317491e-06, + "loss": 0.8998, + "step": 21310 + }, + { + "epoch": 8.17, + "learning_rate": 6.105787658106554e-06, + "loss": 0.8821, + "step": 21320 + }, + { + "epoch": 8.18, + "learning_rate": 6.093011370895618e-06, + "loss": 0.877, + "step": 21330 + }, + { + "epoch": 8.18, + "learning_rate": 6.0802350836846814e-06, + "loss": 0.9425, + "step": 21340 + }, + { + "epoch": 8.18, + "learning_rate": 6.067458796473745e-06, + "loss": 0.9162, + "step": 21350 + }, + { + "epoch": 8.19, + "learning_rate": 6.054682509262808e-06, + "loss": 0.8861, + "step": 21360 + }, + { + "epoch": 8.19, + "learning_rate": 6.041906222051872e-06, + "loss": 0.9061, + "step": 21370 + }, + { + "epoch": 8.19, + "learning_rate": 6.029129934840936e-06, + "loss": 0.8659, + "step": 21380 + }, + { + "epoch": 8.2, + "learning_rate": 6.016353647629999e-06, + "loss": 0.9983, + "step": 21390 + }, + { + "epoch": 8.2, + "learning_rate": 6.003577360419062e-06, + "loss": 0.9033, + "step": 21400 + }, + { + "epoch": 8.21, + "learning_rate": 5.990801073208126e-06, + "loss": 0.8876, + "step": 21410 + }, + { + "epoch": 8.21, + "learning_rate": 5.97802478599719e-06, + "loss": 0.86, + "step": 21420 + }, + { + "epoch": 8.21, + "learning_rate": 5.965248498786253e-06, + "loss": 0.9695, + "step": 21430 + }, + { + "epoch": 8.22, + "learning_rate": 5.952472211575316e-06, + "loss": 0.9871, + "step": 21440 + }, + { + "epoch": 8.22, + "learning_rate": 5.939695924364379e-06, + "loss": 0.9573, + "step": 21450 + }, + { + "epoch": 8.23, + "learning_rate": 5.926919637153443e-06, + "loss": 0.8751, + "step": 21460 + }, + { + "epoch": 8.23, + "learning_rate": 5.914143349942507e-06, + "loss": 0.9682, + "step": 21470 + }, + { + "epoch": 8.23, + "learning_rate": 5.901367062731571e-06, + "loss": 0.936, + "step": 21480 + }, + { + "epoch": 8.24, + "learning_rate": 5.888590775520634e-06, + "loss": 0.9071, + "step": 21490 + }, + { + "epoch": 8.24, + "learning_rate": 5.875814488309697e-06, + "loss": 0.8265, + "step": 21500 + }, + { + "epoch": 8.24, + "learning_rate": 5.863038201098761e-06, + "loss": 0.8778, + "step": 21510 + }, + { + "epoch": 8.25, + "learning_rate": 5.850261913887824e-06, + "loss": 0.9853, + "step": 21520 + }, + { + "epoch": 8.25, + "learning_rate": 5.8374856266768875e-06, + "loss": 0.9725, + "step": 21530 + }, + { + "epoch": 8.26, + "learning_rate": 5.8247093394659516e-06, + "loss": 0.9485, + "step": 21540 + }, + { + "epoch": 8.26, + "learning_rate": 5.811933052255015e-06, + "loss": 0.8038, + "step": 21550 + }, + { + "epoch": 8.26, + "learning_rate": 5.799156765044079e-06, + "loss": 0.8746, + "step": 21560 + }, + { + "epoch": 8.27, + "learning_rate": 5.786380477833142e-06, + "loss": 0.9441, + "step": 21570 + }, + { + "epoch": 8.27, + "learning_rate": 5.773604190622205e-06, + "loss": 0.876, + "step": 21580 + }, + { + "epoch": 8.28, + "learning_rate": 5.7608279034112684e-06, + "loss": 0.9058, + "step": 21590 + }, + { + "epoch": 8.28, + "learning_rate": 5.7480516162003325e-06, + "loss": 0.8759, + "step": 21600 + }, + { + "epoch": 8.28, + "learning_rate": 5.735275328989396e-06, + "loss": 0.8642, + "step": 21610 + }, + { + "epoch": 8.29, + "learning_rate": 5.722499041778459e-06, + "loss": 0.8023, + "step": 21620 + }, + { + "epoch": 8.29, + "learning_rate": 5.709722754567523e-06, + "loss": 0.8719, + "step": 21630 + }, + { + "epoch": 8.29, + "learning_rate": 5.696946467356586e-06, + "loss": 0.8601, + "step": 21640 + }, + { + "epoch": 8.3, + "learning_rate": 5.68417018014565e-06, + "loss": 0.9119, + "step": 21650 + }, + { + "epoch": 8.3, + "learning_rate": 5.6713938929347135e-06, + "loss": 0.8946, + "step": 21660 + }, + { + "epoch": 8.31, + "learning_rate": 5.658617605723777e-06, + "loss": 0.8619, + "step": 21670 + }, + { + "epoch": 8.31, + "learning_rate": 5.64584131851284e-06, + "loss": 0.8055, + "step": 21680 + }, + { + "epoch": 8.31, + "learning_rate": 5.633065031301904e-06, + "loss": 0.9409, + "step": 21690 + }, + { + "epoch": 8.32, + "learning_rate": 5.620288744090967e-06, + "loss": 0.8886, + "step": 21700 + }, + { + "epoch": 8.32, + "learning_rate": 5.60751245688003e-06, + "loss": 0.9549, + "step": 21710 + }, + { + "epoch": 8.33, + "learning_rate": 5.594736169669094e-06, + "loss": 0.8889, + "step": 21720 + }, + { + "epoch": 8.33, + "learning_rate": 5.581959882458158e-06, + "loss": 0.8451, + "step": 21730 + }, + { + "epoch": 8.33, + "learning_rate": 5.569183595247222e-06, + "loss": 0.9339, + "step": 21740 + }, + { + "epoch": 8.34, + "learning_rate": 5.556407308036285e-06, + "loss": 0.8483, + "step": 21750 + }, + { + "epoch": 8.34, + "learning_rate": 5.543631020825348e-06, + "loss": 0.8599, + "step": 21760 + }, + { + "epoch": 8.34, + "learning_rate": 5.530854733614411e-06, + "loss": 0.7685, + "step": 21770 + }, + { + "epoch": 8.35, + "learning_rate": 5.518078446403475e-06, + "loss": 0.932, + "step": 21780 + }, + { + "epoch": 8.35, + "learning_rate": 5.5053021591925386e-06, + "loss": 0.9624, + "step": 21790 + }, + { + "epoch": 8.36, + "learning_rate": 5.492525871981603e-06, + "loss": 0.8659, + "step": 21800 + }, + { + "epoch": 8.36, + "learning_rate": 5.479749584770666e-06, + "loss": 0.9084, + "step": 21810 + }, + { + "epoch": 8.36, + "learning_rate": 5.466973297559729e-06, + "loss": 0.912, + "step": 21820 + }, + { + "epoch": 8.37, + "learning_rate": 5.454197010348793e-06, + "loss": 0.8673, + "step": 21830 + }, + { + "epoch": 8.37, + "learning_rate": 5.441420723137856e-06, + "loss": 0.8506, + "step": 21840 + }, + { + "epoch": 8.37, + "learning_rate": 5.4286444359269195e-06, + "loss": 0.7869, + "step": 21850 + }, + { + "epoch": 8.38, + "learning_rate": 5.415868148715983e-06, + "loss": 0.9777, + "step": 21860 + }, + { + "epoch": 8.38, + "learning_rate": 5.403091861505047e-06, + "loss": 0.9148, + "step": 21870 + }, + { + "epoch": 8.39, + "learning_rate": 5.39031557429411e-06, + "loss": 0.8874, + "step": 21880 + }, + { + "epoch": 8.39, + "learning_rate": 5.377539287083174e-06, + "loss": 0.8703, + "step": 21890 + }, + { + "epoch": 8.39, + "learning_rate": 5.364762999872237e-06, + "loss": 0.9067, + "step": 21900 + }, + { + "epoch": 8.4, + "learning_rate": 5.351986712661301e-06, + "loss": 0.8131, + "step": 21910 + }, + { + "epoch": 8.4, + "learning_rate": 5.3392104254503645e-06, + "loss": 0.8348, + "step": 21920 + }, + { + "epoch": 8.41, + "learning_rate": 5.326434138239428e-06, + "loss": 0.8441, + "step": 21930 + }, + { + "epoch": 8.41, + "learning_rate": 5.313657851028491e-06, + "loss": 1.042, + "step": 21940 + }, + { + "epoch": 8.41, + "learning_rate": 5.300881563817554e-06, + "loss": 0.8645, + "step": 21950 + }, + { + "epoch": 8.42, + "learning_rate": 5.288105276606618e-06, + "loss": 0.8494, + "step": 21960 + }, + { + "epoch": 8.42, + "learning_rate": 5.275328989395682e-06, + "loss": 0.9312, + "step": 21970 + }, + { + "epoch": 8.42, + "learning_rate": 5.2625527021847455e-06, + "loss": 0.8624, + "step": 21980 + }, + { + "epoch": 8.43, + "learning_rate": 5.249776414973809e-06, + "loss": 0.9628, + "step": 21990 + }, + { + "epoch": 8.43, + "learning_rate": 5.237000127762873e-06, + "loss": 0.9118, + "step": 22000 + }, + { + "epoch": 8.44, + "learning_rate": 5.224223840551936e-06, + "loss": 0.9334, + "step": 22010 + }, + { + "epoch": 8.44, + "learning_rate": 5.211447553340999e-06, + "loss": 0.8442, + "step": 22020 + }, + { + "epoch": 8.44, + "learning_rate": 5.198671266130062e-06, + "loss": 0.9181, + "step": 22030 + }, + { + "epoch": 8.45, + "learning_rate": 5.185894978919126e-06, + "loss": 0.8719, + "step": 22040 + }, + { + "epoch": 8.45, + "learning_rate": 5.17311869170819e-06, + "loss": 0.8794, + "step": 22050 + }, + { + "epoch": 8.46, + "learning_rate": 5.160342404497254e-06, + "loss": 0.9218, + "step": 22060 + }, + { + "epoch": 8.46, + "learning_rate": 5.147566117286317e-06, + "loss": 1.0097, + "step": 22070 + }, + { + "epoch": 8.46, + "learning_rate": 5.13478983007538e-06, + "loss": 0.9816, + "step": 22080 + }, + { + "epoch": 8.47, + "learning_rate": 5.122013542864444e-06, + "loss": 0.9781, + "step": 22090 + }, + { + "epoch": 8.47, + "learning_rate": 5.109237255653507e-06, + "loss": 0.8809, + "step": 22100 + }, + { + "epoch": 8.47, + "learning_rate": 5.096460968442571e-06, + "loss": 0.9249, + "step": 22110 + }, + { + "epoch": 8.48, + "learning_rate": 5.083684681231634e-06, + "loss": 0.9203, + "step": 22120 + }, + { + "epoch": 8.48, + "learning_rate": 5.070908394020697e-06, + "loss": 0.9798, + "step": 22130 + }, + { + "epoch": 8.49, + "learning_rate": 5.058132106809762e-06, + "loss": 0.8419, + "step": 22140 + }, + { + "epoch": 8.49, + "learning_rate": 5.045355819598825e-06, + "loss": 0.9809, + "step": 22150 + }, + { + "epoch": 8.49, + "learning_rate": 5.032579532387888e-06, + "loss": 0.9398, + "step": 22160 + }, + { + "epoch": 8.5, + "learning_rate": 5.0198032451769515e-06, + "loss": 0.9118, + "step": 22170 + }, + { + "epoch": 8.5, + "learning_rate": 5.007026957966016e-06, + "loss": 0.8401, + "step": 22180 + }, + { + "epoch": 8.51, + "learning_rate": 4.994250670755079e-06, + "loss": 0.9148, + "step": 22190 + }, + { + "epoch": 8.51, + "learning_rate": 4.981474383544142e-06, + "loss": 0.8719, + "step": 22200 + }, + { + "epoch": 8.51, + "learning_rate": 4.968698096333205e-06, + "loss": 0.8894, + "step": 22210 + }, + { + "epoch": 8.52, + "learning_rate": 4.9559218091222684e-06, + "loss": 0.9964, + "step": 22220 + }, + { + "epoch": 8.52, + "learning_rate": 4.943145521911333e-06, + "loss": 0.8524, + "step": 22230 + }, + { + "epoch": 8.52, + "learning_rate": 4.9303692347003966e-06, + "loss": 0.9801, + "step": 22240 + }, + { + "epoch": 8.53, + "learning_rate": 4.91759294748946e-06, + "loss": 0.9724, + "step": 22250 + }, + { + "epoch": 8.53, + "learning_rate": 4.904816660278523e-06, + "loss": 0.8593, + "step": 22260 + }, + { + "epoch": 8.54, + "learning_rate": 4.892040373067587e-06, + "loss": 0.8703, + "step": 22270 + }, + { + "epoch": 8.54, + "learning_rate": 4.87926408585665e-06, + "loss": 0.9036, + "step": 22280 + }, + { + "epoch": 8.54, + "learning_rate": 4.8664877986457134e-06, + "loss": 0.894, + "step": 22290 + }, + { + "epoch": 8.55, + "learning_rate": 4.853711511434777e-06, + "loss": 0.7996, + "step": 22300 + }, + { + "epoch": 8.55, + "learning_rate": 4.84093522422384e-06, + "loss": 0.8349, + "step": 22310 + }, + { + "epoch": 8.56, + "learning_rate": 4.828158937012905e-06, + "loss": 0.8002, + "step": 22320 + }, + { + "epoch": 8.56, + "learning_rate": 4.815382649801968e-06, + "loss": 0.9046, + "step": 22330 + }, + { + "epoch": 8.56, + "learning_rate": 4.802606362591031e-06, + "loss": 0.968, + "step": 22340 + }, + { + "epoch": 8.57, + "learning_rate": 4.789830075380094e-06, + "loss": 0.8509, + "step": 22350 + }, + { + "epoch": 8.57, + "learning_rate": 4.7770537881691585e-06, + "loss": 0.8947, + "step": 22360 + }, + { + "epoch": 8.57, + "learning_rate": 4.764277500958222e-06, + "loss": 0.7755, + "step": 22370 + }, + { + "epoch": 8.58, + "learning_rate": 4.751501213747285e-06, + "loss": 0.8074, + "step": 22380 + }, + { + "epoch": 8.58, + "learning_rate": 4.738724926536348e-06, + "loss": 0.8925, + "step": 22390 + }, + { + "epoch": 8.59, + "learning_rate": 4.725948639325412e-06, + "loss": 0.866, + "step": 22400 + }, + { + "epoch": 8.59, + "learning_rate": 4.713172352114476e-06, + "loss": 0.9228, + "step": 22410 + }, + { + "epoch": 8.59, + "learning_rate": 4.700396064903539e-06, + "loss": 0.9365, + "step": 22420 + }, + { + "epoch": 8.6, + "learning_rate": 4.687619777692603e-06, + "loss": 0.8845, + "step": 22430 + }, + { + "epoch": 8.6, + "learning_rate": 4.674843490481666e-06, + "loss": 0.9089, + "step": 22440 + }, + { + "epoch": 8.6, + "learning_rate": 4.66206720327073e-06, + "loss": 0.899, + "step": 22450 + }, + { + "epoch": 8.61, + "learning_rate": 4.649290916059793e-06, + "loss": 0.8708, + "step": 22460 + }, + { + "epoch": 8.61, + "learning_rate": 4.636514628848856e-06, + "loss": 0.8776, + "step": 22470 + }, + { + "epoch": 8.62, + "learning_rate": 4.6237383416379195e-06, + "loss": 0.8305, + "step": 22480 + }, + { + "epoch": 8.62, + "learning_rate": 4.6109620544269836e-06, + "loss": 0.8431, + "step": 22490 + }, + { + "epoch": 8.62, + "learning_rate": 4.598185767216048e-06, + "loss": 0.9398, + "step": 22500 + }, + { + "epoch": 8.63, + "learning_rate": 4.585409480005111e-06, + "loss": 0.9261, + "step": 22510 + }, + { + "epoch": 8.63, + "learning_rate": 4.572633192794174e-06, + "loss": 0.8081, + "step": 22520 + }, + { + "epoch": 8.64, + "learning_rate": 4.559856905583237e-06, + "loss": 0.9381, + "step": 22530 + }, + { + "epoch": 8.64, + "learning_rate": 4.547080618372301e-06, + "loss": 0.8543, + "step": 22540 + }, + { + "epoch": 8.64, + "learning_rate": 4.5343043311613645e-06, + "loss": 0.8721, + "step": 22550 + }, + { + "epoch": 8.65, + "learning_rate": 4.521528043950428e-06, + "loss": 0.9618, + "step": 22560 + }, + { + "epoch": 8.65, + "learning_rate": 4.508751756739492e-06, + "loss": 0.8853, + "step": 22570 + }, + { + "epoch": 8.65, + "learning_rate": 4.495975469528556e-06, + "loss": 0.9033, + "step": 22580 + }, + { + "epoch": 8.66, + "learning_rate": 4.483199182317619e-06, + "loss": 0.9493, + "step": 22590 + }, + { + "epoch": 8.66, + "learning_rate": 4.470422895106682e-06, + "loss": 0.9213, + "step": 22600 + }, + { + "epoch": 8.67, + "learning_rate": 4.4576466078957455e-06, + "loss": 0.8884, + "step": 22610 + }, + { + "epoch": 8.67, + "learning_rate": 4.444870320684809e-06, + "loss": 0.8742, + "step": 22620 + }, + { + "epoch": 8.67, + "learning_rate": 4.432094033473873e-06, + "loss": 0.9303, + "step": 22630 + }, + { + "epoch": 8.68, + "learning_rate": 4.419317746262936e-06, + "loss": 0.8751, + "step": 22640 + }, + { + "epoch": 8.68, + "learning_rate": 4.406541459051999e-06, + "loss": 0.8977, + "step": 22650 + }, + { + "epoch": 8.69, + "learning_rate": 4.393765171841063e-06, + "loss": 0.916, + "step": 22660 + }, + { + "epoch": 8.69, + "learning_rate": 4.380988884630127e-06, + "loss": 1.0154, + "step": 22670 + }, + { + "epoch": 8.69, + "learning_rate": 4.3682125974191905e-06, + "loss": 0.9187, + "step": 22680 + }, + { + "epoch": 8.7, + "learning_rate": 4.355436310208254e-06, + "loss": 0.9263, + "step": 22690 + }, + { + "epoch": 8.7, + "learning_rate": 4.342660022997317e-06, + "loss": 0.8458, + "step": 22700 + }, + { + "epoch": 8.7, + "learning_rate": 4.32988373578638e-06, + "loss": 0.9166, + "step": 22710 + }, + { + "epoch": 8.71, + "learning_rate": 4.317107448575444e-06, + "loss": 0.8883, + "step": 22720 + }, + { + "epoch": 8.71, + "learning_rate": 4.304331161364507e-06, + "loss": 0.9751, + "step": 22730 + }, + { + "epoch": 8.72, + "learning_rate": 4.291554874153571e-06, + "loss": 0.8521, + "step": 22740 + }, + { + "epoch": 8.72, + "learning_rate": 4.278778586942635e-06, + "loss": 0.9101, + "step": 22750 + }, + { + "epoch": 8.72, + "learning_rate": 4.266002299731699e-06, + "loss": 0.9253, + "step": 22760 + }, + { + "epoch": 8.73, + "learning_rate": 4.253226012520762e-06, + "loss": 0.8784, + "step": 22770 + }, + { + "epoch": 8.73, + "learning_rate": 4.240449725309825e-06, + "loss": 0.928, + "step": 22780 + }, + { + "epoch": 8.74, + "learning_rate": 4.227673438098888e-06, + "loss": 0.8649, + "step": 22790 + }, + { + "epoch": 8.74, + "learning_rate": 4.2148971508879515e-06, + "loss": 0.8783, + "step": 22800 + }, + { + "epoch": 8.74, + "learning_rate": 4.202120863677016e-06, + "loss": 0.9044, + "step": 22810 + }, + { + "epoch": 8.75, + "learning_rate": 4.189344576466079e-06, + "loss": 0.8893, + "step": 22820 + }, + { + "epoch": 8.75, + "learning_rate": 4.176568289255143e-06, + "loss": 0.8716, + "step": 22830 + }, + { + "epoch": 8.75, + "learning_rate": 4.163792002044206e-06, + "loss": 0.9598, + "step": 22840 + }, + { + "epoch": 8.76, + "learning_rate": 4.15101571483327e-06, + "loss": 0.7954, + "step": 22850 + }, + { + "epoch": 8.76, + "learning_rate": 4.138239427622333e-06, + "loss": 0.8604, + "step": 22860 + }, + { + "epoch": 8.77, + "learning_rate": 4.1254631404113965e-06, + "loss": 0.8798, + "step": 22870 + }, + { + "epoch": 8.77, + "learning_rate": 4.11268685320046e-06, + "loss": 0.9116, + "step": 22880 + }, + { + "epoch": 8.77, + "learning_rate": 4.099910565989523e-06, + "loss": 0.9244, + "step": 22890 + }, + { + "epoch": 8.78, + "learning_rate": 4.087134278778587e-06, + "loss": 0.9035, + "step": 22900 + }, + { + "epoch": 8.78, + "learning_rate": 4.07435799156765e-06, + "loss": 0.8316, + "step": 22910 + }, + { + "epoch": 8.78, + "learning_rate": 4.061581704356714e-06, + "loss": 0.8543, + "step": 22920 + }, + { + "epoch": 8.79, + "learning_rate": 4.0488054171457775e-06, + "loss": 0.8646, + "step": 22930 + }, + { + "epoch": 8.79, + "learning_rate": 4.0360291299348416e-06, + "loss": 0.9101, + "step": 22940 + }, + { + "epoch": 8.8, + "learning_rate": 4.023252842723905e-06, + "loss": 0.9426, + "step": 22950 + }, + { + "epoch": 8.8, + "learning_rate": 4.010476555512968e-06, + "loss": 0.9152, + "step": 22960 + }, + { + "epoch": 8.8, + "learning_rate": 3.997700268302031e-06, + "loss": 0.8991, + "step": 22970 + }, + { + "epoch": 8.81, + "learning_rate": 3.984923981091094e-06, + "loss": 0.9125, + "step": 22980 + }, + { + "epoch": 8.81, + "learning_rate": 3.9721476938801584e-06, + "loss": 0.8832, + "step": 22990 + }, + { + "epoch": 8.82, + "learning_rate": 3.9593714066692225e-06, + "loss": 0.8985, + "step": 23000 + }, + { + "epoch": 8.82, + "learning_rate": 3.946595119458286e-06, + "loss": 0.8441, + "step": 23010 + }, + { + "epoch": 8.82, + "learning_rate": 3.933818832247349e-06, + "loss": 0.8681, + "step": 23020 + }, + { + "epoch": 8.83, + "learning_rate": 3.921042545036413e-06, + "loss": 0.9633, + "step": 23030 + }, + { + "epoch": 8.83, + "learning_rate": 3.908266257825476e-06, + "loss": 0.9412, + "step": 23040 + }, + { + "epoch": 8.83, + "learning_rate": 3.895489970614539e-06, + "loss": 0.8792, + "step": 23050 + }, + { + "epoch": 8.84, + "learning_rate": 3.882713683403603e-06, + "loss": 0.9904, + "step": 23060 + }, + { + "epoch": 8.84, + "learning_rate": 3.869937396192666e-06, + "loss": 0.9428, + "step": 23070 + }, + { + "epoch": 8.85, + "learning_rate": 3.85716110898173e-06, + "loss": 0.9492, + "step": 23080 + }, + { + "epoch": 8.85, + "learning_rate": 3.844384821770794e-06, + "loss": 0.9462, + "step": 23090 + }, + { + "epoch": 8.85, + "learning_rate": 3.831608534559857e-06, + "loss": 0.8152, + "step": 23100 + }, + { + "epoch": 8.86, + "learning_rate": 3.81883224734892e-06, + "loss": 0.8701, + "step": 23110 + }, + { + "epoch": 8.86, + "learning_rate": 3.806055960137984e-06, + "loss": 0.9181, + "step": 23120 + }, + { + "epoch": 8.87, + "learning_rate": 3.7932796729270476e-06, + "loss": 0.7705, + "step": 23130 + }, + { + "epoch": 8.87, + "learning_rate": 3.780503385716111e-06, + "loss": 0.8511, + "step": 23140 + }, + { + "epoch": 8.87, + "learning_rate": 3.7677270985051745e-06, + "loss": 0.9121, + "step": 23150 + }, + { + "epoch": 8.88, + "learning_rate": 3.7549508112942377e-06, + "loss": 0.8985, + "step": 23160 + }, + { + "epoch": 8.88, + "learning_rate": 3.7421745240833013e-06, + "loss": 0.9216, + "step": 23170 + }, + { + "epoch": 8.88, + "learning_rate": 3.729398236872365e-06, + "loss": 0.8353, + "step": 23180 + }, + { + "epoch": 8.89, + "learning_rate": 3.716621949661428e-06, + "loss": 0.8624, + "step": 23190 + }, + { + "epoch": 8.89, + "learning_rate": 3.703845662450492e-06, + "loss": 0.8525, + "step": 23200 + }, + { + "epoch": 8.9, + "learning_rate": 3.6910693752395554e-06, + "loss": 0.8716, + "step": 23210 + }, + { + "epoch": 8.9, + "learning_rate": 3.678293088028619e-06, + "loss": 0.8408, + "step": 23220 + }, + { + "epoch": 8.9, + "learning_rate": 3.6655168008176823e-06, + "loss": 0.9026, + "step": 23230 + }, + { + "epoch": 8.91, + "learning_rate": 3.6527405136067463e-06, + "loss": 0.902, + "step": 23240 + }, + { + "epoch": 8.91, + "learning_rate": 3.6399642263958095e-06, + "loss": 0.9572, + "step": 23250 + }, + { + "epoch": 8.92, + "learning_rate": 3.6271879391848727e-06, + "loss": 0.895, + "step": 23260 + }, + { + "epoch": 8.92, + "learning_rate": 3.6144116519739364e-06, + "loss": 0.9128, + "step": 23270 + }, + { + "epoch": 8.92, + "learning_rate": 3.601635364763e-06, + "loss": 0.9269, + "step": 23280 + }, + { + "epoch": 8.93, + "learning_rate": 3.5888590775520636e-06, + "loss": 0.9065, + "step": 23290 + }, + { + "epoch": 8.93, + "learning_rate": 3.576082790341127e-06, + "loss": 0.8654, + "step": 23300 + }, + { + "epoch": 8.93, + "learning_rate": 3.5633065031301905e-06, + "loss": 0.8228, + "step": 23310 + }, + { + "epoch": 8.94, + "learning_rate": 3.5505302159192537e-06, + "loss": 1.054, + "step": 23320 + }, + { + "epoch": 8.94, + "learning_rate": 3.5377539287083177e-06, + "loss": 0.827, + "step": 23330 + }, + { + "epoch": 8.95, + "learning_rate": 3.524977641497381e-06, + "loss": 0.8659, + "step": 23340 + }, + { + "epoch": 8.95, + "learning_rate": 3.5122013542864446e-06, + "loss": 0.9061, + "step": 23350 + }, + { + "epoch": 8.95, + "learning_rate": 3.4994250670755078e-06, + "loss": 0.9262, + "step": 23360 + }, + { + "epoch": 8.96, + "learning_rate": 3.4866487798645714e-06, + "loss": 0.9465, + "step": 23370 + }, + { + "epoch": 8.96, + "learning_rate": 3.473872492653635e-06, + "loss": 0.8705, + "step": 23380 + }, + { + "epoch": 8.97, + "learning_rate": 3.4610962054426983e-06, + "loss": 0.7709, + "step": 23390 + }, + { + "epoch": 8.97, + "learning_rate": 3.448319918231762e-06, + "loss": 0.9401, + "step": 23400 + }, + { + "epoch": 8.97, + "learning_rate": 3.4355436310208255e-06, + "loss": 0.8963, + "step": 23410 + }, + { + "epoch": 8.98, + "learning_rate": 3.422767343809889e-06, + "loss": 0.9124, + "step": 23420 + }, + { + "epoch": 8.98, + "learning_rate": 3.4099910565989524e-06, + "loss": 0.867, + "step": 23430 + }, + { + "epoch": 8.98, + "learning_rate": 3.397214769388016e-06, + "loss": 0.8558, + "step": 23440 + }, + { + "epoch": 8.99, + "learning_rate": 3.3844384821770796e-06, + "loss": 0.9042, + "step": 23450 + }, + { + "epoch": 8.99, + "learning_rate": 3.371662194966143e-06, + "loss": 0.8928, + "step": 23460 + }, + { + "epoch": 9.0, + "learning_rate": 3.3588859077552065e-06, + "loss": 0.9059, + "step": 23470 + }, + { + "epoch": 9.0, + "learning_rate": 3.3461096205442697e-06, + "loss": 0.9017, + "step": 23480 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.7608221737776394, + "eval_loss": 0.7128584384918213, + "eval_runtime": 301.0875, + "eval_samples_per_second": 138.641, + "eval_steps_per_second": 69.322, + "step": 23481 + }, + { + "epoch": 9.0, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.9507, + "step": 23490 + }, + { + "epoch": 9.01, + "learning_rate": 3.320557046122397e-06, + "loss": 0.9181, + "step": 23500 + }, + { + "epoch": 9.01, + "learning_rate": 3.3077807589114606e-06, + "loss": 0.9228, + "step": 23510 + }, + { + "epoch": 9.01, + "learning_rate": 3.295004471700524e-06, + "loss": 0.9009, + "step": 23520 + }, + { + "epoch": 9.02, + "learning_rate": 3.2822281844895874e-06, + "loss": 0.8826, + "step": 23530 + }, + { + "epoch": 9.02, + "learning_rate": 3.269451897278651e-06, + "loss": 0.8958, + "step": 23540 + }, + { + "epoch": 9.03, + "learning_rate": 3.2566756100677143e-06, + "loss": 0.9058, + "step": 23550 + }, + { + "epoch": 9.03, + "learning_rate": 3.243899322856778e-06, + "loss": 0.8906, + "step": 23560 + }, + { + "epoch": 9.03, + "learning_rate": 3.231123035645841e-06, + "loss": 0.9041, + "step": 23570 + }, + { + "epoch": 9.04, + "learning_rate": 3.218346748434905e-06, + "loss": 0.886, + "step": 23580 + }, + { + "epoch": 9.04, + "learning_rate": 3.2055704612239684e-06, + "loss": 0.8974, + "step": 23590 + }, + { + "epoch": 9.05, + "learning_rate": 3.192794174013032e-06, + "loss": 0.9301, + "step": 23600 + }, + { + "epoch": 9.05, + "learning_rate": 3.1800178868020952e-06, + "loss": 0.9374, + "step": 23610 + }, + { + "epoch": 9.05, + "learning_rate": 3.167241599591159e-06, + "loss": 0.7817, + "step": 23620 + }, + { + "epoch": 9.06, + "learning_rate": 3.1544653123802225e-06, + "loss": 0.8456, + "step": 23630 + }, + { + "epoch": 9.06, + "learning_rate": 3.1416890251692857e-06, + "loss": 0.8351, + "step": 23640 + }, + { + "epoch": 9.06, + "learning_rate": 3.1289127379583493e-06, + "loss": 0.9484, + "step": 23650 + }, + { + "epoch": 9.07, + "learning_rate": 3.1161364507474125e-06, + "loss": 0.8637, + "step": 23660 + }, + { + "epoch": 9.07, + "learning_rate": 3.1033601635364766e-06, + "loss": 0.8785, + "step": 23670 + }, + { + "epoch": 9.08, + "learning_rate": 3.09058387632554e-06, + "loss": 0.9531, + "step": 23680 + }, + { + "epoch": 9.08, + "learning_rate": 3.0778075891146034e-06, + "loss": 0.886, + "step": 23690 + }, + { + "epoch": 9.08, + "learning_rate": 3.0650313019036667e-06, + "loss": 0.8663, + "step": 23700 + }, + { + "epoch": 9.09, + "learning_rate": 3.0522550146927307e-06, + "loss": 0.8406, + "step": 23710 + }, + { + "epoch": 9.09, + "learning_rate": 3.039478727481794e-06, + "loss": 0.8344, + "step": 23720 + }, + { + "epoch": 9.1, + "learning_rate": 3.0267024402708576e-06, + "loss": 0.9386, + "step": 23730 + }, + { + "epoch": 9.1, + "learning_rate": 3.0139261530599208e-06, + "loss": 0.8994, + "step": 23740 + }, + { + "epoch": 9.1, + "learning_rate": 3.001149865848984e-06, + "loss": 0.9545, + "step": 23750 + }, + { + "epoch": 9.11, + "learning_rate": 2.988373578638048e-06, + "loss": 0.8869, + "step": 23760 + }, + { + "epoch": 9.11, + "learning_rate": 2.9755972914271112e-06, + "loss": 0.9274, + "step": 23770 + }, + { + "epoch": 9.11, + "learning_rate": 2.962821004216175e-06, + "loss": 0.9779, + "step": 23780 + }, + { + "epoch": 9.12, + "learning_rate": 2.950044717005238e-06, + "loss": 0.9058, + "step": 23790 + }, + { + "epoch": 9.12, + "learning_rate": 2.937268429794302e-06, + "loss": 0.8455, + "step": 23800 + }, + { + "epoch": 9.13, + "learning_rate": 2.9244921425833653e-06, + "loss": 0.85, + "step": 23810 + }, + { + "epoch": 9.13, + "learning_rate": 2.911715855372429e-06, + "loss": 0.9097, + "step": 23820 + }, + { + "epoch": 9.13, + "learning_rate": 2.898939568161492e-06, + "loss": 0.8656, + "step": 23830 + }, + { + "epoch": 9.14, + "learning_rate": 2.886163280950556e-06, + "loss": 0.8325, + "step": 23840 + }, + { + "epoch": 9.14, + "learning_rate": 2.8733869937396195e-06, + "loss": 0.8249, + "step": 23850 + }, + { + "epoch": 9.15, + "learning_rate": 2.8606107065286827e-06, + "loss": 0.9265, + "step": 23860 + }, + { + "epoch": 9.15, + "learning_rate": 2.8478344193177463e-06, + "loss": 0.902, + "step": 23870 + }, + { + "epoch": 9.15, + "learning_rate": 2.83505813210681e-06, + "loss": 0.9086, + "step": 23880 + }, + { + "epoch": 9.16, + "learning_rate": 2.8222818448958736e-06, + "loss": 0.9146, + "step": 23890 + }, + { + "epoch": 9.16, + "learning_rate": 2.8095055576849368e-06, + "loss": 0.9267, + "step": 23900 + }, + { + "epoch": 9.16, + "learning_rate": 2.7967292704740004e-06, + "loss": 0.8712, + "step": 23910 + }, + { + "epoch": 9.17, + "learning_rate": 2.7839529832630636e-06, + "loss": 0.8805, + "step": 23920 + }, + { + "epoch": 9.17, + "learning_rate": 2.7711766960521272e-06, + "loss": 0.8443, + "step": 23930 + }, + { + "epoch": 9.18, + "learning_rate": 2.758400408841191e-06, + "loss": 0.8683, + "step": 23940 + }, + { + "epoch": 9.18, + "learning_rate": 2.745624121630254e-06, + "loss": 0.9162, + "step": 23950 + }, + { + "epoch": 9.18, + "learning_rate": 2.7328478344193177e-06, + "loss": 0.8848, + "step": 23960 + }, + { + "epoch": 9.19, + "learning_rate": 2.7200715472083814e-06, + "loss": 0.9599, + "step": 23970 + }, + { + "epoch": 9.19, + "learning_rate": 2.707295259997445e-06, + "loss": 0.82, + "step": 23980 + }, + { + "epoch": 9.2, + "learning_rate": 2.694518972786508e-06, + "loss": 0.9204, + "step": 23990 + }, + { + "epoch": 9.2, + "learning_rate": 2.681742685575572e-06, + "loss": 0.8583, + "step": 24000 + }, + { + "epoch": 9.2, + "learning_rate": 2.6689663983646355e-06, + "loss": 0.9028, + "step": 24010 + }, + { + "epoch": 9.21, + "learning_rate": 2.656190111153699e-06, + "loss": 0.8569, + "step": 24020 + }, + { + "epoch": 9.21, + "learning_rate": 2.6434138239427623e-06, + "loss": 0.7817, + "step": 24030 + }, + { + "epoch": 9.21, + "learning_rate": 2.6306375367318255e-06, + "loss": 0.8167, + "step": 24040 + }, + { + "epoch": 9.22, + "learning_rate": 2.617861249520889e-06, + "loss": 0.8659, + "step": 24050 + }, + { + "epoch": 9.22, + "learning_rate": 2.6050849623099528e-06, + "loss": 0.8956, + "step": 24060 + }, + { + "epoch": 9.23, + "learning_rate": 2.5923086750990164e-06, + "loss": 0.8504, + "step": 24070 + }, + { + "epoch": 9.23, + "learning_rate": 2.5795323878880796e-06, + "loss": 0.9213, + "step": 24080 + }, + { + "epoch": 9.23, + "learning_rate": 2.5667561006771433e-06, + "loss": 0.9327, + "step": 24090 + }, + { + "epoch": 9.24, + "learning_rate": 2.553979813466207e-06, + "loss": 0.8836, + "step": 24100 + }, + { + "epoch": 9.24, + "learning_rate": 2.5412035262552705e-06, + "loss": 0.917, + "step": 24110 + }, + { + "epoch": 9.24, + "learning_rate": 2.5284272390443337e-06, + "loss": 0.8959, + "step": 24120 + }, + { + "epoch": 9.25, + "learning_rate": 2.515650951833397e-06, + "loss": 0.8548, + "step": 24130 + }, + { + "epoch": 9.25, + "learning_rate": 2.502874664622461e-06, + "loss": 0.9371, + "step": 24140 + }, + { + "epoch": 9.26, + "learning_rate": 2.4900983774115242e-06, + "loss": 0.8789, + "step": 24150 + }, + { + "epoch": 9.26, + "learning_rate": 2.477322090200588e-06, + "loss": 0.8334, + "step": 24160 + }, + { + "epoch": 9.26, + "learning_rate": 2.464545802989651e-06, + "loss": 0.9277, + "step": 24170 + }, + { + "epoch": 9.27, + "learning_rate": 2.451769515778715e-06, + "loss": 0.9204, + "step": 24180 + }, + { + "epoch": 9.27, + "learning_rate": 2.4389932285677783e-06, + "loss": 0.8449, + "step": 24190 + }, + { + "epoch": 9.28, + "learning_rate": 2.426216941356842e-06, + "loss": 0.9497, + "step": 24200 + }, + { + "epoch": 9.28, + "learning_rate": 2.413440654145905e-06, + "loss": 0.9056, + "step": 24210 + }, + { + "epoch": 9.28, + "learning_rate": 2.4006643669349684e-06, + "loss": 0.8841, + "step": 24220 + }, + { + "epoch": 9.29, + "learning_rate": 2.3878880797240324e-06, + "loss": 0.8616, + "step": 24230 + }, + { + "epoch": 9.29, + "learning_rate": 2.3751117925130956e-06, + "loss": 0.8244, + "step": 24240 + }, + { + "epoch": 9.29, + "learning_rate": 2.3623355053021593e-06, + "loss": 0.8809, + "step": 24250 + }, + { + "epoch": 9.3, + "learning_rate": 2.3495592180912225e-06, + "loss": 0.9184, + "step": 24260 + }, + { + "epoch": 9.3, + "learning_rate": 2.3367829308802865e-06, + "loss": 0.8938, + "step": 24270 + }, + { + "epoch": 9.31, + "learning_rate": 2.3240066436693497e-06, + "loss": 0.8416, + "step": 24280 + }, + { + "epoch": 9.31, + "learning_rate": 2.3112303564584134e-06, + "loss": 0.8895, + "step": 24290 + }, + { + "epoch": 9.31, + "learning_rate": 2.2984540692474766e-06, + "loss": 0.8441, + "step": 24300 + }, + { + "epoch": 9.32, + "learning_rate": 2.2856777820365402e-06, + "loss": 0.868, + "step": 24310 + }, + { + "epoch": 9.32, + "learning_rate": 2.272901494825604e-06, + "loss": 0.9158, + "step": 24320 + }, + { + "epoch": 9.33, + "learning_rate": 2.260125207614667e-06, + "loss": 0.8591, + "step": 24330 + }, + { + "epoch": 9.33, + "learning_rate": 2.2473489204037307e-06, + "loss": 0.8936, + "step": 24340 + }, + { + "epoch": 9.33, + "learning_rate": 2.234572633192794e-06, + "loss": 0.8518, + "step": 24350 + }, + { + "epoch": 9.34, + "learning_rate": 2.221796345981858e-06, + "loss": 0.8639, + "step": 24360 + }, + { + "epoch": 9.34, + "learning_rate": 2.209020058770921e-06, + "loss": 0.9183, + "step": 24370 + }, + { + "epoch": 9.34, + "learning_rate": 2.196243771559985e-06, + "loss": 0.8916, + "step": 24380 + }, + { + "epoch": 9.35, + "learning_rate": 2.183467484349048e-06, + "loss": 0.9673, + "step": 24390 + }, + { + "epoch": 9.35, + "learning_rate": 2.170691197138112e-06, + "loss": 0.9182, + "step": 24400 + }, + { + "epoch": 9.36, + "learning_rate": 2.1579149099271753e-06, + "loss": 0.917, + "step": 24410 + }, + { + "epoch": 9.36, + "learning_rate": 2.1451386227162385e-06, + "loss": 0.9187, + "step": 24420 + }, + { + "epoch": 9.36, + "learning_rate": 2.132362335505302e-06, + "loss": 0.8496, + "step": 24430 + }, + { + "epoch": 9.37, + "learning_rate": 2.1195860482943658e-06, + "loss": 0.8817, + "step": 24440 + }, + { + "epoch": 9.37, + "learning_rate": 2.1068097610834294e-06, + "loss": 0.8311, + "step": 24450 + }, + { + "epoch": 9.38, + "learning_rate": 2.0940334738724926e-06, + "loss": 0.8131, + "step": 24460 + }, + { + "epoch": 9.38, + "learning_rate": 2.0812571866615562e-06, + "loss": 0.8677, + "step": 24470 + }, + { + "epoch": 9.38, + "learning_rate": 2.0684808994506194e-06, + "loss": 0.8458, + "step": 24480 + }, + { + "epoch": 9.39, + "learning_rate": 2.0557046122396835e-06, + "loss": 0.8945, + "step": 24490 + }, + { + "epoch": 9.39, + "learning_rate": 2.0429283250287467e-06, + "loss": 0.9614, + "step": 24500 + }, + { + "epoch": 9.39, + "learning_rate": 2.03015203781781e-06, + "loss": 0.9041, + "step": 24510 + }, + { + "epoch": 9.4, + "learning_rate": 2.0173757506068736e-06, + "loss": 0.874, + "step": 24520 + }, + { + "epoch": 9.4, + "learning_rate": 2.004599463395937e-06, + "loss": 0.7861, + "step": 24530 + }, + { + "epoch": 9.41, + "learning_rate": 1.991823176185001e-06, + "loss": 0.8658, + "step": 24540 + }, + { + "epoch": 9.41, + "learning_rate": 1.979046888974064e-06, + "loss": 0.8822, + "step": 24550 + }, + { + "epoch": 9.41, + "learning_rate": 1.9662706017631277e-06, + "loss": 0.9477, + "step": 24560 + }, + { + "epoch": 9.42, + "learning_rate": 1.9534943145521913e-06, + "loss": 0.8248, + "step": 24570 + }, + { + "epoch": 9.42, + "learning_rate": 1.940718027341255e-06, + "loss": 0.8301, + "step": 24580 + }, + { + "epoch": 9.43, + "learning_rate": 1.927941740130318e-06, + "loss": 0.8404, + "step": 24590 + }, + { + "epoch": 9.43, + "learning_rate": 1.9151654529193813e-06, + "loss": 0.9288, + "step": 24600 + }, + { + "epoch": 9.43, + "learning_rate": 1.9023891657084454e-06, + "loss": 0.8764, + "step": 24610 + }, + { + "epoch": 9.44, + "learning_rate": 1.8896128784975088e-06, + "loss": 0.8312, + "step": 24620 + }, + { + "epoch": 9.44, + "learning_rate": 1.8768365912865722e-06, + "loss": 0.9223, + "step": 24630 + }, + { + "epoch": 9.44, + "learning_rate": 1.8640603040756357e-06, + "loss": 0.9138, + "step": 24640 + }, + { + "epoch": 9.45, + "learning_rate": 1.851284016864699e-06, + "loss": 0.8945, + "step": 24650 + }, + { + "epoch": 9.45, + "learning_rate": 1.8385077296537625e-06, + "loss": 0.8871, + "step": 24660 + }, + { + "epoch": 9.46, + "learning_rate": 1.8257314424428261e-06, + "loss": 0.8953, + "step": 24670 + }, + { + "epoch": 9.46, + "learning_rate": 1.8129551552318896e-06, + "loss": 0.8712, + "step": 24680 + }, + { + "epoch": 9.46, + "learning_rate": 1.8001788680209532e-06, + "loss": 0.7844, + "step": 24690 + }, + { + "epoch": 9.47, + "learning_rate": 1.7874025808100166e-06, + "loss": 0.8303, + "step": 24700 + }, + { + "epoch": 9.47, + "learning_rate": 1.7746262935990803e-06, + "loss": 0.8612, + "step": 24710 + }, + { + "epoch": 9.47, + "learning_rate": 1.7618500063881437e-06, + "loss": 0.8528, + "step": 24720 + }, + { + "epoch": 9.48, + "learning_rate": 1.7490737191772073e-06, + "loss": 0.88, + "step": 24730 + }, + { + "epoch": 9.48, + "learning_rate": 1.7362974319662705e-06, + "loss": 0.8481, + "step": 24740 + }, + { + "epoch": 9.49, + "learning_rate": 1.7235211447553342e-06, + "loss": 0.9088, + "step": 24750 + }, + { + "epoch": 9.49, + "learning_rate": 1.7107448575443976e-06, + "loss": 0.9231, + "step": 24760 + }, + { + "epoch": 9.49, + "learning_rate": 1.6979685703334612e-06, + "loss": 0.9253, + "step": 24770 + }, + { + "epoch": 9.5, + "learning_rate": 1.6851922831225246e-06, + "loss": 0.8375, + "step": 24780 + }, + { + "epoch": 9.5, + "learning_rate": 1.672415995911588e-06, + "loss": 0.8787, + "step": 24790 + }, + { + "epoch": 9.51, + "learning_rate": 1.6596397087006517e-06, + "loss": 0.8975, + "step": 24800 + }, + { + "epoch": 9.51, + "learning_rate": 1.646863421489715e-06, + "loss": 0.8286, + "step": 24810 + }, + { + "epoch": 9.51, + "learning_rate": 1.6340871342787787e-06, + "loss": 0.8539, + "step": 24820 + }, + { + "epoch": 9.52, + "learning_rate": 1.6213108470678422e-06, + "loss": 0.8729, + "step": 24830 + }, + { + "epoch": 9.52, + "learning_rate": 1.6085345598569056e-06, + "loss": 0.8833, + "step": 24840 + }, + { + "epoch": 9.52, + "learning_rate": 1.595758272645969e-06, + "loss": 0.9273, + "step": 24850 + }, + { + "epoch": 9.53, + "learning_rate": 1.5829819854350326e-06, + "loss": 0.9133, + "step": 24860 + }, + { + "epoch": 9.53, + "learning_rate": 1.570205698224096e-06, + "loss": 0.9569, + "step": 24870 + }, + { + "epoch": 9.54, + "learning_rate": 1.5574294110131597e-06, + "loss": 0.8787, + "step": 24880 + }, + { + "epoch": 9.54, + "learning_rate": 1.5446531238022231e-06, + "loss": 0.8777, + "step": 24890 + }, + { + "epoch": 9.54, + "learning_rate": 1.5331544653123803e-06, + "loss": 0.9187, + "step": 24900 + }, + { + "epoch": 9.55, + "learning_rate": 1.5203781781014438e-06, + "loss": 0.9011, + "step": 24910 + }, + { + "epoch": 9.55, + "learning_rate": 1.5076018908905072e-06, + "loss": 0.984, + "step": 24920 + }, + { + "epoch": 9.56, + "learning_rate": 1.4948256036795708e-06, + "loss": 0.9167, + "step": 24930 + }, + { + "epoch": 9.56, + "learning_rate": 1.4820493164686342e-06, + "loss": 0.7452, + "step": 24940 + }, + { + "epoch": 9.56, + "learning_rate": 1.4692730292576977e-06, + "loss": 0.9521, + "step": 24950 + }, + { + "epoch": 9.57, + "learning_rate": 1.4564967420467613e-06, + "loss": 0.934, + "step": 24960 + }, + { + "epoch": 9.57, + "learning_rate": 1.4437204548358247e-06, + "loss": 0.9003, + "step": 24970 + }, + { + "epoch": 9.57, + "learning_rate": 1.4309441676248883e-06, + "loss": 0.8777, + "step": 24980 + }, + { + "epoch": 9.58, + "learning_rate": 1.4181678804139518e-06, + "loss": 0.9531, + "step": 24990 + }, + { + "epoch": 9.58, + "learning_rate": 1.4053915932030152e-06, + "loss": 0.9781, + "step": 25000 + }, + { + "epoch": 9.59, + "learning_rate": 1.3926153059920786e-06, + "loss": 0.8931, + "step": 25010 + }, + { + "epoch": 9.59, + "learning_rate": 1.3798390187811422e-06, + "loss": 0.8565, + "step": 25020 + }, + { + "epoch": 9.59, + "learning_rate": 1.3670627315702057e-06, + "loss": 0.9222, + "step": 25030 + }, + { + "epoch": 9.6, + "learning_rate": 1.3542864443592693e-06, + "loss": 0.8402, + "step": 25040 + }, + { + "epoch": 9.6, + "learning_rate": 1.3415101571483327e-06, + "loss": 0.8715, + "step": 25050 + }, + { + "epoch": 9.61, + "learning_rate": 1.3287338699373963e-06, + "loss": 0.852, + "step": 25060 + }, + { + "epoch": 9.61, + "learning_rate": 1.3159575827264598e-06, + "loss": 0.8933, + "step": 25070 + }, + { + "epoch": 9.61, + "learning_rate": 1.3031812955155232e-06, + "loss": 0.8523, + "step": 25080 + }, + { + "epoch": 9.62, + "learning_rate": 1.2904050083045868e-06, + "loss": 0.8297, + "step": 25090 + }, + { + "epoch": 9.62, + "learning_rate": 1.27762872109365e-06, + "loss": 0.9061, + "step": 25100 + }, + { + "epoch": 9.62, + "learning_rate": 1.2648524338827137e-06, + "loss": 0.858, + "step": 25110 + }, + { + "epoch": 9.63, + "learning_rate": 1.252076146671777e-06, + "loss": 0.8085, + "step": 25120 + }, + { + "epoch": 9.63, + "learning_rate": 1.2392998594608407e-06, + "loss": 0.8008, + "step": 25130 + }, + { + "epoch": 9.64, + "learning_rate": 1.2265235722499041e-06, + "loss": 0.9034, + "step": 25140 + }, + { + "epoch": 9.64, + "learning_rate": 1.2137472850389678e-06, + "loss": 0.9067, + "step": 25150 + }, + { + "epoch": 9.64, + "learning_rate": 1.2009709978280312e-06, + "loss": 0.9019, + "step": 25160 + }, + { + "epoch": 9.65, + "learning_rate": 1.1881947106170948e-06, + "loss": 0.9576, + "step": 25170 + }, + { + "epoch": 9.65, + "learning_rate": 1.1754184234061583e-06, + "loss": 0.9147, + "step": 25180 + }, + { + "epoch": 9.66, + "learning_rate": 1.1626421361952219e-06, + "loss": 0.9099, + "step": 25190 + }, + { + "epoch": 9.66, + "learning_rate": 1.149865848984285e-06, + "loss": 0.8634, + "step": 25200 + }, + { + "epoch": 9.66, + "learning_rate": 1.1370895617733487e-06, + "loss": 0.9054, + "step": 25210 + }, + { + "epoch": 9.67, + "learning_rate": 1.1243132745624121e-06, + "loss": 0.9334, + "step": 25220 + }, + { + "epoch": 9.67, + "learning_rate": 1.1115369873514756e-06, + "loss": 0.8752, + "step": 25230 + }, + { + "epoch": 9.67, + "learning_rate": 1.0987607001405392e-06, + "loss": 0.9368, + "step": 25240 + }, + { + "epoch": 9.68, + "learning_rate": 1.0859844129296026e-06, + "loss": 0.8216, + "step": 25250 + }, + { + "epoch": 9.68, + "learning_rate": 1.0732081257186663e-06, + "loss": 0.9001, + "step": 25260 + }, + { + "epoch": 9.69, + "learning_rate": 1.0604318385077297e-06, + "loss": 0.9092, + "step": 25270 + }, + { + "epoch": 9.69, + "learning_rate": 1.0476555512967933e-06, + "loss": 0.9125, + "step": 25280 + }, + { + "epoch": 9.69, + "learning_rate": 1.0348792640858565e-06, + "loss": 0.8771, + "step": 25290 + }, + { + "epoch": 9.7, + "learning_rate": 1.0221029768749202e-06, + "loss": 0.903, + "step": 25300 + }, + { + "epoch": 9.7, + "learning_rate": 1.0093266896639836e-06, + "loss": 0.7834, + "step": 25310 + }, + { + "epoch": 9.7, + "learning_rate": 9.965504024530472e-07, + "loss": 0.8227, + "step": 25320 + }, + { + "epoch": 9.71, + "learning_rate": 9.837741152421106e-07, + "loss": 0.8716, + "step": 25330 + }, + { + "epoch": 9.71, + "learning_rate": 9.709978280311743e-07, + "loss": 0.802, + "step": 25340 + }, + { + "epoch": 9.72, + "learning_rate": 9.582215408202377e-07, + "loss": 0.8235, + "step": 25350 + }, + { + "epoch": 9.72, + "learning_rate": 9.454452536093012e-07, + "loss": 0.9491, + "step": 25360 + }, + { + "epoch": 9.72, + "learning_rate": 9.326689663983646e-07, + "loss": 0.892, + "step": 25370 + }, + { + "epoch": 9.73, + "learning_rate": 9.198926791874282e-07, + "loss": 0.9325, + "step": 25380 + }, + { + "epoch": 9.73, + "learning_rate": 9.071163919764917e-07, + "loss": 0.9197, + "step": 25390 + }, + { + "epoch": 9.74, + "learning_rate": 8.943401047655552e-07, + "loss": 0.885, + "step": 25400 + }, + { + "epoch": 9.74, + "learning_rate": 8.815638175546186e-07, + "loss": 0.8702, + "step": 25410 + }, + { + "epoch": 9.74, + "learning_rate": 8.687875303436822e-07, + "loss": 0.8566, + "step": 25420 + }, + { + "epoch": 9.75, + "learning_rate": 8.560112431327457e-07, + "loss": 0.8361, + "step": 25430 + }, + { + "epoch": 9.75, + "learning_rate": 8.432349559218092e-07, + "loss": 0.8913, + "step": 25440 + }, + { + "epoch": 9.75, + "learning_rate": 8.304586687108726e-07, + "loss": 0.9078, + "step": 25450 + }, + { + "epoch": 9.76, + "learning_rate": 8.176823814999361e-07, + "loss": 0.9227, + "step": 25460 + }, + { + "epoch": 9.76, + "learning_rate": 8.049060942889996e-07, + "loss": 0.8385, + "step": 25470 + }, + { + "epoch": 9.77, + "learning_rate": 7.921298070780631e-07, + "loss": 0.8668, + "step": 25480 + }, + { + "epoch": 9.77, + "learning_rate": 7.793535198671266e-07, + "loss": 0.8811, + "step": 25490 + }, + { + "epoch": 9.77, + "learning_rate": 7.665772326561902e-07, + "loss": 0.9254, + "step": 25500 + }, + { + "epoch": 9.78, + "learning_rate": 7.538009454452536e-07, + "loss": 0.8265, + "step": 25510 + }, + { + "epoch": 9.78, + "learning_rate": 7.410246582343171e-07, + "loss": 0.9664, + "step": 25520 + }, + { + "epoch": 9.79, + "learning_rate": 7.282483710233806e-07, + "loss": 0.9129, + "step": 25530 + }, + { + "epoch": 9.79, + "learning_rate": 7.154720838124442e-07, + "loss": 0.923, + "step": 25540 + }, + { + "epoch": 9.79, + "learning_rate": 7.026957966015076e-07, + "loss": 0.8464, + "step": 25550 + }, + { + "epoch": 9.8, + "learning_rate": 6.899195093905711e-07, + "loss": 0.9125, + "step": 25560 + }, + { + "epoch": 9.8, + "learning_rate": 6.771432221796346e-07, + "loss": 0.8628, + "step": 25570 + }, + { + "epoch": 9.8, + "learning_rate": 6.643669349686982e-07, + "loss": 0.9041, + "step": 25580 + }, + { + "epoch": 9.81, + "learning_rate": 6.515906477577616e-07, + "loss": 0.7553, + "step": 25590 + }, + { + "epoch": 9.81, + "learning_rate": 6.38814360546825e-07, + "loss": 0.9134, + "step": 25600 + }, + { + "epoch": 9.82, + "learning_rate": 6.260380733358885e-07, + "loss": 0.8363, + "step": 25610 + }, + { + "epoch": 9.82, + "learning_rate": 6.132617861249521e-07, + "loss": 0.9235, + "step": 25620 + }, + { + "epoch": 9.82, + "learning_rate": 6.004854989140156e-07, + "loss": 0.8643, + "step": 25630 + }, + { + "epoch": 9.83, + "learning_rate": 5.877092117030791e-07, + "loss": 0.8314, + "step": 25640 + }, + { + "epoch": 9.83, + "learning_rate": 5.749329244921425e-07, + "loss": 0.8884, + "step": 25650 + }, + { + "epoch": 9.84, + "learning_rate": 5.621566372812061e-07, + "loss": 0.9008, + "step": 25660 + }, + { + "epoch": 9.84, + "learning_rate": 5.493803500702696e-07, + "loss": 0.836, + "step": 25670 + }, + { + "epoch": 9.84, + "learning_rate": 5.366040628593331e-07, + "loss": 0.9073, + "step": 25680 + }, + { + "epoch": 9.85, + "learning_rate": 5.238277756483967e-07, + "loss": 0.8563, + "step": 25690 + }, + { + "epoch": 9.85, + "learning_rate": 5.110514884374601e-07, + "loss": 0.8679, + "step": 25700 + }, + { + "epoch": 9.85, + "learning_rate": 4.982752012265236e-07, + "loss": 0.8408, + "step": 25710 + }, + { + "epoch": 9.86, + "learning_rate": 4.854989140155871e-07, + "loss": 0.8618, + "step": 25720 + }, + { + "epoch": 9.86, + "learning_rate": 4.727226268046506e-07, + "loss": 0.8408, + "step": 25730 + }, + { + "epoch": 9.87, + "learning_rate": 4.599463395937141e-07, + "loss": 0.9157, + "step": 25740 + }, + { + "epoch": 9.87, + "learning_rate": 4.471700523827776e-07, + "loss": 0.846, + "step": 25750 + }, + { + "epoch": 9.87, + "learning_rate": 4.343937651718411e-07, + "loss": 0.8534, + "step": 25760 + }, + { + "epoch": 9.88, + "learning_rate": 4.216174779609046e-07, + "loss": 0.8676, + "step": 25770 + }, + { + "epoch": 9.88, + "learning_rate": 4.0884119074996803e-07, + "loss": 0.8592, + "step": 25780 + }, + { + "epoch": 9.89, + "learning_rate": 3.9606490353903156e-07, + "loss": 0.8271, + "step": 25790 + }, + { + "epoch": 9.89, + "learning_rate": 3.832886163280951e-07, + "loss": 0.8538, + "step": 25800 + }, + { + "epoch": 9.89, + "learning_rate": 3.7051232911715856e-07, + "loss": 0.8675, + "step": 25810 + }, + { + "epoch": 9.9, + "learning_rate": 3.577360419062221e-07, + "loss": 0.9328, + "step": 25820 + }, + { + "epoch": 9.9, + "learning_rate": 3.4495975469528556e-07, + "loss": 0.8798, + "step": 25830 + }, + { + "epoch": 9.9, + "learning_rate": 3.321834674843491e-07, + "loss": 0.9539, + "step": 25840 + }, + { + "epoch": 9.91, + "learning_rate": 3.194071802734125e-07, + "loss": 0.833, + "step": 25850 + }, + { + "epoch": 9.91, + "learning_rate": 3.0663089306247604e-07, + "loss": 0.8749, + "step": 25860 + }, + { + "epoch": 9.92, + "learning_rate": 2.9385460585153956e-07, + "loss": 0.8933, + "step": 25870 + }, + { + "epoch": 9.92, + "learning_rate": 2.8107831864060304e-07, + "loss": 0.8271, + "step": 25880 + }, + { + "epoch": 9.92, + "learning_rate": 2.6830203142966656e-07, + "loss": 0.8959, + "step": 25890 + }, + { + "epoch": 9.93, + "learning_rate": 2.5552574421873004e-07, + "loss": 0.832, + "step": 25900 + }, + { + "epoch": 9.93, + "learning_rate": 2.4274945700779357e-07, + "loss": 0.8726, + "step": 25910 + }, + { + "epoch": 9.93, + "learning_rate": 2.2997316979685704e-07, + "loss": 0.9485, + "step": 25920 + }, + { + "epoch": 9.94, + "learning_rate": 2.1719688258592054e-07, + "loss": 0.8342, + "step": 25930 + }, + { + "epoch": 9.94, + "learning_rate": 2.0442059537498401e-07, + "loss": 0.9169, + "step": 25940 + }, + { + "epoch": 9.95, + "learning_rate": 1.9164430816404754e-07, + "loss": 0.7973, + "step": 25950 + }, + { + "epoch": 9.95, + "learning_rate": 1.7886802095311104e-07, + "loss": 0.8861, + "step": 25960 + }, + { + "epoch": 9.95, + "learning_rate": 1.6609173374217454e-07, + "loss": 0.8022, + "step": 25970 + }, + { + "epoch": 9.96, + "learning_rate": 1.5331544653123802e-07, + "loss": 0.776, + "step": 25980 + }, + { + "epoch": 9.96, + "learning_rate": 1.4053915932030152e-07, + "loss": 0.8827, + "step": 25990 + }, + { + "epoch": 9.97, + "learning_rate": 1.2776287210936502e-07, + "loss": 0.799, + "step": 26000 + }, + { + "epoch": 9.97, + "learning_rate": 1.1498658489842852e-07, + "loss": 0.8771, + "step": 26010 + }, + { + "epoch": 9.97, + "learning_rate": 1.0221029768749201e-07, + "loss": 0.848, + "step": 26020 + }, + { + "epoch": 9.98, + "learning_rate": 8.943401047655552e-08, + "loss": 0.9332, + "step": 26030 + }, + { + "epoch": 9.98, + "learning_rate": 7.665772326561901e-08, + "loss": 0.9762, + "step": 26040 + }, + { + "epoch": 9.98, + "learning_rate": 6.388143605468251e-08, + "loss": 0.8634, + "step": 26050 + }, + { + "epoch": 9.99, + "learning_rate": 5.1105148843746004e-08, + "loss": 0.7815, + "step": 26060 + }, + { + "epoch": 9.99, + "learning_rate": 3.8328861632809504e-08, + "loss": 0.9461, + "step": 26070 + }, + { + "epoch": 10.0, + "learning_rate": 2.5552574421873002e-08, + "loss": 0.8885, + "step": 26080 + }, + { + "epoch": 10.0, + "learning_rate": 1.2776287210936501e-08, + "loss": 0.8495, + "step": 26090 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.7662602112929114, + "eval_loss": 0.7000287771224976, + "eval_runtime": 299.7126, + "eval_samples_per_second": 139.277, + "eval_steps_per_second": 69.64, + "step": 26090 + }, + { + "epoch": 10.0, + "step": 26090, + "total_flos": 1.6966966119854244e+19, + "train_loss": 1.0529476437910437, + "train_runtime": 11696.8841, + "train_samples_per_second": 142.747, + "train_steps_per_second": 2.231 + } + ], + "max_steps": 26090, + "num_train_epochs": 10, + "total_flos": 1.6966966119854244e+19, + "trial_name": null, + "trial_params": null +}