{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8899, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.243445692883896e-07, "loss": 2.2217, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.2734082397003748e-06, "loss": 2.098, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.02247191011236e-06, "loss": 1.8324, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.771535580524345e-06, "loss": 1.7504, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.52059925093633e-06, "loss": 1.5636, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.269662921348315e-06, "loss": 1.6101, "step": 60 }, { "epoch": 0.01, "learning_rate": 5.0187265917603005e-06, "loss": 2.1579, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.7677902621722845e-06, "loss": 1.4598, "step": 80 }, { "epoch": 0.01, "learning_rate": 6.51685393258427e-06, "loss": 1.6046, "step": 90 }, { "epoch": 0.01, "learning_rate": 7.265917602996255e-06, "loss": 1.1965, "step": 100 }, { "epoch": 0.01, "learning_rate": 8.01498127340824e-06, "loss": 1.3175, "step": 110 }, { "epoch": 0.01, "learning_rate": 8.764044943820226e-06, "loss": 1.3043, "step": 120 }, { "epoch": 0.01, "learning_rate": 9.51310861423221e-06, "loss": 1.0915, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.0262172284644197e-05, "loss": 0.8881, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.101123595505618e-05, "loss": 0.7195, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.1760299625468165e-05, "loss": 0.6414, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.250936329588015e-05, "loss": 0.5803, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.3258426966292135e-05, "loss": 0.446, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.4007490636704121e-05, "loss": 0.4358, "step": 190 }, { "epoch": 0.02, "learning_rate": 1.4756554307116106e-05, "loss": 0.4409, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.5505617977528093e-05, "loss": 0.3052, "step": 210 }, { "epoch": 0.02, "learning_rate": 1.6254681647940076e-05, "loss": 0.3371, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.7003745318352062e-05, "loss": 0.3135, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.7752808988764045e-05, "loss": 0.3813, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.8501872659176032e-05, "loss": 0.3232, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.925093632958802e-05, "loss": 0.301, "step": 260 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.1726, "step": 270 }, { "epoch": 0.03, "learning_rate": 1.999993377127307e-05, "loss": 0.3646, "step": 280 }, { "epoch": 0.03, "learning_rate": 1.999973508596952e-05, "loss": 0.2276, "step": 290 }, { "epoch": 0.03, "learning_rate": 1.999940394672109e-05, "loss": 0.334, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.9998940357913964e-05, "loss": 0.2052, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.9998344325688727e-05, "loss": 0.2715, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.999761585794026e-05, "loss": 0.2602, "step": 330 }, { "epoch": 0.04, "learning_rate": 1.9996754964317668e-05, "loss": 0.2067, "step": 340 }, { "epoch": 0.04, "learning_rate": 1.999576165622413e-05, "loss": 0.3932, "step": 350 }, { "epoch": 0.04, "learning_rate": 1.9994635946816748e-05, "loss": 0.2637, "step": 360 }, { "epoch": 0.04, "learning_rate": 1.999337785100638e-05, "loss": 0.2291, "step": 370 }, { "epoch": 0.04, "learning_rate": 1.9991987385457452e-05, "loss": 0.2584, "step": 380 }, { "epoch": 0.04, "learning_rate": 1.9990464568587708e-05, "loss": 0.2392, "step": 390 }, { "epoch": 0.04, "learning_rate": 1.9988809420567998e-05, "loss": 0.2079, "step": 400 }, { "epoch": 0.05, "learning_rate": 1.998702196332199e-05, "loss": 0.2343, "step": 410 }, { "epoch": 0.05, "learning_rate": 1.998510222052588e-05, "loss": 0.211, "step": 420 }, { "epoch": 0.05, "learning_rate": 1.9983050217608106e-05, "loss": 0.2283, "step": 430 }, { "epoch": 0.05, "learning_rate": 1.998086598174896e-05, "loss": 0.2756, "step": 440 }, { "epoch": 0.05, "learning_rate": 1.9978549541880295e-05, "loss": 0.2038, "step": 450 }, { "epoch": 0.05, "learning_rate": 1.9976100928685063e-05, "loss": 0.2509, "step": 460 }, { "epoch": 0.05, "learning_rate": 1.9973520174596983e-05, "loss": 0.2034, "step": 470 }, { "epoch": 0.05, "learning_rate": 1.9970807313800063e-05, "loss": 0.2689, "step": 480 }, { "epoch": 0.06, "learning_rate": 1.9967962382228166e-05, "loss": 0.1766, "step": 490 }, { "epoch": 0.06, "learning_rate": 1.996498541756453e-05, "loss": 0.213, "step": 500 }, { "epoch": 0.06, "learning_rate": 1.9961876459241274e-05, "loss": 0.2533, "step": 510 }, { "epoch": 0.06, "learning_rate": 1.995863554843887e-05, "loss": 0.222, "step": 520 }, { "epoch": 0.06, "learning_rate": 1.9955262728085592e-05, "loss": 0.2103, "step": 530 }, { "epoch": 0.06, "learning_rate": 1.9951758042856963e-05, "loss": 0.2198, "step": 540 }, { "epoch": 0.06, "learning_rate": 1.994812153917515e-05, "loss": 0.2222, "step": 550 }, { "epoch": 0.06, "learning_rate": 1.994435326520835e-05, "loss": 0.2416, "step": 560 }, { "epoch": 0.06, "learning_rate": 1.9940453270870174e-05, "loss": 0.2062, "step": 570 }, { "epoch": 0.07, "learning_rate": 1.9936421607818942e-05, "loss": 0.2477, "step": 580 }, { "epoch": 0.07, "learning_rate": 1.993225832945704e-05, "loss": 0.1833, "step": 590 }, { "epoch": 0.07, "learning_rate": 1.9927963490930195e-05, "loss": 0.2297, "step": 600 }, { "epoch": 0.07, "learning_rate": 1.9923537149126738e-05, "loss": 0.2025, "step": 610 }, { "epoch": 0.07, "learning_rate": 1.9918979362676875e-05, "loss": 0.2207, "step": 620 }, { "epoch": 0.07, "learning_rate": 1.9914290191951875e-05, "loss": 0.1873, "step": 630 }, { "epoch": 0.07, "learning_rate": 1.990946969906331e-05, "loss": 0.2602, "step": 640 }, { "epoch": 0.07, "learning_rate": 1.9904517947862193e-05, "loss": 0.2692, "step": 650 }, { "epoch": 0.07, "learning_rate": 1.989943500393816e-05, "loss": 0.1779, "step": 660 }, { "epoch": 0.08, "learning_rate": 1.9894220934618598e-05, "loss": 0.1973, "step": 670 }, { "epoch": 0.08, "learning_rate": 1.988887580896774e-05, "loss": 0.1873, "step": 680 }, { "epoch": 0.08, "learning_rate": 1.9883399697785756e-05, "loss": 0.2301, "step": 690 }, { "epoch": 0.08, "learning_rate": 1.9877792673607823e-05, "loss": 0.2207, "step": 700 }, { "epoch": 0.08, "learning_rate": 1.9872054810703155e-05, "loss": 0.2069, "step": 710 }, { "epoch": 0.08, "learning_rate": 1.986618618507402e-05, "loss": 0.2315, "step": 720 }, { "epoch": 0.08, "learning_rate": 1.9860186874454746e-05, "loss": 0.1969, "step": 730 }, { "epoch": 0.08, "learning_rate": 1.9854056958310667e-05, "loss": 0.248, "step": 740 }, { "epoch": 0.08, "learning_rate": 1.984779651783709e-05, "loss": 0.211, "step": 750 }, { "epoch": 0.09, "learning_rate": 1.9841405635958225e-05, "loss": 0.1988, "step": 760 }, { "epoch": 0.09, "learning_rate": 1.983488439732606e-05, "loss": 0.1728, "step": 770 }, { "epoch": 0.09, "learning_rate": 1.9828232888319263e-05, "loss": 0.2126, "step": 780 }, { "epoch": 0.09, "learning_rate": 1.9821451197042028e-05, "loss": 0.2348, "step": 790 }, { "epoch": 0.09, "learning_rate": 1.981453941332291e-05, "loss": 0.2369, "step": 800 }, { "epoch": 0.09, "learning_rate": 1.980749762871364e-05, "loss": 0.1675, "step": 810 }, { "epoch": 0.09, "learning_rate": 1.98003259364879e-05, "loss": 0.2121, "step": 820 }, { "epoch": 0.09, "learning_rate": 1.9793024431640096e-05, "loss": 0.2358, "step": 830 }, { "epoch": 0.09, "learning_rate": 1.9785593210884112e-05, "loss": 0.1853, "step": 840 }, { "epoch": 0.1, "learning_rate": 1.9778032372652e-05, "loss": 0.2123, "step": 850 }, { "epoch": 0.1, "learning_rate": 1.97703420170927e-05, "loss": 0.1991, "step": 860 }, { "epoch": 0.1, "learning_rate": 1.9762522246070697e-05, "loss": 0.1983, "step": 870 }, { "epoch": 0.1, "learning_rate": 1.9754573163164697e-05, "loss": 0.1852, "step": 880 }, { "epoch": 0.1, "learning_rate": 1.9746494873666226e-05, "loss": 0.1656, "step": 890 }, { "epoch": 0.1, "learning_rate": 1.9738287484578243e-05, "loss": 0.1611, "step": 900 }, { "epoch": 0.1, "learning_rate": 1.972995110461374e-05, "loss": 0.1771, "step": 910 }, { "epoch": 0.1, "learning_rate": 1.9721485844194282e-05, "loss": 0.2002, "step": 920 }, { "epoch": 0.1, "learning_rate": 1.9712891815448557e-05, "loss": 0.1828, "step": 930 }, { "epoch": 0.11, "learning_rate": 1.9704169132210874e-05, "loss": 0.182, "step": 940 }, { "epoch": 0.11, "learning_rate": 1.969531791001968e-05, "loss": 0.1969, "step": 950 }, { "epoch": 0.11, "learning_rate": 1.9686338266116006e-05, "loss": 0.1736, "step": 960 }, { "epoch": 0.11, "learning_rate": 1.9677230319441936e-05, "loss": 0.1496, "step": 970 }, { "epoch": 0.11, "learning_rate": 1.9667994190639007e-05, "loss": 0.2126, "step": 980 }, { "epoch": 0.11, "learning_rate": 1.965863000204663e-05, "loss": 0.2366, "step": 990 }, { "epoch": 0.11, "learning_rate": 1.9649137877700462e-05, "loss": 0.2226, "step": 1000 }, { "epoch": 0.11, "learning_rate": 1.9639517943330768e-05, "loss": 0.2237, "step": 1010 }, { "epoch": 0.11, "learning_rate": 1.962977032636075e-05, "loss": 0.2526, "step": 1020 }, { "epoch": 0.12, "learning_rate": 1.9619895155904855e-05, "loss": 0.2078, "step": 1030 }, { "epoch": 0.12, "learning_rate": 1.9609892562767082e-05, "loss": 0.1882, "step": 1040 }, { "epoch": 0.12, "learning_rate": 1.959976267943923e-05, "loss": 0.2113, "step": 1050 }, { "epoch": 0.12, "learning_rate": 1.9589505640099156e-05, "loss": 0.2133, "step": 1060 }, { "epoch": 0.12, "learning_rate": 1.957912158060899e-05, "loss": 0.2228, "step": 1070 }, { "epoch": 0.12, "learning_rate": 1.9568610638513343e-05, "loss": 0.1903, "step": 1080 }, { "epoch": 0.12, "learning_rate": 1.9557972953037476e-05, "loss": 0.1865, "step": 1090 }, { "epoch": 0.12, "learning_rate": 1.954720866508546e-05, "loss": 0.2001, "step": 1100 }, { "epoch": 0.12, "learning_rate": 1.9536317917238312e-05, "loss": 0.1557, "step": 1110 }, { "epoch": 0.13, "learning_rate": 1.952530085375211e-05, "loss": 0.2571, "step": 1120 }, { "epoch": 0.13, "learning_rate": 1.9514157620556072e-05, "loss": 0.1855, "step": 1130 }, { "epoch": 0.13, "learning_rate": 1.9502888365250622e-05, "loss": 0.2161, "step": 1140 }, { "epoch": 0.13, "learning_rate": 1.949149323710545e-05, "loss": 0.1728, "step": 1150 }, { "epoch": 0.13, "learning_rate": 1.9479972387057523e-05, "loss": 0.1924, "step": 1160 }, { "epoch": 0.13, "learning_rate": 1.9468325967709084e-05, "loss": 0.2001, "step": 1170 }, { "epoch": 0.13, "learning_rate": 1.9456554133325642e-05, "loss": 0.1947, "step": 1180 }, { "epoch": 0.13, "learning_rate": 1.944465703983392e-05, "loss": 0.1822, "step": 1190 }, { "epoch": 0.13, "learning_rate": 1.943263484481978e-05, "loss": 0.1425, "step": 1200 }, { "epoch": 0.14, "learning_rate": 1.9420487707526163e-05, "loss": 0.1903, "step": 1210 }, { "epoch": 0.14, "learning_rate": 1.9408215788850958e-05, "loss": 0.1772, "step": 1220 }, { "epoch": 0.14, "learning_rate": 1.939581925134487e-05, "loss": 0.164, "step": 1230 }, { "epoch": 0.14, "learning_rate": 1.938329825920928e-05, "loss": 0.1882, "step": 1240 }, { "epoch": 0.14, "learning_rate": 1.9370652978294065e-05, "loss": 0.2023, "step": 1250 }, { "epoch": 0.14, "learning_rate": 1.9357883576095395e-05, "loss": 0.1628, "step": 1260 }, { "epoch": 0.14, "learning_rate": 1.9344990221753518e-05, "loss": 0.1713, "step": 1270 }, { "epoch": 0.14, "learning_rate": 1.9331973086050524e-05, "loss": 0.2396, "step": 1280 }, { "epoch": 0.14, "learning_rate": 1.9318832341408078e-05, "loss": 0.1939, "step": 1290 }, { "epoch": 0.15, "learning_rate": 1.930556816188514e-05, "loss": 0.1432, "step": 1300 }, { "epoch": 0.15, "learning_rate": 1.9292180723175656e-05, "loss": 0.169, "step": 1310 }, { "epoch": 0.15, "learning_rate": 1.9278670202606222e-05, "loss": 0.1771, "step": 1320 }, { "epoch": 0.15, "learning_rate": 1.926503677913376e-05, "loss": 0.189, "step": 1330 }, { "epoch": 0.15, "learning_rate": 1.9251280633343125e-05, "loss": 0.1619, "step": 1340 }, { "epoch": 0.15, "learning_rate": 1.9237401947444725e-05, "loss": 0.2056, "step": 1350 }, { "epoch": 0.15, "learning_rate": 1.9223400905272093e-05, "loss": 0.1932, "step": 1360 }, { "epoch": 0.15, "learning_rate": 1.9209277692279475e-05, "loss": 0.1878, "step": 1370 }, { "epoch": 0.16, "learning_rate": 1.919503249553935e-05, "loss": 0.1561, "step": 1380 }, { "epoch": 0.16, "learning_rate": 1.918066550373997e-05, "loss": 0.1532, "step": 1390 }, { "epoch": 0.16, "learning_rate": 1.9166176907182845e-05, "loss": 0.205, "step": 1400 }, { "epoch": 0.16, "learning_rate": 1.915156689778024e-05, "loss": 0.1945, "step": 1410 }, { "epoch": 0.16, "learning_rate": 1.9136835669052624e-05, "loss": 0.1793, "step": 1420 }, { "epoch": 0.16, "learning_rate": 1.9121983416126095e-05, "loss": 0.1879, "step": 1430 }, { "epoch": 0.16, "learning_rate": 1.910701033572982e-05, "loss": 0.1717, "step": 1440 }, { "epoch": 0.16, "learning_rate": 1.90919166261934e-05, "loss": 0.1615, "step": 1450 }, { "epoch": 0.16, "learning_rate": 1.9076702487444275e-05, "loss": 0.1998, "step": 1460 }, { "epoch": 0.17, "learning_rate": 1.9061368121005053e-05, "loss": 0.1775, "step": 1470 }, { "epoch": 0.17, "learning_rate": 1.904591372999085e-05, "loss": 0.1824, "step": 1480 }, { "epoch": 0.17, "learning_rate": 1.9030339519106588e-05, "loss": 0.1975, "step": 1490 }, { "epoch": 0.17, "learning_rate": 1.9014645694644302e-05, "loss": 0.1729, "step": 1500 }, { "epoch": 0.17, "learning_rate": 1.8998832464480396e-05, "loss": 0.1931, "step": 1510 }, { "epoch": 0.17, "learning_rate": 1.8982900038072892e-05, "loss": 0.1496, "step": 1520 }, { "epoch": 0.17, "learning_rate": 1.8966848626458647e-05, "loss": 0.2168, "step": 1530 }, { "epoch": 0.17, "learning_rate": 1.895067844225058e-05, "loss": 0.1656, "step": 1540 }, { "epoch": 0.17, "learning_rate": 1.893438969963483e-05, "loss": 0.1559, "step": 1550 }, { "epoch": 0.18, "learning_rate": 1.8917982614367933e-05, "loss": 0.16, "step": 1560 }, { "epoch": 0.18, "learning_rate": 1.890145740377397e-05, "loss": 0.1801, "step": 1570 }, { "epoch": 0.18, "learning_rate": 1.8884814286741663e-05, "loss": 0.2013, "step": 1580 }, { "epoch": 0.18, "learning_rate": 1.8868053483721507e-05, "loss": 0.2228, "step": 1590 }, { "epoch": 0.18, "learning_rate": 1.8851175216722834e-05, "loss": 0.1901, "step": 1600 }, { "epoch": 0.18, "learning_rate": 1.8834179709310868e-05, "loss": 0.1944, "step": 1610 }, { "epoch": 0.18, "learning_rate": 1.8817067186603774e-05, "loss": 0.1649, "step": 1620 }, { "epoch": 0.18, "learning_rate": 1.8799837875269672e-05, "loss": 0.163, "step": 1630 }, { "epoch": 0.18, "learning_rate": 1.878249200352363e-05, "loss": 0.1703, "step": 1640 }, { "epoch": 0.19, "learning_rate": 1.8765029801124653e-05, "loss": 0.1878, "step": 1650 }, { "epoch": 0.19, "learning_rate": 1.8747451499372623e-05, "loss": 0.1737, "step": 1660 }, { "epoch": 0.19, "learning_rate": 1.872975733110525e-05, "loss": 0.1844, "step": 1670 }, { "epoch": 0.19, "learning_rate": 1.8711947530694986e-05, "loss": 0.1845, "step": 1680 }, { "epoch": 0.19, "learning_rate": 1.869402233404591e-05, "loss": 0.1778, "step": 1690 }, { "epoch": 0.19, "learning_rate": 1.867598197859061e-05, "loss": 0.2139, "step": 1700 }, { "epoch": 0.19, "learning_rate": 1.865782670328705e-05, "loss": 0.1975, "step": 1710 }, { "epoch": 0.19, "learning_rate": 1.8639556748615372e-05, "loss": 0.1851, "step": 1720 }, { "epoch": 0.19, "learning_rate": 1.862117235657475e-05, "loss": 0.1872, "step": 1730 }, { "epoch": 0.2, "learning_rate": 1.860267377068016e-05, "loss": 0.181, "step": 1740 }, { "epoch": 0.2, "learning_rate": 1.8584061235959165e-05, "loss": 0.2022, "step": 1750 }, { "epoch": 0.2, "learning_rate": 1.8565334998948648e-05, "loss": 0.1593, "step": 1760 }, { "epoch": 0.2, "learning_rate": 1.854649530769159e-05, "loss": 0.1803, "step": 1770 }, { "epoch": 0.2, "learning_rate": 1.852754241173374e-05, "loss": 0.1676, "step": 1780 }, { "epoch": 0.2, "learning_rate": 1.8508476562120332e-05, "loss": 0.19, "step": 1790 }, { "epoch": 0.2, "learning_rate": 1.848929801139275e-05, "loss": 0.1908, "step": 1800 }, { "epoch": 0.2, "learning_rate": 1.8470007013585206e-05, "loss": 0.1558, "step": 1810 }, { "epoch": 0.2, "learning_rate": 1.8450603824221334e-05, "loss": 0.1709, "step": 1820 }, { "epoch": 0.21, "learning_rate": 1.8431088700310846e-05, "loss": 0.1947, "step": 1830 }, { "epoch": 0.21, "learning_rate": 1.84114619003461e-05, "loss": 0.1753, "step": 1840 }, { "epoch": 0.21, "learning_rate": 1.83917236842987e-05, "loss": 0.1932, "step": 1850 }, { "epoch": 0.21, "learning_rate": 1.8371874313616017e-05, "loss": 0.1672, "step": 1860 }, { "epoch": 0.21, "learning_rate": 1.8351914051217773e-05, "loss": 0.1965, "step": 1870 }, { "epoch": 0.21, "learning_rate": 1.833184316149251e-05, "loss": 0.1906, "step": 1880 }, { "epoch": 0.21, "learning_rate": 1.8311661910294138e-05, "loss": 0.1888, "step": 1890 }, { "epoch": 0.21, "learning_rate": 1.829137056493836e-05, "loss": 0.1797, "step": 1900 }, { "epoch": 0.21, "learning_rate": 1.8270969394199173e-05, "loss": 0.201, "step": 1910 }, { "epoch": 0.22, "learning_rate": 1.825045866830529e-05, "loss": 0.1914, "step": 1920 }, { "epoch": 0.22, "learning_rate": 1.8229838658936566e-05, "loss": 0.1542, "step": 1930 }, { "epoch": 0.22, "learning_rate": 1.8209109639220393e-05, "loss": 0.1642, "step": 1940 }, { "epoch": 0.22, "learning_rate": 1.818827188372809e-05, "loss": 0.1685, "step": 1950 }, { "epoch": 0.22, "learning_rate": 1.816732566847126e-05, "loss": 0.158, "step": 1960 }, { "epoch": 0.22, "learning_rate": 1.8146271270898138e-05, "loss": 0.1672, "step": 1970 }, { "epoch": 0.22, "learning_rate": 1.8125108969889908e-05, "loss": 0.2137, "step": 1980 }, { "epoch": 0.22, "learning_rate": 1.810383904575703e-05, "loss": 0.1766, "step": 1990 }, { "epoch": 0.22, "learning_rate": 1.8082461780235497e-05, "loss": 0.196, "step": 2000 }, { "epoch": 0.23, "learning_rate": 1.8060977456483127e-05, "loss": 0.1545, "step": 2010 }, { "epoch": 0.23, "learning_rate": 1.80393863590758e-05, "loss": 0.1856, "step": 2020 }, { "epoch": 0.23, "learning_rate": 1.80176887740037e-05, "loss": 0.1539, "step": 2030 }, { "epoch": 0.23, "learning_rate": 1.7995884988667513e-05, "loss": 0.1888, "step": 2040 }, { "epoch": 0.23, "learning_rate": 1.797397529187462e-05, "loss": 0.1929, "step": 2050 }, { "epoch": 0.23, "learning_rate": 1.79519599738353e-05, "loss": 0.177, "step": 2060 }, { "epoch": 0.23, "learning_rate": 1.7929839326158838e-05, "loss": 0.2152, "step": 2070 }, { "epoch": 0.23, "learning_rate": 1.7907613641849705e-05, "loss": 0.1677, "step": 2080 }, { "epoch": 0.23, "learning_rate": 1.788528321530366e-05, "loss": 0.1619, "step": 2090 }, { "epoch": 0.24, "learning_rate": 1.7862848342303845e-05, "loss": 0.1745, "step": 2100 }, { "epoch": 0.24, "learning_rate": 1.7840309320016875e-05, "loss": 0.1672, "step": 2110 }, { "epoch": 0.24, "learning_rate": 1.7817666446988896e-05, "loss": 0.156, "step": 2120 }, { "epoch": 0.24, "learning_rate": 1.7794920023141648e-05, "loss": 0.1866, "step": 2130 }, { "epoch": 0.24, "learning_rate": 1.7772070349768466e-05, "loss": 0.2002, "step": 2140 }, { "epoch": 0.24, "learning_rate": 1.7749117729530306e-05, "loss": 0.1579, "step": 2150 }, { "epoch": 0.24, "learning_rate": 1.772606246645173e-05, "loss": 0.1876, "step": 2160 }, { "epoch": 0.24, "learning_rate": 1.770290486591688e-05, "loss": 0.2025, "step": 2170 }, { "epoch": 0.24, "learning_rate": 1.7679645234665442e-05, "loss": 0.1821, "step": 2180 }, { "epoch": 0.25, "learning_rate": 1.7656283880788565e-05, "loss": 0.1663, "step": 2190 }, { "epoch": 0.25, "learning_rate": 1.7632821113724797e-05, "loss": 0.1802, "step": 2200 }, { "epoch": 0.25, "learning_rate": 1.7609257244255977e-05, "loss": 0.1493, "step": 2210 }, { "epoch": 0.25, "learning_rate": 1.758559258450312e-05, "loss": 0.1777, "step": 2220 }, { "epoch": 0.25, "learning_rate": 1.756182744792228e-05, "loss": 0.144, "step": 2230 }, { "epoch": 0.25, "learning_rate": 1.7537962149300412e-05, "loss": 0.1593, "step": 2240 }, { "epoch": 0.25, "learning_rate": 1.7513997004751178e-05, "loss": 0.1722, "step": 2250 }, { "epoch": 0.25, "learning_rate": 1.7489932331710785e-05, "loss": 0.1551, "step": 2260 }, { "epoch": 0.26, "learning_rate": 1.7465768448933768e-05, "loss": 0.2002, "step": 2270 }, { "epoch": 0.26, "learning_rate": 1.7441505676488758e-05, "loss": 0.1825, "step": 2280 }, { "epoch": 0.26, "learning_rate": 1.7417144335754265e-05, "loss": 0.1645, "step": 2290 }, { "epoch": 0.26, "learning_rate": 1.7392684749414406e-05, "loss": 0.1378, "step": 2300 }, { "epoch": 0.26, "learning_rate": 1.7368127241454634e-05, "loss": 0.1288, "step": 2310 }, { "epoch": 0.26, "learning_rate": 1.7343472137157444e-05, "loss": 0.1748, "step": 2320 }, { "epoch": 0.26, "learning_rate": 1.7318719763098077e-05, "loss": 0.1554, "step": 2330 }, { "epoch": 0.26, "learning_rate": 1.729387044714017e-05, "loss": 0.1881, "step": 2340 }, { "epoch": 0.26, "learning_rate": 1.7268924518431437e-05, "loss": 0.189, "step": 2350 }, { "epoch": 0.27, "learning_rate": 1.7243882307399302e-05, "loss": 0.1824, "step": 2360 }, { "epoch": 0.27, "learning_rate": 1.721874414574651e-05, "loss": 0.1751, "step": 2370 }, { "epoch": 0.27, "learning_rate": 1.719351036644676e-05, "loss": 0.1774, "step": 2380 }, { "epoch": 0.27, "learning_rate": 1.7168181303740256e-05, "loss": 0.1658, "step": 2390 }, { "epoch": 0.27, "learning_rate": 1.7142757293129318e-05, "loss": 0.1587, "step": 2400 }, { "epoch": 0.27, "learning_rate": 1.711723867137392e-05, "loss": 0.148, "step": 2410 }, { "epoch": 0.27, "learning_rate": 1.709162577648722e-05, "loss": 0.1603, "step": 2420 }, { "epoch": 0.27, "learning_rate": 1.706591894773112e-05, "loss": 0.1715, "step": 2430 }, { "epoch": 0.27, "learning_rate": 1.7040118525611705e-05, "loss": 0.1763, "step": 2440 }, { "epoch": 0.28, "learning_rate": 1.7014224851874814e-05, "loss": 0.1659, "step": 2450 }, { "epoch": 0.28, "learning_rate": 1.698823826950145e-05, "loss": 0.1612, "step": 2460 }, { "epoch": 0.28, "learning_rate": 1.696215912270327e-05, "loss": 0.1431, "step": 2470 }, { "epoch": 0.28, "learning_rate": 1.693598775691801e-05, "loss": 0.1597, "step": 2480 }, { "epoch": 0.28, "learning_rate": 1.6909724518804916e-05, "loss": 0.185, "step": 2490 }, { "epoch": 0.28, "learning_rate": 1.6883369756240157e-05, "loss": 0.1276, "step": 2500 }, { "epoch": 0.28, "learning_rate": 1.6856923818312205e-05, "loss": 0.1796, "step": 2510 }, { "epoch": 0.28, "learning_rate": 1.683038705531722e-05, "loss": 0.1899, "step": 2520 }, { "epoch": 0.28, "learning_rate": 1.680375981875441e-05, "loss": 0.1937, "step": 2530 }, { "epoch": 0.29, "learning_rate": 1.6777042461321374e-05, "loss": 0.138, "step": 2540 }, { "epoch": 0.29, "learning_rate": 1.6750235336909415e-05, "loss": 0.1661, "step": 2550 }, { "epoch": 0.29, "learning_rate": 1.6723338800598886e-05, "loss": 0.1518, "step": 2560 }, { "epoch": 0.29, "learning_rate": 1.669635320865446e-05, "loss": 0.1817, "step": 2570 }, { "epoch": 0.29, "learning_rate": 1.6669278918520413e-05, "loss": 0.1413, "step": 2580 }, { "epoch": 0.29, "learning_rate": 1.66421162888159e-05, "loss": 0.1796, "step": 2590 }, { "epoch": 0.29, "learning_rate": 1.6614865679330195e-05, "loss": 0.1863, "step": 2600 }, { "epoch": 0.29, "learning_rate": 1.658752745101794e-05, "loss": 0.1749, "step": 2610 }, { "epoch": 0.29, "learning_rate": 1.656010196599434e-05, "loss": 0.1853, "step": 2620 }, { "epoch": 0.3, "learning_rate": 1.653258958753039e-05, "loss": 0.139, "step": 2630 }, { "epoch": 0.3, "learning_rate": 1.6504990680048047e-05, "loss": 0.2061, "step": 2640 }, { "epoch": 0.3, "learning_rate": 1.6477305609115415e-05, "loss": 0.1795, "step": 2650 }, { "epoch": 0.3, "learning_rate": 1.6449534741441893e-05, "loss": 0.1514, "step": 2660 }, { "epoch": 0.3, "learning_rate": 1.6421678444873327e-05, "loss": 0.1785, "step": 2670 }, { "epoch": 0.3, "learning_rate": 1.6393737088387126e-05, "loss": 0.1751, "step": 2680 }, { "epoch": 0.3, "learning_rate": 1.6365711042087385e-05, "loss": 0.1904, "step": 2690 }, { "epoch": 0.3, "learning_rate": 1.6337600677199973e-05, "loss": 0.173, "step": 2700 }, { "epoch": 0.3, "learning_rate": 1.6309406366067633e-05, "loss": 0.1479, "step": 2710 }, { "epoch": 0.31, "learning_rate": 1.6281128482145027e-05, "loss": 0.1568, "step": 2720 }, { "epoch": 0.31, "learning_rate": 1.6252767399993807e-05, "loss": 0.1535, "step": 2730 }, { "epoch": 0.31, "learning_rate": 1.6224323495277646e-05, "loss": 0.1966, "step": 2740 }, { "epoch": 0.31, "learning_rate": 1.619579714475726e-05, "loss": 0.1676, "step": 2750 }, { "epoch": 0.31, "learning_rate": 1.6167188726285433e-05, "loss": 0.1514, "step": 2760 }, { "epoch": 0.31, "learning_rate": 1.6138498618801982e-05, "loss": 0.1348, "step": 2770 }, { "epoch": 0.31, "learning_rate": 1.6109727202328778e-05, "loss": 0.1837, "step": 2780 }, { "epoch": 0.31, "learning_rate": 1.6080874857964666e-05, "loss": 0.174, "step": 2790 }, { "epoch": 0.31, "learning_rate": 1.605194196788046e-05, "loss": 0.1558, "step": 2800 }, { "epoch": 0.32, "learning_rate": 1.602292891531385e-05, "loss": 0.1734, "step": 2810 }, { "epoch": 0.32, "learning_rate": 1.599383608456435e-05, "loss": 0.1728, "step": 2820 }, { "epoch": 0.32, "learning_rate": 1.5964663860988186e-05, "loss": 0.1475, "step": 2830 }, { "epoch": 0.32, "learning_rate": 1.59354126309932e-05, "loss": 0.1707, "step": 2840 }, { "epoch": 0.32, "learning_rate": 1.5906082782033744e-05, "loss": 0.1492, "step": 2850 }, { "epoch": 0.32, "learning_rate": 1.5876674702605524e-05, "loss": 0.1918, "step": 2860 }, { "epoch": 0.32, "learning_rate": 1.5847188782240473e-05, "loss": 0.1561, "step": 2870 }, { "epoch": 0.32, "learning_rate": 1.5817625411501583e-05, "loss": 0.161, "step": 2880 }, { "epoch": 0.32, "learning_rate": 1.5787984981977745e-05, "loss": 0.1497, "step": 2890 }, { "epoch": 0.33, "learning_rate": 1.5758267886278533e-05, "loss": 0.1905, "step": 2900 }, { "epoch": 0.33, "learning_rate": 1.572847451802903e-05, "loss": 0.135, "step": 2910 }, { "epoch": 0.33, "learning_rate": 1.5698605271864606e-05, "loss": 0.1885, "step": 2920 }, { "epoch": 0.33, "learning_rate": 1.56686605434257e-05, "loss": 0.1507, "step": 2930 }, { "epoch": 0.33, "learning_rate": 1.5638640729352548e-05, "loss": 0.182, "step": 2940 }, { "epoch": 0.33, "learning_rate": 1.5608546227279967e-05, "loss": 0.1501, "step": 2950 }, { "epoch": 0.33, "learning_rate": 1.557837743583208e-05, "loss": 0.1764, "step": 2960 }, { "epoch": 0.33, "learning_rate": 1.5548134754616998e-05, "loss": 0.1574, "step": 2970 }, { "epoch": 0.33, "learning_rate": 1.551781858422159e-05, "loss": 0.1835, "step": 2980 }, { "epoch": 0.34, "learning_rate": 1.5487429326206126e-05, "loss": 0.1904, "step": 2990 }, { "epoch": 0.34, "learning_rate": 1.5456967383098983e-05, "loss": 0.1604, "step": 3000 }, { "epoch": 0.34, "learning_rate": 1.54264331583913e-05, "loss": 0.1509, "step": 3010 }, { "epoch": 0.34, "learning_rate": 1.5395827056531643e-05, "loss": 0.1535, "step": 3020 }, { "epoch": 0.34, "learning_rate": 1.5365149482920646e-05, "loss": 0.1494, "step": 3030 }, { "epoch": 0.34, "learning_rate": 1.533440084390564e-05, "loss": 0.14, "step": 3040 }, { "epoch": 0.34, "learning_rate": 1.5303581546775263e-05, "loss": 0.1741, "step": 3050 }, { "epoch": 0.34, "learning_rate": 1.5272691999754084e-05, "loss": 0.2048, "step": 3060 }, { "epoch": 0.34, "learning_rate": 1.5241732611997174e-05, "loss": 0.1727, "step": 3070 }, { "epoch": 0.35, "learning_rate": 1.52107037935847e-05, "loss": 0.1552, "step": 3080 }, { "epoch": 0.35, "learning_rate": 1.517960595551649e-05, "loss": 0.1693, "step": 3090 }, { "epoch": 0.35, "learning_rate": 1.5148439509706596e-05, "loss": 0.1631, "step": 3100 }, { "epoch": 0.35, "learning_rate": 1.5117204868977815e-05, "loss": 0.1547, "step": 3110 }, { "epoch": 0.35, "learning_rate": 1.5085902447056249e-05, "loss": 0.1427, "step": 3120 }, { "epoch": 0.35, "learning_rate": 1.505453265856581e-05, "loss": 0.1705, "step": 3130 }, { "epoch": 0.35, "learning_rate": 1.5023095919022728e-05, "loss": 0.1711, "step": 3140 }, { "epoch": 0.35, "learning_rate": 1.499159264483005e-05, "loss": 0.151, "step": 3150 }, { "epoch": 0.36, "learning_rate": 1.4960023253272125e-05, "loss": 0.1723, "step": 3160 }, { "epoch": 0.36, "learning_rate": 1.4928388162509078e-05, "loss": 0.175, "step": 3170 }, { "epoch": 0.36, "learning_rate": 1.489668779157126e-05, "loss": 0.15, "step": 3180 }, { "epoch": 0.36, "learning_rate": 1.4864922560353722e-05, "loss": 0.1777, "step": 3190 }, { "epoch": 0.36, "learning_rate": 1.4833092889610624e-05, "loss": 0.1419, "step": 3200 }, { "epoch": 0.36, "learning_rate": 1.4801199200949678e-05, "loss": 0.1866, "step": 3210 }, { "epoch": 0.36, "learning_rate": 1.4769241916826571e-05, "loss": 0.1856, "step": 3220 }, { "epoch": 0.36, "learning_rate": 1.4737221460539344e-05, "loss": 0.198, "step": 3230 }, { "epoch": 0.36, "learning_rate": 1.4705138256222813e-05, "loss": 0.1478, "step": 3240 }, { "epoch": 0.37, "learning_rate": 1.467299272884293e-05, "loss": 0.1693, "step": 3250 }, { "epoch": 0.37, "learning_rate": 1.4640785304191169e-05, "loss": 0.1747, "step": 3260 }, { "epoch": 0.37, "learning_rate": 1.4608516408878875e-05, "loss": 0.1747, "step": 3270 }, { "epoch": 0.37, "learning_rate": 1.457618647033162e-05, "loss": 0.1556, "step": 3280 }, { "epoch": 0.37, "learning_rate": 1.4543795916783536e-05, "loss": 0.1375, "step": 3290 }, { "epoch": 0.37, "learning_rate": 1.451134517727165e-05, "loss": 0.1662, "step": 3300 }, { "epoch": 0.37, "learning_rate": 1.4478834681630199e-05, "loss": 0.1504, "step": 3310 }, { "epoch": 0.37, "learning_rate": 1.4446264860484924e-05, "loss": 0.1676, "step": 3320 }, { "epoch": 0.37, "learning_rate": 1.4413636145247386e-05, "loss": 0.177, "step": 3330 }, { "epoch": 0.38, "learning_rate": 1.438094896810924e-05, "loss": 0.1665, "step": 3340 }, { "epoch": 0.38, "learning_rate": 1.434820376203651e-05, "loss": 0.1875, "step": 3350 }, { "epoch": 0.38, "learning_rate": 1.4315400960763861e-05, "loss": 0.1765, "step": 3360 }, { "epoch": 0.38, "learning_rate": 1.4282540998788846e-05, "loss": 0.1535, "step": 3370 }, { "epoch": 0.38, "learning_rate": 1.4249624311366151e-05, "loss": 0.1577, "step": 3380 }, { "epoch": 0.38, "learning_rate": 1.421665133450184e-05, "loss": 0.1639, "step": 3390 }, { "epoch": 0.38, "learning_rate": 1.4183622504947571e-05, "loss": 0.1859, "step": 3400 }, { "epoch": 0.38, "learning_rate": 1.4150538260194806e-05, "loss": 0.1699, "step": 3410 }, { "epoch": 0.38, "learning_rate": 1.411739903846903e-05, "loss": 0.1565, "step": 3420 }, { "epoch": 0.39, "learning_rate": 1.4084205278723937e-05, "loss": 0.1661, "step": 3430 }, { "epoch": 0.39, "learning_rate": 1.4050957420635615e-05, "loss": 0.1627, "step": 3440 }, { "epoch": 0.39, "learning_rate": 1.4017655904596727e-05, "loss": 0.1655, "step": 3450 }, { "epoch": 0.39, "learning_rate": 1.3984301171710677e-05, "loss": 0.1704, "step": 3460 }, { "epoch": 0.39, "learning_rate": 1.3950893663785765e-05, "loss": 0.1622, "step": 3470 }, { "epoch": 0.39, "learning_rate": 1.391743382332933e-05, "loss": 0.1543, "step": 3480 }, { "epoch": 0.39, "learning_rate": 1.3883922093541903e-05, "loss": 0.1531, "step": 3490 }, { "epoch": 0.39, "learning_rate": 1.385035891831133e-05, "loss": 0.1542, "step": 3500 }, { "epoch": 0.39, "learning_rate": 1.3816744742206868e-05, "loss": 0.1538, "step": 3510 }, { "epoch": 0.4, "learning_rate": 1.3783080010473351e-05, "loss": 0.1758, "step": 3520 }, { "epoch": 0.4, "learning_rate": 1.374936516902524e-05, "loss": 0.148, "step": 3530 }, { "epoch": 0.4, "learning_rate": 1.3715600664440738e-05, "loss": 0.1782, "step": 3540 }, { "epoch": 0.4, "learning_rate": 1.3681786943955876e-05, "loss": 0.1672, "step": 3550 }, { "epoch": 0.4, "learning_rate": 1.3647924455458588e-05, "loss": 0.1565, "step": 3560 }, { "epoch": 0.4, "learning_rate": 1.3614013647482774e-05, "loss": 0.1725, "step": 3570 }, { "epoch": 0.4, "learning_rate": 1.3580054969202362e-05, "loss": 0.1471, "step": 3580 }, { "epoch": 0.4, "learning_rate": 1.3546048870425356e-05, "loss": 0.1697, "step": 3590 }, { "epoch": 0.4, "learning_rate": 1.3511995801587886e-05, "loss": 0.1775, "step": 3600 }, { "epoch": 0.41, "learning_rate": 1.3477896213748232e-05, "loss": 0.1491, "step": 3610 }, { "epoch": 0.41, "learning_rate": 1.3443750558580847e-05, "loss": 0.1715, "step": 3620 }, { "epoch": 0.41, "learning_rate": 1.340955928837039e-05, "loss": 0.1753, "step": 3630 }, { "epoch": 0.41, "learning_rate": 1.3375322856005719e-05, "loss": 0.1709, "step": 3640 }, { "epoch": 0.41, "learning_rate": 1.3341041714973901e-05, "loss": 0.1866, "step": 3650 }, { "epoch": 0.41, "learning_rate": 1.3306716319354197e-05, "loss": 0.1456, "step": 3660 }, { "epoch": 0.41, "learning_rate": 1.3272347123812063e-05, "loss": 0.2008, "step": 3670 }, { "epoch": 0.41, "learning_rate": 1.3237934583593112e-05, "loss": 0.1523, "step": 3680 }, { "epoch": 0.41, "learning_rate": 1.320347915451709e-05, "loss": 0.1425, "step": 3690 }, { "epoch": 0.42, "learning_rate": 1.3168981292971832e-05, "loss": 0.1609, "step": 3700 }, { "epoch": 0.42, "learning_rate": 1.3134441455907237e-05, "loss": 0.1571, "step": 3710 }, { "epoch": 0.42, "learning_rate": 1.3099860100829185e-05, "loss": 0.1712, "step": 3720 }, { "epoch": 0.42, "learning_rate": 1.3065237685793503e-05, "loss": 0.1579, "step": 3730 }, { "epoch": 0.42, "learning_rate": 1.303057466939989e-05, "loss": 0.1641, "step": 3740 }, { "epoch": 0.42, "learning_rate": 1.2995871510785829e-05, "loss": 0.1399, "step": 3750 }, { "epoch": 0.42, "learning_rate": 1.2961128669620528e-05, "loss": 0.161, "step": 3760 }, { "epoch": 0.42, "learning_rate": 1.2926346606098807e-05, "loss": 0.2, "step": 3770 }, { "epoch": 0.42, "learning_rate": 1.2891525780935035e-05, "loss": 0.1434, "step": 3780 }, { "epoch": 0.43, "learning_rate": 1.2856666655356988e-05, "loss": 0.1918, "step": 3790 }, { "epoch": 0.43, "learning_rate": 1.282176969109977e-05, "loss": 0.1308, "step": 3800 }, { "epoch": 0.43, "learning_rate": 1.2786835350399682e-05, "loss": 0.164, "step": 3810 }, { "epoch": 0.43, "learning_rate": 1.2751864095988112e-05, "loss": 0.1556, "step": 3820 }, { "epoch": 0.43, "learning_rate": 1.2716856391085384e-05, "loss": 0.1826, "step": 3830 }, { "epoch": 0.43, "learning_rate": 1.2681812699394653e-05, "loss": 0.154, "step": 3840 }, { "epoch": 0.43, "learning_rate": 1.2646733485095727e-05, "loss": 0.1442, "step": 3850 }, { "epoch": 0.43, "learning_rate": 1.2611619212838954e-05, "loss": 0.1676, "step": 3860 }, { "epoch": 0.43, "learning_rate": 1.2576470347739043e-05, "loss": 0.1775, "step": 3870 }, { "epoch": 0.44, "learning_rate": 1.2541287355368908e-05, "loss": 0.1905, "step": 3880 }, { "epoch": 0.44, "learning_rate": 1.250607070175351e-05, "loss": 0.1568, "step": 3890 }, { "epoch": 0.44, "learning_rate": 1.2470820853363674e-05, "loss": 0.1633, "step": 3900 }, { "epoch": 0.44, "learning_rate": 1.2435538277109919e-05, "loss": 0.1393, "step": 3910 }, { "epoch": 0.44, "learning_rate": 1.240022344033627e-05, "loss": 0.1574, "step": 3920 }, { "epoch": 0.44, "learning_rate": 1.2364876810814059e-05, "loss": 0.1361, "step": 3930 }, { "epoch": 0.44, "learning_rate": 1.2329498856735739e-05, "loss": 0.1568, "step": 3940 }, { "epoch": 0.44, "learning_rate": 1.2294090046708684e-05, "loss": 0.1651, "step": 3950 }, { "epoch": 0.44, "learning_rate": 1.225865084974898e-05, "loss": 0.1414, "step": 3960 }, { "epoch": 0.45, "learning_rate": 1.2223181735275203e-05, "loss": 0.1796, "step": 3970 }, { "epoch": 0.45, "learning_rate": 1.2187683173102212e-05, "loss": 0.1793, "step": 3980 }, { "epoch": 0.45, "learning_rate": 1.2152155633434922e-05, "loss": 0.1746, "step": 3990 }, { "epoch": 0.45, "learning_rate": 1.2116599586862079e-05, "loss": 0.1613, "step": 4000 }, { "epoch": 0.45, "learning_rate": 1.2081015504350025e-05, "loss": 0.1644, "step": 4010 }, { "epoch": 0.45, "learning_rate": 1.204540385723645e-05, "loss": 0.1629, "step": 4020 }, { "epoch": 0.45, "learning_rate": 1.2009765117224177e-05, "loss": 0.1977, "step": 4030 }, { "epoch": 0.45, "learning_rate": 1.1974099756374874e-05, "loss": 0.1717, "step": 4040 }, { "epoch": 0.46, "learning_rate": 1.1938408247102825e-05, "loss": 0.1414, "step": 4050 }, { "epoch": 0.46, "learning_rate": 1.1902691062168684e-05, "loss": 0.1508, "step": 4060 }, { "epoch": 0.46, "learning_rate": 1.1866948674673182e-05, "loss": 0.1709, "step": 4070 }, { "epoch": 0.46, "learning_rate": 1.1831181558050889e-05, "loss": 0.1599, "step": 4080 }, { "epoch": 0.46, "learning_rate": 1.1795390186063917e-05, "loss": 0.1653, "step": 4090 }, { "epoch": 0.46, "learning_rate": 1.1759575032795674e-05, "loss": 0.1923, "step": 4100 }, { "epoch": 0.46, "learning_rate": 1.172373657264456e-05, "loss": 0.1584, "step": 4110 }, { "epoch": 0.46, "learning_rate": 1.1687875280317689e-05, "loss": 0.1659, "step": 4120 }, { "epoch": 0.46, "learning_rate": 1.1651991630824608e-05, "loss": 0.1582, "step": 4130 }, { "epoch": 0.47, "learning_rate": 1.161608609947101e-05, "loss": 0.1412, "step": 4140 }, { "epoch": 0.47, "learning_rate": 1.1580159161852413e-05, "loss": 0.1566, "step": 4150 }, { "epoch": 0.47, "learning_rate": 1.1544211293847886e-05, "loss": 0.1572, "step": 4160 }, { "epoch": 0.47, "learning_rate": 1.1508242971613741e-05, "loss": 0.1667, "step": 4170 }, { "epoch": 0.47, "learning_rate": 1.147225467157721e-05, "loss": 0.1381, "step": 4180 }, { "epoch": 0.47, "learning_rate": 1.1436246870430157e-05, "loss": 0.15, "step": 4190 }, { "epoch": 0.47, "learning_rate": 1.1400220045122746e-05, "loss": 0.1496, "step": 4200 }, { "epoch": 0.47, "learning_rate": 1.1364174672857131e-05, "loss": 0.1633, "step": 4210 }, { "epoch": 0.47, "learning_rate": 1.132811123108114e-05, "loss": 0.1867, "step": 4220 }, { "epoch": 0.48, "learning_rate": 1.1292030197481935e-05, "loss": 0.1491, "step": 4230 }, { "epoch": 0.48, "learning_rate": 1.12559320499797e-05, "loss": 0.1597, "step": 4240 }, { "epoch": 0.48, "learning_rate": 1.1219817266721314e-05, "loss": 0.1655, "step": 4250 }, { "epoch": 0.48, "learning_rate": 1.118368632607399e-05, "loss": 0.1455, "step": 4260 }, { "epoch": 0.48, "learning_rate": 1.1147539706618976e-05, "loss": 0.185, "step": 4270 }, { "epoch": 0.48, "learning_rate": 1.1111377887145186e-05, "loss": 0.1555, "step": 4280 }, { "epoch": 0.48, "learning_rate": 1.1075201346642875e-05, "loss": 0.1695, "step": 4290 }, { "epoch": 0.48, "learning_rate": 1.1039010564297288e-05, "loss": 0.1815, "step": 4300 }, { "epoch": 0.48, "learning_rate": 1.100280601948231e-05, "loss": 0.1552, "step": 4310 }, { "epoch": 0.49, "learning_rate": 1.0966588191754129e-05, "loss": 0.1731, "step": 4320 }, { "epoch": 0.49, "learning_rate": 1.0930357560844862e-05, "loss": 0.1748, "step": 4330 }, { "epoch": 0.49, "learning_rate": 1.089411460665623e-05, "loss": 0.1517, "step": 4340 }, { "epoch": 0.49, "learning_rate": 1.0857859809253168e-05, "loss": 0.1834, "step": 4350 }, { "epoch": 0.49, "learning_rate": 1.08215936488575e-05, "loss": 0.1656, "step": 4360 }, { "epoch": 0.49, "learning_rate": 1.0785316605841544e-05, "loss": 0.171, "step": 4370 }, { "epoch": 0.49, "learning_rate": 1.0749029160721782e-05, "loss": 0.1728, "step": 4380 }, { "epoch": 0.49, "learning_rate": 1.0712731794152468e-05, "loss": 0.1575, "step": 4390 }, { "epoch": 0.49, "learning_rate": 1.0676424986919282e-05, "loss": 0.1562, "step": 4400 }, { "epoch": 0.5, "learning_rate": 1.0640109219932946e-05, "loss": 0.1281, "step": 4410 }, { "epoch": 0.5, "learning_rate": 1.0603784974222862e-05, "loss": 0.1702, "step": 4420 }, { "epoch": 0.5, "learning_rate": 1.0567452730930743e-05, "loss": 0.1576, "step": 4430 }, { "epoch": 0.5, "learning_rate": 1.053111297130423e-05, "loss": 0.1551, "step": 4440 }, { "epoch": 0.5, "learning_rate": 1.0494766176690526e-05, "loss": 0.1451, "step": 4450 }, { "epoch": 0.5, "learning_rate": 1.045841282853002e-05, "loss": 0.1577, "step": 4460 }, { "epoch": 0.5, "learning_rate": 1.0422053408349908e-05, "loss": 0.1581, "step": 4470 }, { "epoch": 0.5, "learning_rate": 1.0385688397757809e-05, "loss": 0.154, "step": 4480 }, { "epoch": 0.5, "learning_rate": 1.0349318278435392e-05, "loss": 0.1726, "step": 4490 }, { "epoch": 0.51, "learning_rate": 1.0312943532132003e-05, "loss": 0.1673, "step": 4500 }, { "epoch": 0.51, "learning_rate": 1.0276564640658265e-05, "loss": 0.1842, "step": 4510 }, { "epoch": 0.51, "learning_rate": 1.0240182085879713e-05, "loss": 0.1473, "step": 4520 }, { "epoch": 0.51, "learning_rate": 1.0203796349710406e-05, "loss": 0.1817, "step": 4530 }, { "epoch": 0.51, "learning_rate": 1.0167407914106541e-05, "loss": 0.1414, "step": 4540 }, { "epoch": 0.51, "learning_rate": 1.0131017261060072e-05, "loss": 0.1928, "step": 4550 }, { "epoch": 0.51, "learning_rate": 1.0094624872592318e-05, "loss": 0.1595, "step": 4560 }, { "epoch": 0.51, "learning_rate": 1.0058231230747597e-05, "loss": 0.1352, "step": 4570 }, { "epoch": 0.51, "learning_rate": 1.0021836817586819e-05, "loss": 0.13, "step": 4580 }, { "epoch": 0.52, "learning_rate": 9.985442115181117e-06, "loss": 0.1528, "step": 4590 }, { "epoch": 0.52, "learning_rate": 9.949047605605446e-06, "loss": 0.1362, "step": 4600 }, { "epoch": 0.52, "learning_rate": 9.91265377093222e-06, "loss": 0.1558, "step": 4610 }, { "epoch": 0.52, "learning_rate": 9.87990029519365e-06, "loss": 0.1523, "step": 4620 }, { "epoch": 0.52, "learning_rate": 9.84714810801154e-06, "loss": 0.1362, "step": 4630 }, { "epoch": 0.52, "learning_rate": 9.810758727589814e-06, "loss": 0.1536, "step": 4640 }, { "epoch": 0.52, "learning_rate": 9.774371853809793e-06, "loss": 0.1441, "step": 4650 }, { "epoch": 0.52, "learning_rate": 9.73798796864275e-06, "loss": 0.1501, "step": 4660 }, { "epoch": 0.52, "learning_rate": 9.701607554020364e-06, "loss": 0.1641, "step": 4670 }, { "epoch": 0.53, "learning_rate": 9.66523109182834e-06, "loss": 0.1471, "step": 4680 }, { "epoch": 0.53, "learning_rate": 9.628859063900038e-06, "loss": 0.1476, "step": 4690 }, { "epoch": 0.53, "learning_rate": 9.592491952010081e-06, "loss": 0.1355, "step": 4700 }, { "epoch": 0.53, "learning_rate": 9.556130237867967e-06, "loss": 0.1535, "step": 4710 }, { "epoch": 0.53, "learning_rate": 9.519774403111711e-06, "loss": 0.156, "step": 4720 }, { "epoch": 0.53, "learning_rate": 9.483424929301436e-06, "loss": 0.1646, "step": 4730 }, { "epoch": 0.53, "learning_rate": 9.44708229791302e-06, "loss": 0.1458, "step": 4740 }, { "epoch": 0.53, "learning_rate": 9.41074699033171e-06, "loss": 0.1626, "step": 4750 }, { "epoch": 0.53, "learning_rate": 9.374419487845729e-06, "loss": 0.1597, "step": 4760 }, { "epoch": 0.54, "learning_rate": 9.338100271639932e-06, "loss": 0.1531, "step": 4770 }, { "epoch": 0.54, "learning_rate": 9.301789822789412e-06, "loss": 0.1566, "step": 4780 }, { "epoch": 0.54, "learning_rate": 9.265488622253122e-06, "loss": 0.1456, "step": 4790 }, { "epoch": 0.54, "learning_rate": 9.229197150867525e-06, "loss": 0.1549, "step": 4800 }, { "epoch": 0.54, "learning_rate": 9.192915889340214e-06, "loss": 0.1775, "step": 4810 }, { "epoch": 0.54, "learning_rate": 9.156645318243534e-06, "loss": 0.1686, "step": 4820 }, { "epoch": 0.54, "learning_rate": 9.120385918008244e-06, "loss": 0.159, "step": 4830 }, { "epoch": 0.54, "learning_rate": 9.084138168917117e-06, "loss": 0.1473, "step": 4840 }, { "epoch": 0.55, "learning_rate": 9.047902551098618e-06, "loss": 0.185, "step": 4850 }, { "epoch": 0.55, "learning_rate": 9.011679544520508e-06, "loss": 0.1486, "step": 4860 }, { "epoch": 0.55, "learning_rate": 8.975469628983511e-06, "loss": 0.1767, "step": 4870 }, { "epoch": 0.55, "learning_rate": 8.93927328411495e-06, "loss": 0.1531, "step": 4880 }, { "epoch": 0.55, "learning_rate": 8.903090989362394e-06, "loss": 0.1769, "step": 4890 }, { "epoch": 0.55, "learning_rate": 8.866923223987303e-06, "loss": 0.1592, "step": 4900 }, { "epoch": 0.55, "learning_rate": 8.830770467058688e-06, "loss": 0.1336, "step": 4910 }, { "epoch": 0.55, "learning_rate": 8.79463319744677e-06, "loss": 0.1544, "step": 4920 }, { "epoch": 0.55, "learning_rate": 8.758511893816614e-06, "loss": 0.1575, "step": 4930 }, { "epoch": 0.56, "learning_rate": 8.722407034621812e-06, "loss": 0.1521, "step": 4940 }, { "epoch": 0.56, "learning_rate": 8.686319098098139e-06, "loss": 0.1746, "step": 4950 }, { "epoch": 0.56, "learning_rate": 8.65024856225721e-06, "loss": 0.1503, "step": 4960 }, { "epoch": 0.56, "learning_rate": 8.614195904880164e-06, "loss": 0.1857, "step": 4970 }, { "epoch": 0.56, "learning_rate": 8.578161603511312e-06, "loss": 0.171, "step": 4980 }, { "epoch": 0.56, "learning_rate": 8.54214613545184e-06, "loss": 0.1703, "step": 4990 }, { "epoch": 0.56, "learning_rate": 8.506149977753474e-06, "loss": 0.1403, "step": 5000 }, { "epoch": 0.56, "learning_rate": 8.470173607212145e-06, "loss": 0.1654, "step": 5010 }, { "epoch": 0.56, "learning_rate": 8.434217500361701e-06, "loss": 0.1384, "step": 5020 }, { "epoch": 0.57, "learning_rate": 8.398282133467579e-06, "loss": 0.1632, "step": 5030 }, { "epoch": 0.57, "learning_rate": 8.362367982520495e-06, "loss": 0.1271, "step": 5040 }, { "epoch": 0.57, "learning_rate": 8.326475523230152e-06, "loss": 0.1556, "step": 5050 }, { "epoch": 0.57, "learning_rate": 8.290605231018931e-06, "loss": 0.1672, "step": 5060 }, { "epoch": 0.57, "learning_rate": 8.25475758101558e-06, "loss": 0.1725, "step": 5070 }, { "epoch": 0.57, "learning_rate": 8.218933048048952e-06, "loss": 0.1685, "step": 5080 }, { "epoch": 0.57, "learning_rate": 8.183132106641684e-06, "loss": 0.128, "step": 5090 }, { "epoch": 0.57, "learning_rate": 8.147355231003931e-06, "loss": 0.1708, "step": 5100 }, { "epoch": 0.57, "learning_rate": 8.111602895027083e-06, "loss": 0.177, "step": 5110 }, { "epoch": 0.58, "learning_rate": 8.075875572277474e-06, "loss": 0.1462, "step": 5120 }, { "epoch": 0.58, "learning_rate": 8.040173735990124e-06, "loss": 0.1811, "step": 5130 }, { "epoch": 0.58, "learning_rate": 8.004497859062475e-06, "loss": 0.1549, "step": 5140 }, { "epoch": 0.58, "learning_rate": 7.968848414048097e-06, "loss": 0.1433, "step": 5150 }, { "epoch": 0.58, "learning_rate": 7.93322587315047e-06, "loss": 0.1464, "step": 5160 }, { "epoch": 0.58, "learning_rate": 7.897630708216701e-06, "loss": 0.1356, "step": 5170 }, { "epoch": 0.58, "learning_rate": 7.862063390731277e-06, "loss": 0.1733, "step": 5180 }, { "epoch": 0.58, "learning_rate": 7.826524391809833e-06, "loss": 0.1461, "step": 5190 }, { "epoch": 0.58, "learning_rate": 7.791014182192898e-06, "loss": 0.1385, "step": 5200 }, { "epoch": 0.59, "learning_rate": 7.755533232239667e-06, "loss": 0.1591, "step": 5210 }, { "epoch": 0.59, "learning_rate": 7.720082011921775e-06, "loss": 0.1458, "step": 5220 }, { "epoch": 0.59, "learning_rate": 7.68466099081705e-06, "loss": 0.1776, "step": 5230 }, { "epoch": 0.59, "learning_rate": 7.649270638103324e-06, "loss": 0.1583, "step": 5240 }, { "epoch": 0.59, "learning_rate": 7.613911422552203e-06, "loss": 0.1555, "step": 5250 }, { "epoch": 0.59, "learning_rate": 7.578583812522844e-06, "loss": 0.1534, "step": 5260 }, { "epoch": 0.59, "learning_rate": 7.5432882759557795e-06, "loss": 0.1497, "step": 5270 }, { "epoch": 0.59, "learning_rate": 7.508025280366703e-06, "loss": 0.1725, "step": 5280 }, { "epoch": 0.59, "learning_rate": 7.4727952928402695e-06, "loss": 0.1693, "step": 5290 }, { "epoch": 0.6, "learning_rate": 7.437598780023924e-06, "loss": 0.1627, "step": 5300 }, { "epoch": 0.6, "learning_rate": 7.402436208121723e-06, "loss": 0.158, "step": 5310 }, { "epoch": 0.6, "learning_rate": 7.367308042888131e-06, "loss": 0.159, "step": 5320 }, { "epoch": 0.6, "learning_rate": 7.332214749621884e-06, "loss": 0.1501, "step": 5330 }, { "epoch": 0.6, "learning_rate": 7.297156793159808e-06, "loss": 0.1493, "step": 5340 }, { "epoch": 0.6, "learning_rate": 7.26213463787067e-06, "loss": 0.1572, "step": 5350 }, { "epoch": 0.6, "learning_rate": 7.227148747649024e-06, "loss": 0.1575, "step": 5360 }, { "epoch": 0.6, "learning_rate": 7.192199585909058e-06, "loss": 0.1718, "step": 5370 }, { "epoch": 0.6, "learning_rate": 7.157287615578472e-06, "loss": 0.1619, "step": 5380 }, { "epoch": 0.61, "learning_rate": 7.122413299092343e-06, "loss": 0.1491, "step": 5390 }, { "epoch": 0.61, "learning_rate": 7.0875770983869774e-06, "loss": 0.1732, "step": 5400 }, { "epoch": 0.61, "learning_rate": 7.0527794748938225e-06, "loss": 0.1543, "step": 5410 }, { "epoch": 0.61, "learning_rate": 7.018020889533348e-06, "loss": 0.1542, "step": 5420 }, { "epoch": 0.61, "learning_rate": 6.9833018027089125e-06, "loss": 0.1743, "step": 5430 }, { "epoch": 0.61, "learning_rate": 6.948622674300712e-06, "loss": 0.1542, "step": 5440 }, { "epoch": 0.61, "learning_rate": 6.913983963659639e-06, "loss": 0.148, "step": 5450 }, { "epoch": 0.61, "learning_rate": 6.879386129601244e-06, "loss": 0.1892, "step": 5460 }, { "epoch": 0.61, "learning_rate": 6.8448296303996295e-06, "loss": 0.1402, "step": 5470 }, { "epoch": 0.62, "learning_rate": 6.8103149237813784e-06, "loss": 0.1552, "step": 5480 }, { "epoch": 0.62, "learning_rate": 6.7758424669195086e-06, "loss": 0.1439, "step": 5490 }, { "epoch": 0.62, "learning_rate": 6.7414127164274115e-06, "loss": 0.1455, "step": 5500 }, { "epoch": 0.62, "learning_rate": 6.7070261283527895e-06, "loss": 0.1611, "step": 5510 }, { "epoch": 0.62, "learning_rate": 6.6726831581716374e-06, "loss": 0.1201, "step": 5520 }, { "epoch": 0.62, "learning_rate": 6.638384260782193e-06, "loss": 0.1406, "step": 5530 }, { "epoch": 0.62, "learning_rate": 6.604129890498915e-06, "loss": 0.1534, "step": 5540 }, { "epoch": 0.62, "learning_rate": 6.569920501046474e-06, "loss": 0.1635, "step": 5550 }, { "epoch": 0.62, "learning_rate": 6.535756545553734e-06, "loss": 0.1388, "step": 5560 }, { "epoch": 0.63, "learning_rate": 6.501638476547745e-06, "loss": 0.1658, "step": 5570 }, { "epoch": 0.63, "learning_rate": 6.467566745947771e-06, "loss": 0.1699, "step": 5580 }, { "epoch": 0.63, "learning_rate": 6.433541805059269e-06, "loss": 0.1257, "step": 5590 }, { "epoch": 0.63, "learning_rate": 6.39956410456795e-06, "loss": 0.1537, "step": 5600 }, { "epoch": 0.63, "learning_rate": 6.365634094533786e-06, "loss": 0.1527, "step": 5610 }, { "epoch": 0.63, "learning_rate": 6.331752224385043e-06, "loss": 0.1662, "step": 5620 }, { "epoch": 0.63, "learning_rate": 6.29791894291235e-06, "loss": 0.1706, "step": 5630 }, { "epoch": 0.63, "learning_rate": 6.264134698262745e-06, "loss": 0.1533, "step": 5640 }, { "epoch": 0.63, "learning_rate": 6.230399937933719e-06, "loss": 0.1612, "step": 5650 }, { "epoch": 0.64, "learning_rate": 6.196715108767325e-06, "loss": 0.1447, "step": 5660 }, { "epoch": 0.64, "learning_rate": 6.163080656944234e-06, "loss": 0.1746, "step": 5670 }, { "epoch": 0.64, "learning_rate": 6.129497027977829e-06, "loss": 0.1667, "step": 5680 }, { "epoch": 0.64, "learning_rate": 6.095964666708312e-06, "loss": 0.1614, "step": 5690 }, { "epoch": 0.64, "learning_rate": 6.062484017296796e-06, "loss": 0.1614, "step": 5700 }, { "epoch": 0.64, "learning_rate": 6.029055523219442e-06, "loss": 0.1416, "step": 5710 }, { "epoch": 0.64, "learning_rate": 5.995679627261575e-06, "loss": 0.1376, "step": 5720 }, { "epoch": 0.64, "learning_rate": 5.962356771511808e-06, "loss": 0.1516, "step": 5730 }, { "epoch": 0.65, "learning_rate": 5.929087397356206e-06, "loss": 0.1646, "step": 5740 }, { "epoch": 0.65, "learning_rate": 5.895871945472434e-06, "loss": 0.1481, "step": 5750 }, { "epoch": 0.65, "learning_rate": 5.866024505964063e-06, "loss": 0.1544, "step": 5760 }, { "epoch": 0.65, "learning_rate": 5.832912717900956e-06, "loss": 0.1629, "step": 5770 }, { "epoch": 0.65, "learning_rate": 5.799856126014999e-06, "loss": 0.1617, "step": 5780 }, { "epoch": 0.65, "learning_rate": 5.766855168165374e-06, "loss": 0.1472, "step": 5790 }, { "epoch": 0.65, "learning_rate": 5.733910281474384e-06, "loss": 0.1214, "step": 5800 }, { "epoch": 0.65, "learning_rate": 5.701021902321594e-06, "loss": 0.1529, "step": 5810 }, { "epoch": 0.65, "learning_rate": 5.668190466338111e-06, "loss": 0.1538, "step": 5820 }, { "epoch": 0.66, "learning_rate": 5.635416408400774e-06, "loss": 0.1485, "step": 5830 }, { "epoch": 0.66, "learning_rate": 5.602700162626406e-06, "loss": 0.1781, "step": 5840 }, { "epoch": 0.66, "learning_rate": 5.570042162366076e-06, "loss": 0.131, "step": 5850 }, { "epoch": 0.66, "learning_rate": 5.537442840199337e-06, "loss": 0.1597, "step": 5860 }, { "epoch": 0.66, "learning_rate": 5.504902627928508e-06, "loss": 0.1589, "step": 5870 }, { "epoch": 0.66, "learning_rate": 5.472421956572953e-06, "loss": 0.1711, "step": 5880 }, { "epoch": 0.66, "learning_rate": 5.440001256363386e-06, "loss": 0.1442, "step": 5890 }, { "epoch": 0.66, "learning_rate": 5.407640956736133e-06, "loss": 0.1515, "step": 5900 }, { "epoch": 0.66, "learning_rate": 5.3753414863274985e-06, "loss": 0.1599, "step": 5910 }, { "epoch": 0.67, "learning_rate": 5.343103272968028e-06, "loss": 0.156, "step": 5920 }, { "epoch": 0.67, "learning_rate": 5.310926743676898e-06, "loss": 0.1526, "step": 5930 }, { "epoch": 0.67, "learning_rate": 5.2788123246562206e-06, "loss": 0.1478, "step": 5940 }, { "epoch": 0.67, "learning_rate": 5.249962803412024e-06, "loss": 0.1541, "step": 5950 }, { "epoch": 0.67, "learning_rate": 5.217967565140998e-06, "loss": 0.1696, "step": 5960 }, { "epoch": 0.67, "learning_rate": 5.1860356684540395e-06, "loss": 0.1359, "step": 5970 }, { "epoch": 0.67, "learning_rate": 5.154167536312911e-06, "loss": 0.1548, "step": 5980 }, { "epoch": 0.67, "learning_rate": 5.1223635908347846e-06, "loss": 0.1113, "step": 5990 }, { "epoch": 0.67, "learning_rate": 5.090624253286622e-06, "loss": 0.1584, "step": 6000 }, { "epoch": 0.68, "learning_rate": 5.058949944079607e-06, "loss": 0.1623, "step": 6010 }, { "epoch": 0.68, "learning_rate": 5.027341082763575e-06, "loss": 0.1646, "step": 6020 }, { "epoch": 0.68, "learning_rate": 4.995798088021454e-06, "loss": 0.1456, "step": 6030 }, { "epoch": 0.68, "learning_rate": 4.964321377663718e-06, "loss": 0.1794, "step": 6040 }, { "epoch": 0.68, "learning_rate": 4.93291136862287e-06, "loss": 0.1703, "step": 6050 }, { "epoch": 0.68, "learning_rate": 4.901568476947876e-06, "loss": 0.1436, "step": 6060 }, { "epoch": 0.68, "learning_rate": 4.8702931177987115e-06, "loss": 0.1436, "step": 6070 }, { "epoch": 0.68, "learning_rate": 4.839085705440815e-06, "loss": 0.1491, "step": 6080 }, { "epoch": 0.68, "learning_rate": 4.807946653239621e-06, "loss": 0.1541, "step": 6090 }, { "epoch": 0.69, "learning_rate": 4.7768763736550975e-06, "loss": 0.1484, "step": 6100 }, { "epoch": 0.69, "learning_rate": 4.7458752782362486e-06, "loss": 0.1279, "step": 6110 }, { "epoch": 0.69, "learning_rate": 4.714943777615693e-06, "loss": 0.1637, "step": 6120 }, { "epoch": 0.69, "learning_rate": 4.684082281504214e-06, "loss": 0.1632, "step": 6130 }, { "epoch": 0.69, "learning_rate": 4.653291198685331e-06, "loss": 0.1277, "step": 6140 }, { "epoch": 0.69, "learning_rate": 4.622570937009879e-06, "loss": 0.1514, "step": 6150 }, { "epoch": 0.69, "learning_rate": 4.5919219033906384e-06, "loss": 0.1399, "step": 6160 }, { "epoch": 0.69, "learning_rate": 4.561344503796887e-06, "loss": 0.1768, "step": 6170 }, { "epoch": 0.69, "learning_rate": 4.530839143249089e-06, "loss": 0.1449, "step": 6180 }, { "epoch": 0.7, "learning_rate": 4.500406225813476e-06, "loss": 0.1682, "step": 6190 }, { "epoch": 0.7, "learning_rate": 4.470046154596725e-06, "loss": 0.1542, "step": 6200 }, { "epoch": 0.7, "learning_rate": 4.439759331740606e-06, "loss": 0.1674, "step": 6210 }, { "epoch": 0.7, "learning_rate": 4.409546158416674e-06, "loss": 0.1653, "step": 6220 }, { "epoch": 0.7, "learning_rate": 4.379407034820915e-06, "loss": 0.1238, "step": 6230 }, { "epoch": 0.7, "learning_rate": 4.349342360168498e-06, "loss": 0.1399, "step": 6240 }, { "epoch": 0.7, "learning_rate": 4.319352532688444e-06, "loss": 0.1683, "step": 6250 }, { "epoch": 0.7, "learning_rate": 4.2894379496183725e-06, "loss": 0.124, "step": 6260 }, { "epoch": 0.7, "learning_rate": 4.259599007199233e-06, "loss": 0.1455, "step": 6270 }, { "epoch": 0.71, "learning_rate": 4.229836100670058e-06, "loss": 0.148, "step": 6280 }, { "epoch": 0.71, "learning_rate": 4.200149624262736e-06, "loss": 0.1838, "step": 6290 }, { "epoch": 0.71, "learning_rate": 4.170539971196771e-06, "loss": 0.1487, "step": 6300 }, { "epoch": 0.71, "learning_rate": 4.141007533674087e-06, "loss": 0.1275, "step": 6310 }, { "epoch": 0.71, "learning_rate": 4.11155270287383e-06, "loss": 0.1682, "step": 6320 }, { "epoch": 0.71, "learning_rate": 4.0821758689472e-06, "loss": 0.1648, "step": 6330 }, { "epoch": 0.71, "learning_rate": 4.0528774210122455e-06, "loss": 0.1476, "step": 6340 }, { "epoch": 0.71, "learning_rate": 4.023657747148757e-06, "loss": 0.1567, "step": 6350 }, { "epoch": 0.71, "learning_rate": 3.994517234393093e-06, "loss": 0.1684, "step": 6360 }, { "epoch": 0.72, "learning_rate": 3.965456268733065e-06, "loss": 0.1408, "step": 6370 }, { "epoch": 0.72, "learning_rate": 3.936475235102826e-06, "loss": 0.1617, "step": 6380 }, { "epoch": 0.72, "learning_rate": 3.907574517377766e-06, "loss": 0.129, "step": 6390 }, { "epoch": 0.72, "learning_rate": 3.8787544983694325e-06, "loss": 0.1448, "step": 6400 }, { "epoch": 0.72, "learning_rate": 3.850015559820465e-06, "loss": 0.1507, "step": 6410 }, { "epoch": 0.72, "learning_rate": 3.821358082399522e-06, "loss": 0.1421, "step": 6420 }, { "epoch": 0.72, "learning_rate": 3.7927824456962557e-06, "loss": 0.173, "step": 6430 }, { "epoch": 0.72, "learning_rate": 3.7642890282162713e-06, "loss": 0.1682, "step": 6440 }, { "epoch": 0.72, "learning_rate": 3.7358782073761202e-06, "loss": 0.1639, "step": 6450 }, { "epoch": 0.73, "learning_rate": 3.7075503594983064e-06, "loss": 0.1337, "step": 6460 }, { "epoch": 0.73, "learning_rate": 3.6793058598062892e-06, "loss": 0.1781, "step": 6470 }, { "epoch": 0.73, "learning_rate": 3.6511450824195184e-06, "loss": 0.1655, "step": 6480 }, { "epoch": 0.73, "learning_rate": 3.6230684003484785e-06, "loss": 0.125, "step": 6490 }, { "epoch": 0.73, "learning_rate": 3.595076185489761e-06, "loss": 0.1598, "step": 6500 }, { "epoch": 0.73, "learning_rate": 3.567168808621104e-06, "loss": 0.1549, "step": 6510 }, { "epoch": 0.73, "learning_rate": 3.539346639396529e-06, "loss": 0.1426, "step": 6520 }, { "epoch": 0.73, "learning_rate": 3.5116100463413926e-06, "loss": 0.1416, "step": 6530 }, { "epoch": 0.73, "learning_rate": 3.483959396847554e-06, "loss": 0.1745, "step": 6540 }, { "epoch": 0.74, "learning_rate": 3.4563950571684725e-06, "loss": 0.1491, "step": 6550 }, { "epoch": 0.74, "learning_rate": 3.428917392414374e-06, "loss": 0.1853, "step": 6560 }, { "epoch": 0.74, "learning_rate": 3.401526766547405e-06, "loss": 0.167, "step": 6570 }, { "epoch": 0.74, "learning_rate": 3.37422354237683e-06, "loss": 0.1328, "step": 6580 }, { "epoch": 0.74, "learning_rate": 3.3470080815542004e-06, "loss": 0.1339, "step": 6590 }, { "epoch": 0.74, "learning_rate": 3.319880744568581e-06, "loss": 0.1585, "step": 6600 }, { "epoch": 0.74, "learning_rate": 3.2928418907417702e-06, "loss": 0.1458, "step": 6610 }, { "epoch": 0.74, "learning_rate": 3.2658918782235383e-06, "loss": 0.1546, "step": 6620 }, { "epoch": 0.75, "learning_rate": 3.2390310639868992e-06, "loss": 0.1349, "step": 6630 }, { "epoch": 0.75, "learning_rate": 3.2122598038233466e-06, "loss": 0.1536, "step": 6640 }, { "epoch": 0.75, "learning_rate": 3.185578452338185e-06, "loss": 0.1519, "step": 6650 }, { "epoch": 0.75, "learning_rate": 3.1589873629458002e-06, "loss": 0.1364, "step": 6660 }, { "epoch": 0.75, "learning_rate": 3.132486887864992e-06, "loss": 0.1608, "step": 6670 }, { "epoch": 0.75, "learning_rate": 3.1060773781143004e-06, "loss": 0.1512, "step": 6680 }, { "epoch": 0.75, "learning_rate": 3.0797591835073804e-06, "loss": 0.148, "step": 6690 }, { "epoch": 0.75, "learning_rate": 3.053532652648323e-06, "loss": 0.1666, "step": 6700 }, { "epoch": 0.75, "learning_rate": 3.0273981329270865e-06, "loss": 0.1424, "step": 6710 }, { "epoch": 0.76, "learning_rate": 3.001355970514863e-06, "loss": 0.1398, "step": 6720 }, { "epoch": 0.76, "learning_rate": 2.9754065103595054e-06, "loss": 0.1274, "step": 6730 }, { "epoch": 0.76, "learning_rate": 2.949550096180954e-06, "loss": 0.1511, "step": 6740 }, { "epoch": 0.76, "learning_rate": 2.923787070466687e-06, "loss": 0.1946, "step": 6750 }, { "epoch": 0.76, "learning_rate": 2.8981177744671875e-06, "loss": 0.1533, "step": 6760 }, { "epoch": 0.76, "learning_rate": 2.8725425481914127e-06, "loss": 0.1568, "step": 6770 }, { "epoch": 0.76, "learning_rate": 2.8470617304022976e-06, "loss": 0.1292, "step": 6780 }, { "epoch": 0.76, "learning_rate": 2.821675658612263e-06, "loss": 0.1597, "step": 6790 }, { "epoch": 0.76, "learning_rate": 2.7963846690787633e-06, "loss": 0.1396, "step": 6800 }, { "epoch": 0.77, "learning_rate": 2.7711890967997923e-06, "loss": 0.1405, "step": 6810 }, { "epoch": 0.77, "learning_rate": 2.746089275509496e-06, "loss": 0.1495, "step": 6820 }, { "epoch": 0.77, "learning_rate": 2.7210855376737123e-06, "loss": 0.151, "step": 6830 }, { "epoch": 0.77, "learning_rate": 2.6961782144855876e-06, "loss": 0.1472, "step": 6840 }, { "epoch": 0.77, "learning_rate": 2.6713676358611775e-06, "loss": 0.1271, "step": 6850 }, { "epoch": 0.77, "learning_rate": 2.646654130435101e-06, "loss": 0.1465, "step": 6860 }, { "epoch": 0.77, "learning_rate": 2.622038025556145e-06, "loss": 0.1483, "step": 6870 }, { "epoch": 0.77, "learning_rate": 2.597519647282981e-06, "loss": 0.1416, "step": 6880 }, { "epoch": 0.77, "learning_rate": 2.5730993203797906e-06, "loss": 0.1524, "step": 6890 }, { "epoch": 0.78, "learning_rate": 2.5487773683120166e-06, "loss": 0.1361, "step": 6900 }, { "epoch": 0.78, "learning_rate": 2.5245541132420403e-06, "loss": 0.1649, "step": 6910 }, { "epoch": 0.78, "learning_rate": 2.5004298760249267e-06, "loss": 0.1477, "step": 6920 }, { "epoch": 0.78, "learning_rate": 2.4764049762041874e-06, "loss": 0.1514, "step": 6930 }, { "epoch": 0.78, "learning_rate": 2.4524797320075233e-06, "loss": 0.1705, "step": 6940 }, { "epoch": 0.78, "learning_rate": 2.42865446034263e-06, "loss": 0.1412, "step": 6950 }, { "epoch": 0.78, "learning_rate": 2.4049294767929844e-06, "loss": 0.1283, "step": 6960 }, { "epoch": 0.78, "learning_rate": 2.3813050956136876e-06, "loss": 0.1559, "step": 6970 }, { "epoch": 0.78, "learning_rate": 2.357781629727265e-06, "loss": 0.1437, "step": 6980 }, { "epoch": 0.79, "learning_rate": 2.3343593907195692e-06, "loss": 0.1588, "step": 6990 }, { "epoch": 0.79, "learning_rate": 2.311038688835604e-06, "loss": 0.1702, "step": 7000 }, { "epoch": 0.79, "learning_rate": 2.2901371267146e-06, "loss": 0.1615, "step": 7010 }, { "epoch": 0.79, "learning_rate": 2.267010195270566e-06, "loss": 0.1396, "step": 7020 }, { "epoch": 0.79, "learning_rate": 2.243985693040561e-06, "loss": 0.1324, "step": 7030 }, { "epoch": 0.79, "learning_rate": 2.221063925001278e-06, "loss": 0.1731, "step": 7040 }, { "epoch": 0.79, "learning_rate": 2.200522422465723e-06, "loss": 0.1473, "step": 7050 }, { "epoch": 0.79, "learning_rate": 2.177796684722696e-06, "loss": 0.1402, "step": 7060 }, { "epoch": 0.79, "learning_rate": 2.155174557893146e-06, "loss": 0.1556, "step": 7070 }, { "epoch": 0.8, "learning_rate": 2.1326563416239997e-06, "loss": 0.1882, "step": 7080 }, { "epoch": 0.8, "learning_rate": 2.1102423341858235e-06, "loss": 0.1476, "step": 7090 }, { "epoch": 0.8, "learning_rate": 2.0879328324688497e-06, "loss": 0.1739, "step": 7100 }, { "epoch": 0.8, "learning_rate": 2.065728131979058e-06, "loss": 0.1355, "step": 7110 }, { "epoch": 0.8, "learning_rate": 2.0436285268342548e-06, "loss": 0.162, "step": 7120 }, { "epoch": 0.8, "learning_rate": 2.021634309760191e-06, "loss": 0.1477, "step": 7130 }, { "epoch": 0.8, "learning_rate": 1.9997457720866554e-06, "loss": 0.1639, "step": 7140 }, { "epoch": 0.8, "learning_rate": 1.9779632037436513e-06, "loss": 0.1418, "step": 7150 }, { "epoch": 0.8, "learning_rate": 1.9562868932575328e-06, "loss": 0.1293, "step": 7160 }, { "epoch": 0.81, "learning_rate": 1.9347171277471875e-06, "loss": 0.1529, "step": 7170 }, { "epoch": 0.81, "learning_rate": 1.9132541929202384e-06, "loss": 0.18, "step": 7180 }, { "epoch": 0.81, "learning_rate": 1.8918983730692563e-06, "loss": 0.1607, "step": 7190 }, { "epoch": 0.81, "learning_rate": 1.8706499510679888e-06, "loss": 0.1394, "step": 7200 }, { "epoch": 0.81, "learning_rate": 1.8495092083676324e-06, "loss": 0.1645, "step": 7210 }, { "epoch": 0.81, "learning_rate": 1.828476424993071e-06, "loss": 0.1441, "step": 7220 }, { "epoch": 0.81, "learning_rate": 1.8075518795392077e-06, "loss": 0.1432, "step": 7230 }, { "epoch": 0.81, "learning_rate": 1.7867358491672394e-06, "loss": 0.1247, "step": 7240 }, { "epoch": 0.81, "learning_rate": 1.7660286096010027e-06, "loss": 0.1646, "step": 7250 }, { "epoch": 0.82, "learning_rate": 1.7454304351233253e-06, "loss": 0.1419, "step": 7260 }, { "epoch": 0.82, "learning_rate": 1.7249415985723795e-06, "loss": 0.144, "step": 7270 }, { "epoch": 0.82, "learning_rate": 1.7045623713380777e-06, "loss": 0.1487, "step": 7280 }, { "epoch": 0.82, "learning_rate": 1.684293023358472e-06, "loss": 0.1653, "step": 7290 }, { "epoch": 0.82, "learning_rate": 1.664133823116193e-06, "loss": 0.1439, "step": 7300 }, { "epoch": 0.82, "learning_rate": 1.6440850376348627e-06, "loss": 0.1314, "step": 7310 }, { "epoch": 0.82, "learning_rate": 1.624146932475601e-06, "loss": 0.138, "step": 7320 }, { "epoch": 0.82, "learning_rate": 1.6043197717334614e-06, "loss": 0.1577, "step": 7330 }, { "epoch": 0.82, "learning_rate": 1.584603818033975e-06, "loss": 0.1528, "step": 7340 }, { "epoch": 0.83, "learning_rate": 1.5649993325296408e-06, "loss": 0.1479, "step": 7350 }, { "epoch": 0.83, "learning_rate": 1.5455065748964825e-06, "loss": 0.1504, "step": 7360 }, { "epoch": 0.83, "learning_rate": 1.5261258033306027e-06, "loss": 0.1494, "step": 7370 }, { "epoch": 0.83, "learning_rate": 1.506857274544774e-06, "loss": 0.1493, "step": 7380 }, { "epoch": 0.83, "learning_rate": 1.487701243765013e-06, "loss": 0.1327, "step": 7390 }, { "epoch": 0.83, "learning_rate": 1.4686579647272337e-06, "loss": 0.1488, "step": 7400 }, { "epoch": 0.83, "learning_rate": 1.4497276896738588e-06, "loss": 0.139, "step": 7410 }, { "epoch": 0.83, "learning_rate": 1.4309106693504914e-06, "loss": 0.1554, "step": 7420 }, { "epoch": 0.83, "learning_rate": 1.4122071530025915e-06, "loss": 0.1569, "step": 7430 }, { "epoch": 0.84, "learning_rate": 1.3936173883721726e-06, "loss": 0.1249, "step": 7440 }, { "epoch": 0.84, "learning_rate": 1.375141621694529e-06, "loss": 0.1652, "step": 7450 }, { "epoch": 0.84, "learning_rate": 1.3567800976949585e-06, "loss": 0.1458, "step": 7460 }, { "epoch": 0.84, "learning_rate": 1.338533059585534e-06, "loss": 0.152, "step": 7470 }, { "epoch": 0.84, "learning_rate": 1.3204007490618742e-06, "loss": 0.1296, "step": 7480 }, { "epoch": 0.84, "learning_rate": 1.302383406299952e-06, "loss": 0.147, "step": 7490 }, { "epoch": 0.84, "learning_rate": 1.2844812699528963e-06, "loss": 0.1411, "step": 7500 }, { "epoch": 0.84, "learning_rate": 1.266694577147851e-06, "loss": 0.1521, "step": 7510 }, { "epoch": 0.85, "learning_rate": 1.2490235634828196e-06, "loss": 0.1333, "step": 7520 }, { "epoch": 0.85, "learning_rate": 1.2314684630235507e-06, "loss": 0.1552, "step": 7530 }, { "epoch": 0.85, "learning_rate": 1.2140295083004306e-06, "loss": 0.1626, "step": 7540 }, { "epoch": 0.85, "learning_rate": 1.1967069303054213e-06, "loss": 0.1583, "step": 7550 }, { "epoch": 0.85, "learning_rate": 1.1795009584889716e-06, "loss": 0.1456, "step": 7560 }, { "epoch": 0.85, "learning_rate": 1.16241182075701e-06, "loss": 0.1408, "step": 7570 }, { "epoch": 0.85, "learning_rate": 1.1454397434679022e-06, "loss": 0.189, "step": 7580 }, { "epoch": 0.85, "learning_rate": 1.12858495142946e-06, "loss": 0.1043, "step": 7590 }, { "epoch": 0.85, "learning_rate": 1.111847667895971e-06, "loss": 0.1107, "step": 7600 }, { "epoch": 0.86, "learning_rate": 1.0952281145652266e-06, "loss": 0.1579, "step": 7610 }, { "epoch": 0.86, "learning_rate": 1.078726511575603e-06, "loss": 0.1257, "step": 7620 }, { "epoch": 0.86, "learning_rate": 1.0623430775031306e-06, "loss": 0.1452, "step": 7630 }, { "epoch": 0.86, "learning_rate": 1.0460780293586059e-06, "loss": 0.1534, "step": 7640 }, { "epoch": 0.86, "learning_rate": 1.0299315825847122e-06, "loss": 0.1428, "step": 7650 }, { "epoch": 0.86, "learning_rate": 1.01390395105318e-06, "loss": 0.1354, "step": 7660 }, { "epoch": 0.86, "learning_rate": 9.979953470619263e-07, "loss": 0.1499, "step": 7670 }, { "epoch": 0.86, "learning_rate": 9.822059813322771e-07, "loss": 0.1305, "step": 7680 }, { "epoch": 0.86, "learning_rate": 9.665360630061438e-07, "loss": 0.1615, "step": 7690 }, { "epoch": 0.87, "learning_rate": 9.509857996432792e-07, "loss": 0.1585, "step": 7700 }, { "epoch": 0.87, "learning_rate": 9.355553972185116e-07, "loss": 0.1498, "step": 7710 }, { "epoch": 0.87, "learning_rate": 9.202450601190227e-07, "loss": 0.1773, "step": 7720 }, { "epoch": 0.87, "learning_rate": 9.050549911416373e-07, "loss": 0.1499, "step": 7730 }, { "epoch": 0.87, "learning_rate": 8.899853914901446e-07, "loss": 0.1825, "step": 7740 }, { "epoch": 0.87, "learning_rate": 8.750364607726247e-07, "loss": 0.1626, "step": 7750 }, { "epoch": 0.87, "learning_rate": 8.602083969988051e-07, "loss": 0.1558, "step": 7760 }, { "epoch": 0.87, "learning_rate": 8.455013965774462e-07, "loss": 0.1426, "step": 7770 }, { "epoch": 0.87, "learning_rate": 8.309156543137265e-07, "loss": 0.1408, "step": 7780 }, { "epoch": 0.88, "learning_rate": 8.164513634066784e-07, "loss": 0.1302, "step": 7790 }, { "epoch": 0.88, "learning_rate": 8.021087154466156e-07, "loss": 0.1927, "step": 7800 }, { "epoch": 0.88, "learning_rate": 7.878879004126005e-07, "loss": 0.1481, "step": 7810 }, { "epoch": 0.88, "learning_rate": 7.737891066699288e-07, "loss": 0.1517, "step": 7820 }, { "epoch": 0.88, "learning_rate": 7.598125209676321e-07, "loss": 0.1508, "step": 7830 }, { "epoch": 0.88, "learning_rate": 7.459583284360039e-07, "loss": 0.1587, "step": 7840 }, { "epoch": 0.88, "learning_rate": 7.322267125841575e-07, "loss": 0.1511, "step": 7850 }, { "epoch": 0.88, "learning_rate": 7.18617855297572e-07, "loss": 0.1519, "step": 7860 }, { "epoch": 0.88, "learning_rate": 7.051319368357124e-07, "loss": 0.177, "step": 7870 }, { "epoch": 0.89, "learning_rate": 6.917691358296185e-07, "loss": 0.1553, "step": 7880 }, { "epoch": 0.89, "learning_rate": 6.785296292795496e-07, "loss": 0.1329, "step": 7890 }, { "epoch": 0.89, "learning_rate": 6.654135925526373e-07, "loss": 0.1557, "step": 7900 }, { "epoch": 0.89, "learning_rate": 6.524211993805684e-07, "loss": 0.1511, "step": 7910 }, { "epoch": 0.89, "learning_rate": 6.395526218572723e-07, "loss": 0.1566, "step": 7920 }, { "epoch": 0.89, "learning_rate": 6.268080304366509e-07, "loss": 0.1557, "step": 7930 }, { "epoch": 0.89, "learning_rate": 6.141875939303176e-07, "loss": 0.1458, "step": 7940 }, { "epoch": 0.89, "learning_rate": 6.016914795053586e-07, "loss": 0.1598, "step": 7950 }, { "epoch": 0.89, "learning_rate": 5.893198526821287e-07, "loss": 0.1393, "step": 7960 }, { "epoch": 0.9, "learning_rate": 5.770728773320411e-07, "loss": 0.1595, "step": 7970 }, { "epoch": 0.9, "learning_rate": 5.649507156754174e-07, "loss": 0.1369, "step": 7980 }, { "epoch": 0.9, "learning_rate": 5.52953528279323e-07, "loss": 0.1522, "step": 7990 }, { "epoch": 0.9, "learning_rate": 5.410814740554471e-07, "loss": 0.1533, "step": 8000 }, { "epoch": 0.9, "learning_rate": 5.293347102579959e-07, "loss": 0.1322, "step": 8010 }, { "epoch": 0.9, "learning_rate": 5.177133924816169e-07, "loss": 0.1389, "step": 8020 }, { "epoch": 0.9, "learning_rate": 5.062176746593195e-07, "loss": 0.1331, "step": 8030 }, { "epoch": 0.9, "learning_rate": 4.94847709060462e-07, "loss": 0.1579, "step": 8040 }, { "epoch": 0.9, "learning_rate": 4.836036462887061e-07, "loss": 0.1329, "step": 8050 }, { "epoch": 0.91, "learning_rate": 4.724856352800511e-07, "loss": 0.1511, "step": 8060 }, { "epoch": 0.91, "learning_rate": 4.614938233008359e-07, "loss": 0.1725, "step": 8070 }, { "epoch": 0.91, "learning_rate": 4.506283559458047e-07, "loss": 0.1443, "step": 8080 }, { "epoch": 0.91, "learning_rate": 4.398893771361723e-07, "loss": 0.1602, "step": 8090 }, { "epoch": 0.91, "learning_rate": 4.292770291177173e-07, "loss": 0.143, "step": 8100 }, { "epoch": 0.91, "learning_rate": 4.187914524588998e-07, "loss": 0.168, "step": 8110 }, { "epoch": 0.91, "learning_rate": 4.0843278604899673e-07, "loss": 0.1684, "step": 8120 }, { "epoch": 0.91, "learning_rate": 3.982011670962682e-07, "loss": 0.1777, "step": 8130 }, { "epoch": 0.91, "learning_rate": 3.880967311261319e-07, "loss": 0.1695, "step": 8140 }, { "epoch": 0.92, "learning_rate": 3.79111590857375e-07, "loss": 0.1533, "step": 8150 }, { "epoch": 0.92, "learning_rate": 3.692491698917511e-07, "loss": 0.1563, "step": 8160 }, { "epoch": 0.92, "learning_rate": 3.595143153995062e-07, "loss": 0.1674, "step": 8170 }, { "epoch": 0.92, "learning_rate": 3.4990715632604145e-07, "loss": 0.1505, "step": 8180 }, { "epoch": 0.92, "learning_rate": 3.404278199253397e-07, "loss": 0.1714, "step": 8190 }, { "epoch": 0.92, "learning_rate": 3.3107643175827707e-07, "loss": 0.1547, "step": 8200 }, { "epoch": 0.92, "learning_rate": 3.218531156909621e-07, "loss": 0.1546, "step": 8210 }, { "epoch": 0.92, "learning_rate": 3.127579938930891e-07, "loss": 0.1729, "step": 8220 }, { "epoch": 0.92, "learning_rate": 3.0379118683632635e-07, "loss": 0.1476, "step": 8230 }, { "epoch": 0.93, "learning_rate": 2.949528132927171e-07, "loss": 0.1444, "step": 8240 }, { "epoch": 0.93, "learning_rate": 2.8624299033310767e-07, "loss": 0.1583, "step": 8250 }, { "epoch": 0.93, "learning_rate": 2.7766183332559316e-07, "loss": 0.1489, "step": 8260 }, { "epoch": 0.93, "learning_rate": 2.692094559339975e-07, "loss": 0.1568, "step": 8270 }, { "epoch": 0.93, "learning_rate": 2.6088597011635575e-07, "loss": 0.159, "step": 8280 }, { "epoch": 0.93, "learning_rate": 2.526914861234464e-07, "loss": 0.1495, "step": 8290 }, { "epoch": 0.93, "learning_rate": 2.446261124973137e-07, "loss": 0.1471, "step": 8300 }, { "epoch": 0.93, "learning_rate": 2.3668995606984547e-07, "loss": 0.1536, "step": 8310 }, { "epoch": 0.93, "learning_rate": 2.2888312196134855e-07, "loss": 0.1583, "step": 8320 }, { "epoch": 0.94, "learning_rate": 2.2120571357915898e-07, "loss": 0.1844, "step": 8330 }, { "epoch": 0.94, "learning_rate": 2.1365783261627525e-07, "loss": 0.1399, "step": 8340 }, { "epoch": 0.94, "learning_rate": 2.0623957905000603e-07, "loss": 0.1508, "step": 8350 }, { "epoch": 0.94, "learning_rate": 1.9967406351210305e-07, "loss": 0.1458, "step": 8360 }, { "epoch": 0.94, "learning_rate": 1.9250237128636385e-07, "loss": 0.1851, "step": 8370 }, { "epoch": 0.94, "learning_rate": 1.8546058667709088e-07, "loss": 0.1561, "step": 8380 }, { "epoch": 0.94, "learning_rate": 1.7854880295797406e-07, "loss": 0.17, "step": 8390 }, { "epoch": 0.94, "learning_rate": 1.7176711168073845e-07, "loss": 0.1376, "step": 8400 }, { "epoch": 0.95, "learning_rate": 1.6511560267394088e-07, "loss": 0.1466, "step": 8410 }, { "epoch": 0.95, "learning_rate": 1.5859436404177532e-07, "loss": 0.1402, "step": 8420 }, { "epoch": 0.95, "learning_rate": 1.5220348216290924e-07, "loss": 0.1232, "step": 8430 }, { "epoch": 0.95, "learning_rate": 1.4594304168933703e-07, "loss": 0.1492, "step": 8440 }, { "epoch": 0.95, "learning_rate": 1.3981312554525728e-07, "loss": 0.1578, "step": 8450 }, { "epoch": 0.95, "learning_rate": 1.3381381492598155e-07, "loss": 0.151, "step": 8460 }, { "epoch": 0.95, "learning_rate": 1.279451892968475e-07, "loss": 0.1267, "step": 8470 }, { "epoch": 0.95, "learning_rate": 1.2220732639217858e-07, "loss": 0.162, "step": 8480 }, { "epoch": 0.95, "learning_rate": 1.1660030221424479e-07, "loss": 0.1519, "step": 8490 }, { "epoch": 0.96, "learning_rate": 1.1112419103226136e-07, "loss": 0.1519, "step": 8500 }, { "epoch": 0.96, "learning_rate": 1.057790653814017e-07, "loss": 0.1342, "step": 8510 }, { "epoch": 0.96, "learning_rate": 1.0056499606183933e-07, "loss": 0.1371, "step": 8520 }, { "epoch": 0.96, "learning_rate": 9.548205213780859e-08, "loss": 0.1744, "step": 8530 }, { "epoch": 0.96, "learning_rate": 9.053030093669313e-08, "loss": 0.1813, "step": 8540 }, { "epoch": 0.96, "learning_rate": 8.570980804812556e-08, "loss": 0.1452, "step": 8550 }, { "epoch": 0.96, "learning_rate": 8.102063732312925e-08, "loss": 0.1587, "step": 8560 }, { "epoch": 0.96, "learning_rate": 7.646285087326344e-08, "loss": 0.1961, "step": 8570 }, { "epoch": 0.96, "learning_rate": 7.203650906980942e-08, "loss": 0.1519, "step": 8580 }, { "epoch": 0.97, "learning_rate": 6.774167054296233e-08, "loss": 0.1558, "step": 8590 }, { "epoch": 0.97, "learning_rate": 6.357839218106066e-08, "loss": 0.1698, "step": 8600 }, { "epoch": 0.97, "learning_rate": 5.954672912982906e-08, "loss": 0.1756, "step": 8610 }, { "epoch": 0.97, "learning_rate": 5.564673479164895e-08, "loss": 0.1528, "step": 8620 }, { "epoch": 0.97, "learning_rate": 5.187846082485348e-08, "loss": 0.1431, "step": 8630 }, { "epoch": 0.97, "learning_rate": 4.8241957143040365e-08, "loss": 0.112, "step": 8640 }, { "epoch": 0.97, "learning_rate": 4.473727191441124e-08, "loss": 0.1766, "step": 8650 }, { "epoch": 0.97, "learning_rate": 4.136445156113222e-08, "loss": 0.1372, "step": 8660 }, { "epoch": 0.97, "learning_rate": 3.8123540758726596e-08, "loss": 0.125, "step": 8670 }, { "epoch": 0.98, "learning_rate": 3.501458243547085e-08, "loss": 0.1373, "step": 8680 }, { "epoch": 0.98, "learning_rate": 3.203761777183734e-08, "loss": 0.1584, "step": 8690 }, { "epoch": 0.98, "learning_rate": 2.9192686199939204e-08, "loss": 0.1654, "step": 8700 }, { "epoch": 0.98, "learning_rate": 2.6479825403019633e-08, "loss": 0.1473, "step": 8710 }, { "epoch": 0.98, "learning_rate": 2.389907131493785e-08, "loss": 0.1228, "step": 8720 }, { "epoch": 0.98, "learning_rate": 2.145045811970836e-08, "loss": 0.1324, "step": 8730 }, { "epoch": 0.98, "learning_rate": 1.9134018251038e-08, "loss": 0.1737, "step": 8740 }, { "epoch": 0.98, "learning_rate": 1.6949782391897375e-08, "loss": 0.1492, "step": 8750 }, { "epoch": 0.98, "learning_rate": 1.4897779474120078e-08, "loss": 0.1615, "step": 8760 }, { "epoch": 0.99, "learning_rate": 1.2978036678014117e-08, "loss": 0.1339, "step": 8770 }, { "epoch": 0.99, "learning_rate": 1.1190579432003301e-08, "loss": 0.1475, "step": 8780 }, { "epoch": 0.99, "learning_rate": 9.535431412293073e-09, "loss": 0.1477, "step": 8790 }, { "epoch": 0.99, "learning_rate": 8.012614542549646e-09, "loss": 0.1467, "step": 8800 }, { "epoch": 0.99, "learning_rate": 6.622148993619126e-09, "loss": 0.1563, "step": 8810 }, { "epoch": 0.99, "learning_rate": 5.3640531832543916e-09, "loss": 0.169, "step": 8820 }, { "epoch": 0.99, "learning_rate": 4.2383437758719555e-09, "loss": 0.1605, "step": 8830 }, { "epoch": 0.99, "learning_rate": 3.2450356823321427e-09, "loss": 0.1632, "step": 8840 }, { "epoch": 0.99, "learning_rate": 2.3841420597414677e-09, "loss": 0.1469, "step": 8850 }, { "epoch": 1.0, "learning_rate": 1.655674311276112e-09, "loss": 0.1529, "step": 8860 }, { "epoch": 1.0, "learning_rate": 1.0596420860353728e-09, "loss": 0.1308, "step": 8870 }, { "epoch": 1.0, "learning_rate": 5.960532789106577e-10, "loss": 0.14, "step": 8880 }, { "epoch": 1.0, "learning_rate": 2.6491403048112266e-10, "loss": 0.1611, "step": 8890 }, { "epoch": 1.0, "step": 8899, "total_flos": 1198267632648192.0, "train_loss": 0.1920002836473257, "train_runtime": 285448.837, "train_samples_per_second": 0.125, "train_steps_per_second": 0.031 } ], "logging_steps": 10, "max_steps": 8899, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 1198267632648192.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }