{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 3673936, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999999523671616e-05, "loss": 11.0137, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.999761835807701e-05, "loss": 7.984, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9995236716154014e-05, "loss": 7.1233, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999285507423102e-05, "loss": 6.6937, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.999047343230802e-05, "loss": 6.388, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.998809179038503e-05, "loss": 6.1487, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.998571014846203e-05, "loss": 5.9445, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.998332850653904e-05, "loss": 5.7592, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9980946864616044e-05, "loss": 5.5984, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.9978565222693046e-05, "loss": 5.4834, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.9976183580770056e-05, "loss": 5.3009, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.997380193884706e-05, "loss": 5.1806, "step": 5500 }, { "epoch": 0.01, "learning_rate": 4.997142029692407e-05, "loss": 5.0827, "step": 6000 }, { "epoch": 0.01, "learning_rate": 4.996903865500107e-05, "loss": 4.9557, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.996665701307807e-05, "loss": 4.8671, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.9964275371155076e-05, "loss": 4.7349, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.9961893729232085e-05, "loss": 4.6278, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.995951208730909e-05, "loss": 4.5656, "step": 8500 }, { "epoch": 0.02, "learning_rate": 4.995713520866994e-05, "loss": 4.4782, "step": 9000 }, { "epoch": 0.02, "learning_rate": 4.995475356674695e-05, "loss": 4.4091, "step": 9500 }, { "epoch": 0.02, "learning_rate": 4.995237192482395e-05, "loss": 4.3529, "step": 10000 }, { "epoch": 0.02, "learning_rate": 4.9949990282900955e-05, "loss": 4.2866, "step": 10500 }, { "epoch": 0.02, "learning_rate": 4.994761340426181e-05, "loss": 4.2443, "step": 11000 }, { "epoch": 0.02, "learning_rate": 4.994523176233881e-05, "loss": 4.2001, "step": 11500 }, { "epoch": 0.02, "learning_rate": 4.994285012041582e-05, "loss": 4.1593, "step": 12000 }, { "epoch": 0.02, "learning_rate": 4.994046847849282e-05, "loss": 4.1322, "step": 12500 }, { "epoch": 0.02, "learning_rate": 4.9938096363137524e-05, "loss": 4.0938, "step": 13000 }, { "epoch": 0.03, "learning_rate": 4.9935714721214526e-05, "loss": 4.0705, "step": 13500 }, { "epoch": 0.03, "learning_rate": 4.9933333079291536e-05, "loss": 4.0102, "step": 14000 }, { "epoch": 0.03, "learning_rate": 4.993095143736853e-05, "loss": 4.0048, "step": 14500 }, { "epoch": 0.03, "learning_rate": 4.992856979544554e-05, "loss": 3.9547, "step": 15000 }, { "epoch": 0.03, "learning_rate": 4.9926188153522543e-05, "loss": 3.9632, "step": 15500 }, { "epoch": 0.03, "learning_rate": 4.992380651159955e-05, "loss": 3.9251, "step": 16000 }, { "epoch": 0.03, "learning_rate": 4.9921429632960405e-05, "loss": 3.9255, "step": 16500 }, { "epoch": 0.03, "learning_rate": 4.991904799103741e-05, "loss": 3.8649, "step": 17000 }, { "epoch": 0.03, "learning_rate": 4.991666634911441e-05, "loss": 3.868, "step": 17500 }, { "epoch": 0.03, "learning_rate": 4.991428470719142e-05, "loss": 3.8656, "step": 18000 }, { "epoch": 0.04, "learning_rate": 4.991190306526842e-05, "loss": 3.8354, "step": 18500 }, { "epoch": 0.04, "learning_rate": 4.990952142334543e-05, "loss": 3.8092, "step": 19000 }, { "epoch": 0.04, "learning_rate": 4.9907139781422434e-05, "loss": 3.7616, "step": 19500 }, { "epoch": 0.04, "learning_rate": 4.990476290278329e-05, "loss": 3.7865, "step": 20000 }, { "epoch": 0.04, "learning_rate": 4.990238126086029e-05, "loss": 3.7486, "step": 20500 }, { "epoch": 0.04, "learning_rate": 4.989999961893729e-05, "loss": 3.7564, "step": 21000 }, { "epoch": 0.04, "learning_rate": 4.98976179770143e-05, "loss": 3.7295, "step": 21500 }, { "epoch": 0.04, "learning_rate": 4.9895236335091304e-05, "loss": 3.7099, "step": 22000 }, { "epoch": 0.04, "learning_rate": 4.9892854693168313e-05, "loss": 3.7051, "step": 22500 }, { "epoch": 0.04, "learning_rate": 4.9890473051245316e-05, "loss": 3.6885, "step": 23000 }, { "epoch": 0.04, "learning_rate": 4.988809140932232e-05, "loss": 3.6618, "step": 23500 }, { "epoch": 0.05, "learning_rate": 4.988570976739932e-05, "loss": 3.6792, "step": 24000 }, { "epoch": 0.05, "learning_rate": 4.988332812547633e-05, "loss": 3.6561, "step": 24500 }, { "epoch": 0.05, "learning_rate": 4.988094648355334e-05, "loss": 3.6536, "step": 25000 }, { "epoch": 0.05, "learning_rate": 4.9878569604914186e-05, "loss": 3.6481, "step": 25500 }, { "epoch": 0.05, "learning_rate": 4.9876187962991195e-05, "loss": 3.6074, "step": 26000 }, { "epoch": 0.05, "learning_rate": 4.98738063210682e-05, "loss": 3.6203, "step": 26500 }, { "epoch": 0.05, "learning_rate": 4.98714246791452e-05, "loss": 3.605, "step": 27000 }, { "epoch": 0.05, "learning_rate": 4.986904780050605e-05, "loss": 3.5967, "step": 27500 }, { "epoch": 0.05, "learning_rate": 4.9866666158583055e-05, "loss": 3.5657, "step": 28000 }, { "epoch": 0.05, "learning_rate": 4.986428927994391e-05, "loss": 3.5736, "step": 28500 }, { "epoch": 0.06, "learning_rate": 4.986190763802092e-05, "loss": 3.5563, "step": 29000 }, { "epoch": 0.06, "learning_rate": 4.985952599609792e-05, "loss": 3.5582, "step": 29500 }, { "epoch": 0.06, "learning_rate": 4.985714911745877e-05, "loss": 3.5725, "step": 30000 }, { "epoch": 0.06, "eval_accuracy": 0.41541346348770203, "eval_loss": 3.4435417652130127, "eval_runtime": 4216.2146, "eval_samples_per_second": 65.222, "eval_steps_per_second": 6.522, "step": 30000 }, { "epoch": 0.06, "learning_rate": 4.9854767475535774e-05, "loss": 3.5448, "step": 30500 }, { "epoch": 0.06, "learning_rate": 4.985238583361278e-05, "loss": 3.5492, "step": 31000 }, { "epoch": 0.06, "learning_rate": 4.9850004191689786e-05, "loss": 3.5285, "step": 31500 }, { "epoch": 0.06, "learning_rate": 4.9847622549766796e-05, "loss": 3.5095, "step": 32000 }, { "epoch": 0.06, "learning_rate": 4.984524567112764e-05, "loss": 3.488, "step": 32500 }, { "epoch": 0.06, "learning_rate": 4.984286402920465e-05, "loss": 3.4749, "step": 33000 }, { "epoch": 0.06, "learning_rate": 4.984048238728165e-05, "loss": 3.5116, "step": 33500 }, { "epoch": 0.06, "learning_rate": 4.9838100745358656e-05, "loss": 3.4812, "step": 34000 }, { "epoch": 0.07, "learning_rate": 4.9835719103435665e-05, "loss": 3.4653, "step": 34500 }, { "epoch": 0.07, "learning_rate": 4.983333746151267e-05, "loss": 3.4669, "step": 35000 }, { "epoch": 0.07, "learning_rate": 4.983095581958968e-05, "loss": 3.4564, "step": 35500 }, { "epoch": 0.07, "learning_rate": 4.982857417766668e-05, "loss": 3.4531, "step": 36000 }, { "epoch": 0.07, "learning_rate": 4.982619253574368e-05, "loss": 3.4486, "step": 36500 }, { "epoch": 0.07, "learning_rate": 4.9823810893820685e-05, "loss": 3.4435, "step": 37000 }, { "epoch": 0.07, "learning_rate": 4.982143401518154e-05, "loss": 3.4376, "step": 37500 }, { "epoch": 0.07, "learning_rate": 4.981905237325855e-05, "loss": 3.405, "step": 38000 }, { "epoch": 0.07, "learning_rate": 4.981667073133555e-05, "loss": 3.4307, "step": 38500 }, { "epoch": 0.07, "learning_rate": 4.981428908941256e-05, "loss": 3.4117, "step": 39000 }, { "epoch": 0.08, "learning_rate": 4.981190744748956e-05, "loss": 3.4025, "step": 39500 }, { "epoch": 0.08, "learning_rate": 4.9809525805566564e-05, "loss": 3.4141, "step": 40000 }, { "epoch": 0.08, "learning_rate": 4.9807144163643574e-05, "loss": 3.3937, "step": 40500 }, { "epoch": 0.08, "learning_rate": 4.9804762521720576e-05, "loss": 3.3952, "step": 41000 }, { "epoch": 0.08, "learning_rate": 4.980238564308143e-05, "loss": 3.4003, "step": 41500 }, { "epoch": 0.08, "learning_rate": 4.980000400115843e-05, "loss": 3.4048, "step": 42000 }, { "epoch": 0.08, "learning_rate": 4.979762235923544e-05, "loss": 3.3994, "step": 42500 }, { "epoch": 0.08, "learning_rate": 4.979524548059629e-05, "loss": 3.3665, "step": 43000 }, { "epoch": 0.08, "learning_rate": 4.9792863838673296e-05, "loss": 3.3959, "step": 43500 }, { "epoch": 0.08, "learning_rate": 4.97904821967503e-05, "loss": 3.3571, "step": 44000 }, { "epoch": 0.08, "learning_rate": 4.978810055482731e-05, "loss": 3.355, "step": 44500 }, { "epoch": 0.09, "learning_rate": 4.978571891290431e-05, "loss": 3.3801, "step": 45000 }, { "epoch": 0.09, "learning_rate": 4.978334203426516e-05, "loss": 3.3567, "step": 45500 }, { "epoch": 0.09, "learning_rate": 4.978096039234217e-05, "loss": 3.3357, "step": 46000 }, { "epoch": 0.09, "learning_rate": 4.977857875041917e-05, "loss": 3.349, "step": 46500 }, { "epoch": 0.09, "learning_rate": 4.977619710849618e-05, "loss": 3.3361, "step": 47000 }, { "epoch": 0.09, "learning_rate": 4.977382022985703e-05, "loss": 3.3283, "step": 47500 }, { "epoch": 0.09, "learning_rate": 4.977143858793403e-05, "loss": 3.3326, "step": 48000 }, { "epoch": 0.09, "learning_rate": 4.976905694601104e-05, "loss": 3.3317, "step": 48500 }, { "epoch": 0.09, "learning_rate": 4.9766675304088044e-05, "loss": 3.3347, "step": 49000 }, { "epoch": 0.09, "learning_rate": 4.976429366216505e-05, "loss": 3.309, "step": 49500 }, { "epoch": 0.1, "learning_rate": 4.97619167835259e-05, "loss": 3.3189, "step": 50000 }, { "epoch": 0.1, "learning_rate": 4.97595351416029e-05, "loss": 3.3208, "step": 50500 }, { "epoch": 0.1, "learning_rate": 4.975715349967991e-05, "loss": 3.3145, "step": 51000 }, { "epoch": 0.1, "learning_rate": 4.9754771857756914e-05, "loss": 3.3244, "step": 51500 }, { "epoch": 0.1, "learning_rate": 4.975239021583392e-05, "loss": 3.2951, "step": 52000 }, { "epoch": 0.1, "learning_rate": 4.9750013337194775e-05, "loss": 3.3083, "step": 52500 }, { "epoch": 0.1, "learning_rate": 4.974763169527177e-05, "loss": 3.3045, "step": 53000 }, { "epoch": 0.1, "learning_rate": 4.974525005334878e-05, "loss": 3.292, "step": 53500 }, { "epoch": 0.1, "learning_rate": 4.974286841142578e-05, "loss": 3.3146, "step": 54000 }, { "epoch": 0.1, "learning_rate": 4.974048676950279e-05, "loss": 3.2819, "step": 54500 }, { "epoch": 0.1, "learning_rate": 4.9738105127579795e-05, "loss": 3.2981, "step": 55000 }, { "epoch": 0.11, "learning_rate": 4.9735723485656805e-05, "loss": 3.2745, "step": 55500 }, { "epoch": 0.11, "learning_rate": 4.973334184373381e-05, "loss": 3.263, "step": 56000 }, { "epoch": 0.11, "learning_rate": 4.973096020181081e-05, "loss": 3.2733, "step": 56500 }, { "epoch": 0.11, "learning_rate": 4.972858332317166e-05, "loss": 3.268, "step": 57000 }, { "epoch": 0.11, "learning_rate": 4.9726206444532514e-05, "loss": 3.2639, "step": 57500 }, { "epoch": 0.11, "learning_rate": 4.972382480260952e-05, "loss": 3.2594, "step": 58000 }, { "epoch": 0.11, "learning_rate": 4.9721443160686527e-05, "loss": 3.2511, "step": 58500 }, { "epoch": 0.11, "learning_rate": 4.971906151876353e-05, "loss": 3.2542, "step": 59000 }, { "epoch": 0.11, "learning_rate": 4.971667987684053e-05, "loss": 3.2588, "step": 59500 }, { "epoch": 0.11, "learning_rate": 4.9714302998201384e-05, "loss": 3.2641, "step": 60000 }, { "epoch": 0.11, "eval_accuracy": 0.4468345093412838, "eval_loss": 3.1526217460632324, "eval_runtime": 4222.6173, "eval_samples_per_second": 65.123, "eval_steps_per_second": 6.512, "step": 60000 }, { "epoch": 0.12, "learning_rate": 4.9711921356278393e-05, "loss": 3.2438, "step": 60500 }, { "epoch": 0.12, "learning_rate": 4.9709539714355396e-05, "loss": 3.2406, "step": 61000 }, { "epoch": 0.12, "learning_rate": 4.9707158072432405e-05, "loss": 3.2435, "step": 61500 }, { "epoch": 0.12, "learning_rate": 4.970477643050941e-05, "loss": 3.2462, "step": 62000 }, { "epoch": 0.12, "learning_rate": 4.970239478858641e-05, "loss": 3.2408, "step": 62500 }, { "epoch": 0.12, "learning_rate": 4.970001314666341e-05, "loss": 3.2535, "step": 63000 }, { "epoch": 0.12, "learning_rate": 4.9697636268024266e-05, "loss": 3.249, "step": 63500 }, { "epoch": 0.12, "learning_rate": 4.9695254626101275e-05, "loss": 3.2246, "step": 64000 }, { "epoch": 0.12, "learning_rate": 4.969287298417828e-05, "loss": 3.2244, "step": 64500 }, { "epoch": 0.12, "learning_rate": 4.969049134225529e-05, "loss": 3.2282, "step": 65000 }, { "epoch": 0.12, "learning_rate": 4.968810970033229e-05, "loss": 3.2215, "step": 65500 }, { "epoch": 0.13, "learning_rate": 4.968572805840929e-05, "loss": 3.1836, "step": 66000 }, { "epoch": 0.13, "learning_rate": 4.9683351179770145e-05, "loss": 3.2228, "step": 66500 }, { "epoch": 0.13, "learning_rate": 4.968096953784715e-05, "loss": 3.2261, "step": 67000 }, { "epoch": 0.13, "learning_rate": 4.967858789592416e-05, "loss": 3.2021, "step": 67500 }, { "epoch": 0.13, "learning_rate": 4.967620625400116e-05, "loss": 3.2033, "step": 68000 }, { "epoch": 0.13, "learning_rate": 4.967382461207817e-05, "loss": 3.2121, "step": 68500 }, { "epoch": 0.13, "learning_rate": 4.967144773343902e-05, "loss": 3.2048, "step": 69000 }, { "epoch": 0.13, "learning_rate": 4.966906609151602e-05, "loss": 3.201, "step": 69500 }, { "epoch": 0.13, "learning_rate": 4.9666684449593026e-05, "loss": 3.1764, "step": 70000 }, { "epoch": 0.13, "learning_rate": 4.9664302807670036e-05, "loss": 3.1852, "step": 70500 }, { "epoch": 0.14, "learning_rate": 4.966192116574704e-05, "loss": 3.185, "step": 71000 }, { "epoch": 0.14, "learning_rate": 4.965953952382405e-05, "loss": 3.1944, "step": 71500 }, { "epoch": 0.14, "learning_rate": 4.965715788190105e-05, "loss": 3.165, "step": 72000 }, { "epoch": 0.14, "learning_rate": 4.965477623997805e-05, "loss": 3.1636, "step": 72500 }, { "epoch": 0.14, "learning_rate": 4.9652399361338905e-05, "loss": 3.1808, "step": 73000 }, { "epoch": 0.14, "learning_rate": 4.965001771941591e-05, "loss": 3.1703, "step": 73500 }, { "epoch": 0.14, "learning_rate": 4.964763607749292e-05, "loss": 3.1667, "step": 74000 }, { "epoch": 0.14, "learning_rate": 4.964525443556992e-05, "loss": 3.1565, "step": 74500 }, { "epoch": 0.14, "learning_rate": 4.964287755693077e-05, "loss": 3.1491, "step": 75000 }, { "epoch": 0.14, "learning_rate": 4.964049591500778e-05, "loss": 3.1529, "step": 75500 }, { "epoch": 0.14, "learning_rate": 4.963811427308478e-05, "loss": 3.1586, "step": 76000 }, { "epoch": 0.15, "learning_rate": 4.963573263116179e-05, "loss": 3.1675, "step": 76500 }, { "epoch": 0.15, "learning_rate": 4.963335098923879e-05, "loss": 3.1665, "step": 77000 }, { "epoch": 0.15, "learning_rate": 4.96309693473158e-05, "loss": 3.1786, "step": 77500 }, { "epoch": 0.15, "learning_rate": 4.962859246867665e-05, "loss": 3.1415, "step": 78000 }, { "epoch": 0.15, "learning_rate": 4.9626210826753654e-05, "loss": 3.1518, "step": 78500 }, { "epoch": 0.15, "learning_rate": 4.9623829184830656e-05, "loss": 3.1632, "step": 79000 }, { "epoch": 0.15, "learning_rate": 4.962144754290766e-05, "loss": 3.1531, "step": 79500 }, { "epoch": 0.15, "learning_rate": 4.961907066426851e-05, "loss": 3.1429, "step": 80000 }, { "epoch": 0.15, "learning_rate": 4.961668902234552e-05, "loss": 3.1457, "step": 80500 }, { "epoch": 0.15, "learning_rate": 4.961430738042252e-05, "loss": 3.1483, "step": 81000 }, { "epoch": 0.16, "learning_rate": 4.961192573849953e-05, "loss": 3.1449, "step": 81500 }, { "epoch": 0.16, "learning_rate": 4.9609544096576535e-05, "loss": 3.1436, "step": 82000 }, { "epoch": 0.16, "learning_rate": 4.960716245465354e-05, "loss": 3.1293, "step": 82500 }, { "epoch": 0.16, "learning_rate": 4.960478081273055e-05, "loss": 3.1183, "step": 83000 }, { "epoch": 0.16, "learning_rate": 4.960239917080755e-05, "loss": 3.1399, "step": 83500 }, { "epoch": 0.16, "learning_rate": 4.960001752888456e-05, "loss": 3.1278, "step": 84000 }, { "epoch": 0.16, "learning_rate": 4.959763588696156e-05, "loss": 3.1229, "step": 84500 }, { "epoch": 0.16, "learning_rate": 4.9595254245038565e-05, "loss": 3.1287, "step": 85000 }, { "epoch": 0.16, "learning_rate": 4.959287260311557e-05, "loss": 3.1408, "step": 85500 }, { "epoch": 0.16, "learning_rate": 4.959049572447642e-05, "loss": 3.128, "step": 86000 }, { "epoch": 0.16, "learning_rate": 4.958811408255343e-05, "loss": 3.1285, "step": 86500 }, { "epoch": 0.17, "learning_rate": 4.958573244063043e-05, "loss": 3.1363, "step": 87000 }, { "epoch": 0.17, "learning_rate": 4.9583355561991284e-05, "loss": 3.127, "step": 87500 }, { "epoch": 0.17, "learning_rate": 4.958097392006829e-05, "loss": 3.1079, "step": 88000 }, { "epoch": 0.17, "learning_rate": 4.957859227814529e-05, "loss": 3.1329, "step": 88500 }, { "epoch": 0.17, "learning_rate": 4.957621539950614e-05, "loss": 3.096, "step": 89000 }, { "epoch": 0.17, "learning_rate": 4.957383375758315e-05, "loss": 3.1149, "step": 89500 }, { "epoch": 0.17, "learning_rate": 4.9571452115660153e-05, "loss": 3.1188, "step": 90000 }, { "epoch": 0.17, "eval_accuracy": 0.46232212731337385, "eval_loss": 3.011150598526001, "eval_runtime": 4214.178, "eval_samples_per_second": 65.253, "eval_steps_per_second": 6.525, "step": 90000 }, { "epoch": 0.17, "learning_rate": 4.956907047373716e-05, "loss": 3.0974, "step": 90500 }, { "epoch": 0.17, "learning_rate": 4.9566688831814165e-05, "loss": 3.0888, "step": 91000 }, { "epoch": 0.17, "learning_rate": 4.956430718989117e-05, "loss": 3.1028, "step": 91500 }, { "epoch": 0.18, "learning_rate": 4.956192554796818e-05, "loss": 3.0907, "step": 92000 }, { "epoch": 0.18, "learning_rate": 4.955954390604518e-05, "loss": 3.1065, "step": 92500 }, { "epoch": 0.18, "learning_rate": 4.955716702740603e-05, "loss": 3.1142, "step": 93000 }, { "epoch": 0.18, "learning_rate": 4.9554785385483035e-05, "loss": 3.1061, "step": 93500 }, { "epoch": 0.18, "learning_rate": 4.9552403743560044e-05, "loss": 3.0688, "step": 94000 }, { "epoch": 0.18, "learning_rate": 4.955002210163705e-05, "loss": 3.0756, "step": 94500 }, { "epoch": 0.18, "learning_rate": 4.954764045971405e-05, "loss": 3.1067, "step": 95000 }, { "epoch": 0.18, "learning_rate": 4.954525881779106e-05, "loss": 3.098, "step": 95500 }, { "epoch": 0.18, "learning_rate": 4.954288193915191e-05, "loss": 3.0842, "step": 96000 }, { "epoch": 0.18, "learning_rate": 4.9540500297228914e-05, "loss": 3.1108, "step": 96500 }, { "epoch": 0.18, "learning_rate": 4.9538118655305923e-05, "loss": 3.1051, "step": 97000 }, { "epoch": 0.19, "learning_rate": 4.9535737013382926e-05, "loss": 3.0793, "step": 97500 }, { "epoch": 0.19, "learning_rate": 4.953335537145993e-05, "loss": 3.0828, "step": 98000 }, { "epoch": 0.19, "learning_rate": 4.953097849282078e-05, "loss": 3.0739, "step": 98500 }, { "epoch": 0.19, "learning_rate": 4.9528596850897784e-05, "loss": 3.075, "step": 99000 }, { "epoch": 0.19, "learning_rate": 4.952621520897479e-05, "loss": 3.0725, "step": 99500 }, { "epoch": 0.19, "learning_rate": 4.9523833567051796e-05, "loss": 3.0696, "step": 100000 }, { "epoch": 0.19, "learning_rate": 4.9521451925128805e-05, "loss": 3.093, "step": 100500 }, { "epoch": 0.19, "learning_rate": 4.951907504648966e-05, "loss": 3.1022, "step": 101000 }, { "epoch": 0.19, "learning_rate": 4.951669340456665e-05, "loss": 3.0696, "step": 101500 }, { "epoch": 0.19, "learning_rate": 4.951431176264366e-05, "loss": 3.0587, "step": 102000 }, { "epoch": 0.2, "learning_rate": 4.9511930120720665e-05, "loss": 3.0673, "step": 102500 }, { "epoch": 0.2, "learning_rate": 4.9509548478797675e-05, "loss": 3.0797, "step": 103000 }, { "epoch": 0.2, "learning_rate": 4.950717160015853e-05, "loss": 3.0633, "step": 103500 }, { "epoch": 0.2, "learning_rate": 4.950478995823553e-05, "loss": 3.0532, "step": 104000 }, { "epoch": 0.2, "learning_rate": 4.950240831631254e-05, "loss": 3.0784, "step": 104500 }, { "epoch": 0.2, "learning_rate": 4.9500026674389535e-05, "loss": 3.0876, "step": 105000 }, { "epoch": 0.2, "learning_rate": 4.949764979575039e-05, "loss": 3.0644, "step": 105500 }, { "epoch": 0.2, "learning_rate": 4.9495268153827396e-05, "loss": 3.068, "step": 106000 }, { "epoch": 0.2, "learning_rate": 4.949289127518825e-05, "loss": 3.0458, "step": 106500 }, { "epoch": 0.2, "learning_rate": 4.949050963326525e-05, "loss": 3.0647, "step": 107000 }, { "epoch": 0.2, "learning_rate": 4.948812799134226e-05, "loss": 3.0725, "step": 107500 }, { "epoch": 0.21, "learning_rate": 4.948574634941926e-05, "loss": 3.0271, "step": 108000 }, { "epoch": 0.21, "learning_rate": 4.9483364707496266e-05, "loss": 3.0606, "step": 108500 }, { "epoch": 0.21, "learning_rate": 4.948098306557327e-05, "loss": 3.0502, "step": 109000 }, { "epoch": 0.21, "learning_rate": 4.947860618693412e-05, "loss": 3.0381, "step": 109500 }, { "epoch": 0.21, "learning_rate": 4.947622454501113e-05, "loss": 3.0475, "step": 110000 }, { "epoch": 0.21, "learning_rate": 4.947384290308813e-05, "loss": 3.0515, "step": 110500 }, { "epoch": 0.21, "learning_rate": 4.947146126116514e-05, "loss": 3.0638, "step": 111000 }, { "epoch": 0.21, "learning_rate": 4.9469079619242145e-05, "loss": 3.0554, "step": 111500 }, { "epoch": 0.21, "learning_rate": 4.946669797731915e-05, "loss": 3.0375, "step": 112000 }, { "epoch": 0.21, "learning_rate": 4.946431633539616e-05, "loss": 3.0443, "step": 112500 }, { "epoch": 0.22, "learning_rate": 4.946193469347316e-05, "loss": 3.0302, "step": 113000 }, { "epoch": 0.22, "learning_rate": 4.945955305155017e-05, "loss": 3.0351, "step": 113500 }, { "epoch": 0.22, "learning_rate": 4.945717617291102e-05, "loss": 3.0397, "step": 114000 }, { "epoch": 0.22, "learning_rate": 4.9454794530988024e-05, "loss": 3.0398, "step": 114500 }, { "epoch": 0.22, "learning_rate": 4.9452412889065027e-05, "loss": 3.0542, "step": 115000 }, { "epoch": 0.22, "learning_rate": 4.945003601042588e-05, "loss": 3.0548, "step": 115500 }, { "epoch": 0.22, "learning_rate": 4.944765436850288e-05, "loss": 3.0487, "step": 116000 }, { "epoch": 0.22, "learning_rate": 4.944527272657989e-05, "loss": 3.0132, "step": 116500 }, { "epoch": 0.22, "learning_rate": 4.9442891084656894e-05, "loss": 3.0301, "step": 117000 }, { "epoch": 0.22, "learning_rate": 4.9440514206017746e-05, "loss": 3.0305, "step": 117500 }, { "epoch": 0.22, "learning_rate": 4.9438132564094755e-05, "loss": 3.0082, "step": 118000 }, { "epoch": 0.23, "learning_rate": 4.943575092217175e-05, "loss": 3.0325, "step": 118500 }, { "epoch": 0.23, "learning_rate": 4.943336928024876e-05, "loss": 3.0237, "step": 119000 }, { "epoch": 0.23, "learning_rate": 4.943098763832576e-05, "loss": 3.042, "step": 119500 }, { "epoch": 0.23, "learning_rate": 4.942860599640277e-05, "loss": 3.0365, "step": 120000 }, { "epoch": 0.23, "eval_accuracy": 0.47260208535750414, "eval_loss": 2.92478346824646, "eval_runtime": 4203.6765, "eval_samples_per_second": 65.416, "eval_steps_per_second": 6.542, "step": 120000 }, { "epoch": 0.23, "learning_rate": 4.9426229117763625e-05, "loss": 3.0064, "step": 120500 }, { "epoch": 0.23, "learning_rate": 4.942384747584063e-05, "loss": 3.0299, "step": 121000 }, { "epoch": 0.23, "learning_rate": 4.942146583391763e-05, "loss": 3.0462, "step": 121500 }, { "epoch": 0.23, "learning_rate": 4.941908419199463e-05, "loss": 3.0181, "step": 122000 }, { "epoch": 0.23, "learning_rate": 4.941670255007164e-05, "loss": 3.0111, "step": 122500 }, { "epoch": 0.23, "learning_rate": 4.9414320908148645e-05, "loss": 3.0239, "step": 123000 }, { "epoch": 0.24, "learning_rate": 4.9411939266225654e-05, "loss": 3.0011, "step": 123500 }, { "epoch": 0.24, "learning_rate": 4.9409557624302663e-05, "loss": 3.0184, "step": 124000 }, { "epoch": 0.24, "learning_rate": 4.940718074566351e-05, "loss": 3.0211, "step": 124500 }, { "epoch": 0.24, "learning_rate": 4.940479910374051e-05, "loss": 3.0203, "step": 125000 }, { "epoch": 0.24, "learning_rate": 4.940241746181752e-05, "loss": 2.9997, "step": 125500 }, { "epoch": 0.24, "learning_rate": 4.9400035819894524e-05, "loss": 3.009, "step": 126000 }, { "epoch": 0.24, "learning_rate": 4.9397658941255376e-05, "loss": 2.9964, "step": 126500 }, { "epoch": 0.24, "learning_rate": 4.9395277299332385e-05, "loss": 3.0033, "step": 127000 }, { "epoch": 0.24, "learning_rate": 4.939289565740939e-05, "loss": 2.9992, "step": 127500 }, { "epoch": 0.24, "learning_rate": 4.939051401548639e-05, "loss": 3.0084, "step": 128000 }, { "epoch": 0.24, "learning_rate": 4.938813237356339e-05, "loss": 2.9921, "step": 128500 }, { "epoch": 0.25, "learning_rate": 4.93857507316404e-05, "loss": 3.0012, "step": 129000 }, { "epoch": 0.25, "learning_rate": 4.9383373853001255e-05, "loss": 3.0017, "step": 129500 }, { "epoch": 0.25, "learning_rate": 4.938099221107826e-05, "loss": 2.9846, "step": 130000 }, { "epoch": 0.25, "learning_rate": 4.937861056915527e-05, "loss": 3.0023, "step": 130500 }, { "epoch": 0.25, "learning_rate": 4.937622892723226e-05, "loss": 2.9973, "step": 131000 }, { "epoch": 0.25, "learning_rate": 4.9373852048593115e-05, "loss": 2.9771, "step": 131500 }, { "epoch": 0.25, "learning_rate": 4.9371470406670124e-05, "loss": 2.9814, "step": 132000 }, { "epoch": 0.25, "learning_rate": 4.936908876474713e-05, "loss": 2.9834, "step": 132500 }, { "epoch": 0.25, "learning_rate": 4.9366707122824137e-05, "loss": 3.0091, "step": 133000 }, { "epoch": 0.25, "learning_rate": 4.936432548090114e-05, "loss": 2.985, "step": 133500 }, { "epoch": 0.26, "learning_rate": 4.936194383897814e-05, "loss": 2.9763, "step": 134000 }, { "epoch": 0.26, "learning_rate": 4.935956219705515e-05, "loss": 2.9712, "step": 134500 }, { "epoch": 0.26, "learning_rate": 4.9357185318416e-05, "loss": 2.998, "step": 135000 }, { "epoch": 0.26, "learning_rate": 4.9354803676493006e-05, "loss": 2.9996, "step": 135500 }, { "epoch": 0.26, "learning_rate": 4.935242203457001e-05, "loss": 2.9956, "step": 136000 }, { "epoch": 0.26, "learning_rate": 4.935004039264702e-05, "loss": 3.0, "step": 136500 }, { "epoch": 0.26, "learning_rate": 4.934765875072402e-05, "loss": 2.9845, "step": 137000 }, { "epoch": 0.26, "learning_rate": 4.934527710880102e-05, "loss": 3.0032, "step": 137500 }, { "epoch": 0.26, "learning_rate": 4.934289546687803e-05, "loss": 2.9781, "step": 138000 }, { "epoch": 0.26, "learning_rate": 4.9340518588238885e-05, "loss": 2.9831, "step": 138500 }, { "epoch": 0.26, "learning_rate": 4.933814170959973e-05, "loss": 2.9895, "step": 139000 }, { "epoch": 0.27, "learning_rate": 4.933576006767674e-05, "loss": 2.9894, "step": 139500 }, { "epoch": 0.27, "learning_rate": 4.933337842575374e-05, "loss": 2.9726, "step": 140000 }, { "epoch": 0.27, "learning_rate": 4.933099678383075e-05, "loss": 2.9903, "step": 140500 }, { "epoch": 0.27, "learning_rate": 4.9328615141907755e-05, "loss": 2.9834, "step": 141000 }, { "epoch": 0.27, "learning_rate": 4.932623349998476e-05, "loss": 2.9733, "step": 141500 }, { "epoch": 0.27, "learning_rate": 4.932385185806177e-05, "loss": 2.9814, "step": 142000 }, { "epoch": 0.27, "learning_rate": 4.932147021613877e-05, "loss": 2.9641, "step": 142500 }, { "epoch": 0.27, "learning_rate": 4.931908857421578e-05, "loss": 2.9625, "step": 143000 }, { "epoch": 0.27, "learning_rate": 4.9316706932292775e-05, "loss": 2.9771, "step": 143500 }, { "epoch": 0.27, "learning_rate": 4.9314325290369784e-05, "loss": 2.9806, "step": 144000 }, { "epoch": 0.28, "learning_rate": 4.931194364844679e-05, "loss": 2.9707, "step": 144500 }, { "epoch": 0.28, "learning_rate": 4.9309562006523796e-05, "loss": 2.9815, "step": 145000 }, { "epoch": 0.28, "learning_rate": 4.9307180364600805e-05, "loss": 2.9802, "step": 145500 }, { "epoch": 0.28, "learning_rate": 4.930479872267781e-05, "loss": 2.9659, "step": 146000 }, { "epoch": 0.28, "learning_rate": 4.930242184403866e-05, "loss": 2.9527, "step": 146500 }, { "epoch": 0.28, "learning_rate": 4.930004020211566e-05, "loss": 2.955, "step": 147000 }, { "epoch": 0.28, "learning_rate": 4.9297658560192666e-05, "loss": 2.9793, "step": 147500 }, { "epoch": 0.28, "learning_rate": 4.9295276918269675e-05, "loss": 2.9665, "step": 148000 }, { "epoch": 0.28, "learning_rate": 4.929289527634668e-05, "loss": 2.958, "step": 148500 }, { "epoch": 0.28, "learning_rate": 4.929051839770753e-05, "loss": 2.9461, "step": 149000 }, { "epoch": 0.28, "learning_rate": 4.928813675578454e-05, "loss": 2.955, "step": 149500 }, { "epoch": 0.29, "learning_rate": 4.9285755113861535e-05, "loss": 2.9485, "step": 150000 }, { "epoch": 0.29, "eval_accuracy": 0.4800383469323627, "eval_loss": 2.864793300628662, "eval_runtime": 4210.0007, "eval_samples_per_second": 65.318, "eval_steps_per_second": 6.532, "step": 150000 }, { "epoch": 0.29, "learning_rate": 4.9283373471938545e-05, "loss": 2.9538, "step": 150500 }, { "epoch": 0.29, "learning_rate": 4.92809965932994e-05, "loss": 2.9492, "step": 151000 }, { "epoch": 0.29, "learning_rate": 4.92786149513764e-05, "loss": 2.95, "step": 151500 }, { "epoch": 0.29, "learning_rate": 4.927623330945341e-05, "loss": 2.964, "step": 152000 }, { "epoch": 0.29, "learning_rate": 4.927385166753041e-05, "loss": 2.9606, "step": 152500 }, { "epoch": 0.29, "learning_rate": 4.9271470025607414e-05, "loss": 2.949, "step": 153000 }, { "epoch": 0.29, "learning_rate": 4.926908838368442e-05, "loss": 2.9471, "step": 153500 }, { "epoch": 0.29, "learning_rate": 4.926671150504527e-05, "loss": 2.9512, "step": 154000 }, { "epoch": 0.29, "learning_rate": 4.926432986312228e-05, "loss": 2.9483, "step": 154500 }, { "epoch": 0.3, "learning_rate": 4.926194822119928e-05, "loss": 2.9451, "step": 155000 }, { "epoch": 0.3, "learning_rate": 4.925956657927629e-05, "loss": 2.9353, "step": 155500 }, { "epoch": 0.3, "learning_rate": 4.925718493735329e-05, "loss": 2.9489, "step": 156000 }, { "epoch": 0.3, "learning_rate": 4.9254803295430296e-05, "loss": 2.943, "step": 156500 }, { "epoch": 0.3, "learning_rate": 4.925242641679115e-05, "loss": 2.9482, "step": 157000 }, { "epoch": 0.3, "learning_rate": 4.925004477486815e-05, "loss": 2.9412, "step": 157500 }, { "epoch": 0.3, "learning_rate": 4.924766313294516e-05, "loss": 2.9429, "step": 158000 }, { "epoch": 0.3, "learning_rate": 4.924528149102217e-05, "loss": 2.9481, "step": 158500 }, { "epoch": 0.3, "learning_rate": 4.924289984909917e-05, "loss": 2.9405, "step": 159000 }, { "epoch": 0.3, "learning_rate": 4.9240518207176175e-05, "loss": 2.924, "step": 159500 }, { "epoch": 0.3, "learning_rate": 4.923813656525318e-05, "loss": 2.9204, "step": 160000 }, { "epoch": 0.31, "learning_rate": 4.923575968661403e-05, "loss": 2.9657, "step": 160500 }, { "epoch": 0.31, "learning_rate": 4.923337804469104e-05, "loss": 2.9385, "step": 161000 }, { "epoch": 0.31, "learning_rate": 4.923099640276804e-05, "loss": 2.9479, "step": 161500 }, { "epoch": 0.31, "learning_rate": 4.922861476084505e-05, "loss": 2.9378, "step": 162000 }, { "epoch": 0.31, "learning_rate": 4.92262378822059e-05, "loss": 2.9242, "step": 162500 }, { "epoch": 0.31, "learning_rate": 4.92238562402829e-05, "loss": 2.9325, "step": 163000 }, { "epoch": 0.31, "learning_rate": 4.922147459835991e-05, "loss": 2.9201, "step": 163500 }, { "epoch": 0.31, "learning_rate": 4.921909295643691e-05, "loss": 2.9464, "step": 164000 }, { "epoch": 0.31, "learning_rate": 4.921671131451392e-05, "loss": 2.9199, "step": 164500 }, { "epoch": 0.31, "learning_rate": 4.921432967259092e-05, "loss": 2.9203, "step": 165000 }, { "epoch": 0.32, "learning_rate": 4.921194803066793e-05, "loss": 2.939, "step": 165500 }, { "epoch": 0.32, "learning_rate": 4.9209571152028785e-05, "loss": 2.9499, "step": 166000 }, { "epoch": 0.32, "learning_rate": 4.920718951010578e-05, "loss": 2.9351, "step": 166500 }, { "epoch": 0.32, "learning_rate": 4.920480786818279e-05, "loss": 2.9298, "step": 167000 }, { "epoch": 0.32, "learning_rate": 4.920242622625979e-05, "loss": 2.9327, "step": 167500 }, { "epoch": 0.32, "learning_rate": 4.92000445843368e-05, "loss": 2.9069, "step": 168000 }, { "epoch": 0.32, "learning_rate": 4.9197662942413805e-05, "loss": 2.9296, "step": 168500 }, { "epoch": 0.32, "learning_rate": 4.919528130049081e-05, "loss": 2.9222, "step": 169000 }, { "epoch": 0.32, "learning_rate": 4.919289965856782e-05, "loss": 2.9232, "step": 169500 }, { "epoch": 0.32, "learning_rate": 4.919052277992867e-05, "loss": 2.9347, "step": 170000 }, { "epoch": 0.32, "learning_rate": 4.9188145901289515e-05, "loss": 2.9262, "step": 170500 }, { "epoch": 0.33, "learning_rate": 4.9185764259366524e-05, "loss": 2.9046, "step": 171000 }, { "epoch": 0.33, "learning_rate": 4.918338261744353e-05, "loss": 2.9363, "step": 171500 }, { "epoch": 0.33, "learning_rate": 4.9181000975520536e-05, "loss": 2.9085, "step": 172000 }, { "epoch": 0.33, "learning_rate": 4.917861933359754e-05, "loss": 2.9274, "step": 172500 }, { "epoch": 0.33, "learning_rate": 4.917623769167454e-05, "loss": 2.9339, "step": 173000 }, { "epoch": 0.33, "learning_rate": 4.9173860813035394e-05, "loss": 2.9248, "step": 173500 }, { "epoch": 0.33, "learning_rate": 4.91714791711124e-05, "loss": 2.9374, "step": 174000 }, { "epoch": 0.33, "learning_rate": 4.9169097529189406e-05, "loss": 2.9056, "step": 174500 }, { "epoch": 0.33, "learning_rate": 4.9166715887266415e-05, "loss": 2.9192, "step": 175000 }, { "epoch": 0.33, "learning_rate": 4.916433424534342e-05, "loss": 2.9084, "step": 175500 }, { "epoch": 0.34, "learning_rate": 4.916195260342042e-05, "loss": 2.9345, "step": 176000 }, { "epoch": 0.34, "learning_rate": 4.915957096149742e-05, "loss": 2.9072, "step": 176500 }, { "epoch": 0.34, "learning_rate": 4.915718931957443e-05, "loss": 2.9283, "step": 177000 }, { "epoch": 0.34, "learning_rate": 4.9154807677651435e-05, "loss": 2.9169, "step": 177500 }, { "epoch": 0.34, "learning_rate": 4.9152426035728444e-05, "loss": 2.9219, "step": 178000 }, { "epoch": 0.34, "learning_rate": 4.915004439380545e-05, "loss": 2.9168, "step": 178500 }, { "epoch": 0.34, "learning_rate": 4.914766275188245e-05, "loss": 2.9338, "step": 179000 }, { "epoch": 0.34, "learning_rate": 4.91452858732433e-05, "loss": 2.9176, "step": 179500 }, { "epoch": 0.34, "learning_rate": 4.9142908994604154e-05, "loss": 2.915, "step": 180000 }, { "epoch": 0.34, "eval_accuracy": 0.4856050130174702, "eval_loss": 2.8177905082702637, "eval_runtime": 4193.3028, "eval_samples_per_second": 65.578, "eval_steps_per_second": 6.558, "step": 180000 }, { "epoch": 0.34, "learning_rate": 4.914052735268116e-05, "loss": 2.9, "step": 180500 }, { "epoch": 0.34, "learning_rate": 4.9138145710758166e-05, "loss": 2.9127, "step": 181000 }, { "epoch": 0.35, "learning_rate": 4.913576406883517e-05, "loss": 2.8993, "step": 181500 }, { "epoch": 0.35, "learning_rate": 4.913338719019602e-05, "loss": 2.9255, "step": 182000 }, { "epoch": 0.35, "learning_rate": 4.9131005548273024e-05, "loss": 2.9263, "step": 182500 }, { "epoch": 0.35, "learning_rate": 4.912862390635003e-05, "loss": 2.9172, "step": 183000 }, { "epoch": 0.35, "learning_rate": 4.9126242264427036e-05, "loss": 2.9216, "step": 183500 }, { "epoch": 0.35, "learning_rate": 4.9123860622504045e-05, "loss": 2.9132, "step": 184000 }, { "epoch": 0.35, "learning_rate": 4.912147898058105e-05, "loss": 2.8923, "step": 184500 }, { "epoch": 0.35, "learning_rate": 4.911909733865806e-05, "loss": 2.8949, "step": 185000 }, { "epoch": 0.35, "learning_rate": 4.91167204600189e-05, "loss": 2.9391, "step": 185500 }, { "epoch": 0.35, "learning_rate": 4.9114338818095905e-05, "loss": 2.8978, "step": 186000 }, { "epoch": 0.36, "learning_rate": 4.9111957176172915e-05, "loss": 2.9102, "step": 186500 }, { "epoch": 0.36, "learning_rate": 4.910957553424992e-05, "loss": 2.8995, "step": 187000 }, { "epoch": 0.36, "learning_rate": 4.910719389232693e-05, "loss": 2.8929, "step": 187500 }, { "epoch": 0.36, "learning_rate": 4.910481225040393e-05, "loss": 2.892, "step": 188000 }, { "epoch": 0.36, "learning_rate": 4.910243060848093e-05, "loss": 2.9014, "step": 188500 }, { "epoch": 0.36, "learning_rate": 4.9100048966557935e-05, "loss": 2.8879, "step": 189000 }, { "epoch": 0.36, "learning_rate": 4.9097667324634944e-05, "loss": 2.9027, "step": 189500 }, { "epoch": 0.36, "learning_rate": 4.9095285682711953e-05, "loss": 2.8988, "step": 190000 }, { "epoch": 0.36, "learning_rate": 4.90929088040728e-05, "loss": 2.9274, "step": 190500 }, { "epoch": 0.36, "learning_rate": 4.909052716214981e-05, "loss": 2.9009, "step": 191000 }, { "epoch": 0.36, "learning_rate": 4.908814552022681e-05, "loss": 2.9112, "step": 191500 }, { "epoch": 0.37, "learning_rate": 4.9085763878303814e-05, "loss": 2.8892, "step": 192000 }, { "epoch": 0.37, "learning_rate": 4.9083386999664666e-05, "loss": 2.9021, "step": 192500 }, { "epoch": 0.37, "learning_rate": 4.908100535774167e-05, "loss": 2.883, "step": 193000 }, { "epoch": 0.37, "learning_rate": 4.907862371581868e-05, "loss": 2.9087, "step": 193500 }, { "epoch": 0.37, "learning_rate": 4.907624207389569e-05, "loss": 2.8922, "step": 194000 }, { "epoch": 0.37, "learning_rate": 4.907386043197269e-05, "loss": 2.8925, "step": 194500 }, { "epoch": 0.37, "learning_rate": 4.907147879004969e-05, "loss": 2.8823, "step": 195000 }, { "epoch": 0.37, "learning_rate": 4.9069097148126695e-05, "loss": 2.9049, "step": 195500 }, { "epoch": 0.37, "learning_rate": 4.9066715506203705e-05, "loss": 2.8924, "step": 196000 }, { "epoch": 0.37, "learning_rate": 4.906433386428071e-05, "loss": 2.9121, "step": 196500 }, { "epoch": 0.38, "learning_rate": 4.906195222235772e-05, "loss": 2.8982, "step": 197000 }, { "epoch": 0.38, "learning_rate": 4.905957534371857e-05, "loss": 2.8903, "step": 197500 }, { "epoch": 0.38, "learning_rate": 4.905719846507942e-05, "loss": 2.8758, "step": 198000 }, { "epoch": 0.38, "learning_rate": 4.905481682315642e-05, "loss": 2.8884, "step": 198500 }, { "epoch": 0.38, "learning_rate": 4.9052435181233426e-05, "loss": 2.9044, "step": 199000 }, { "epoch": 0.38, "learning_rate": 4.905005353931043e-05, "loss": 2.8927, "step": 199500 }, { "epoch": 0.38, "learning_rate": 4.904767189738744e-05, "loss": 2.8837, "step": 200000 }, { "epoch": 0.38, "learning_rate": 4.904529025546444e-05, "loss": 2.8822, "step": 200500 }, { "epoch": 0.38, "learning_rate": 4.904290861354145e-05, "loss": 2.8707, "step": 201000 }, { "epoch": 0.38, "learning_rate": 4.904052697161845e-05, "loss": 2.8843, "step": 201500 }, { "epoch": 0.38, "learning_rate": 4.9038145329695456e-05, "loss": 2.8957, "step": 202000 }, { "epoch": 0.39, "learning_rate": 4.9035763687772465e-05, "loss": 2.8859, "step": 202500 }, { "epoch": 0.39, "learning_rate": 4.903338204584947e-05, "loss": 2.8871, "step": 203000 }, { "epoch": 0.39, "learning_rate": 4.903100040392648e-05, "loss": 2.8926, "step": 203500 }, { "epoch": 0.39, "learning_rate": 4.902861876200347e-05, "loss": 2.8846, "step": 204000 }, { "epoch": 0.39, "learning_rate": 4.9026241883364325e-05, "loss": 2.8715, "step": 204500 }, { "epoch": 0.39, "learning_rate": 4.9023860241441335e-05, "loss": 2.8811, "step": 205000 }, { "epoch": 0.39, "learning_rate": 4.902148336280219e-05, "loss": 2.9095, "step": 205500 }, { "epoch": 0.39, "learning_rate": 4.901910172087919e-05, "loss": 2.8823, "step": 206000 }, { "epoch": 0.39, "learning_rate": 4.90167200789562e-05, "loss": 2.8913, "step": 206500 }, { "epoch": 0.39, "learning_rate": 4.90143384370332e-05, "loss": 2.8882, "step": 207000 }, { "epoch": 0.4, "learning_rate": 4.9011956795110204e-05, "loss": 2.8912, "step": 207500 }, { "epoch": 0.4, "learning_rate": 4.900957515318721e-05, "loss": 2.8725, "step": 208000 }, { "epoch": 0.4, "learning_rate": 4.9007193511264216e-05, "loss": 2.8965, "step": 208500 }, { "epoch": 0.4, "learning_rate": 4.900481186934122e-05, "loss": 2.8667, "step": 209000 }, { "epoch": 0.4, "learning_rate": 4.900243499070207e-05, "loss": 2.8925, "step": 209500 }, { "epoch": 0.4, "learning_rate": 4.900005334877908e-05, "loss": 2.8614, "step": 210000 }, { "epoch": 0.4, "eval_accuracy": 0.4904832992907921, "eval_loss": 2.780757427215576, "eval_runtime": 4196.6661, "eval_samples_per_second": 65.525, "eval_steps_per_second": 6.553, "step": 210000 }, { "epoch": 0.4, "learning_rate": 4.899767647013993e-05, "loss": 2.8782, "step": 210500 }, { "epoch": 0.4, "learning_rate": 4.899529482821693e-05, "loss": 2.8703, "step": 211000 }, { "epoch": 0.4, "learning_rate": 4.899291318629394e-05, "loss": 2.8613, "step": 211500 }, { "epoch": 0.4, "learning_rate": 4.899053154437094e-05, "loss": 2.8787, "step": 212000 }, { "epoch": 0.4, "learning_rate": 4.898814990244795e-05, "loss": 2.8618, "step": 212500 }, { "epoch": 0.41, "learning_rate": 4.898576826052495e-05, "loss": 2.8789, "step": 213000 }, { "epoch": 0.41, "learning_rate": 4.898338661860196e-05, "loss": 2.883, "step": 213500 }, { "epoch": 0.41, "learning_rate": 4.8981004976678965e-05, "loss": 2.8674, "step": 214000 }, { "epoch": 0.41, "learning_rate": 4.897862333475597e-05, "loss": 2.8753, "step": 214500 }, { "epoch": 0.41, "learning_rate": 4.897624645611682e-05, "loss": 2.8818, "step": 215000 }, { "epoch": 0.41, "learning_rate": 4.897386481419383e-05, "loss": 2.8554, "step": 215500 }, { "epoch": 0.41, "learning_rate": 4.897148317227083e-05, "loss": 2.8524, "step": 216000 }, { "epoch": 0.41, "learning_rate": 4.8969106293631684e-05, "loss": 2.8753, "step": 216500 }, { "epoch": 0.41, "learning_rate": 4.896672465170869e-05, "loss": 2.8829, "step": 217000 }, { "epoch": 0.41, "learning_rate": 4.896434300978569e-05, "loss": 2.8492, "step": 217500 }, { "epoch": 0.42, "learning_rate": 4.89619613678627e-05, "loss": 2.8549, "step": 218000 }, { "epoch": 0.42, "learning_rate": 4.89595797259397e-05, "loss": 2.8714, "step": 218500 }, { "epoch": 0.42, "learning_rate": 4.895719808401671e-05, "loss": 2.8793, "step": 219000 }, { "epoch": 0.42, "learning_rate": 4.8954816442093713e-05, "loss": 2.8662, "step": 219500 }, { "epoch": 0.42, "learning_rate": 4.895243480017072e-05, "loss": 2.8771, "step": 220000 }, { "epoch": 0.42, "learning_rate": 4.895005315824772e-05, "loss": 2.8731, "step": 220500 }, { "epoch": 0.42, "learning_rate": 4.894767627960857e-05, "loss": 2.8633, "step": 221000 }, { "epoch": 0.42, "learning_rate": 4.894529463768558e-05, "loss": 2.8699, "step": 221500 }, { "epoch": 0.42, "learning_rate": 4.894291775904643e-05, "loss": 2.8709, "step": 222000 }, { "epoch": 0.42, "learning_rate": 4.8940536117123435e-05, "loss": 2.871, "step": 222500 }, { "epoch": 0.42, "learning_rate": 4.8938154475200445e-05, "loss": 2.8695, "step": 223000 }, { "epoch": 0.43, "learning_rate": 4.89357775965613e-05, "loss": 2.8483, "step": 223500 }, { "epoch": 0.43, "learning_rate": 4.89333959546383e-05, "loss": 2.8684, "step": 224000 }, { "epoch": 0.43, "learning_rate": 4.89310143127153e-05, "loss": 2.8714, "step": 224500 }, { "epoch": 0.43, "learning_rate": 4.8928632670792305e-05, "loss": 2.8452, "step": 225000 }, { "epoch": 0.43, "learning_rate": 4.8926251028869314e-05, "loss": 2.8415, "step": 225500 }, { "epoch": 0.43, "learning_rate": 4.892386938694632e-05, "loss": 2.8586, "step": 226000 }, { "epoch": 0.43, "learning_rate": 4.8921487745023326e-05, "loss": 2.8664, "step": 226500 }, { "epoch": 0.43, "learning_rate": 4.891910610310033e-05, "loss": 2.8464, "step": 227000 }, { "epoch": 0.43, "learning_rate": 4.891672446117733e-05, "loss": 2.8437, "step": 227500 }, { "epoch": 0.43, "learning_rate": 4.891434281925434e-05, "loss": 2.8615, "step": 228000 }, { "epoch": 0.44, "learning_rate": 4.8911961177331344e-05, "loss": 2.8492, "step": 228500 }, { "epoch": 0.44, "learning_rate": 4.890957953540835e-05, "loss": 2.8733, "step": 229000 }, { "epoch": 0.44, "learning_rate": 4.8907202656769205e-05, "loss": 2.8338, "step": 229500 }, { "epoch": 0.44, "learning_rate": 4.89048210148462e-05, "loss": 2.8462, "step": 230000 }, { "epoch": 0.44, "learning_rate": 4.890243937292321e-05, "loss": 2.8606, "step": 230500 }, { "epoch": 0.44, "learning_rate": 4.890005773100021e-05, "loss": 2.8544, "step": 231000 }, { "epoch": 0.44, "learning_rate": 4.889767608907722e-05, "loss": 2.844, "step": 231500 }, { "epoch": 0.44, "learning_rate": 4.8895294447154225e-05, "loss": 2.8514, "step": 232000 }, { "epoch": 0.44, "learning_rate": 4.8892912805231235e-05, "loss": 2.8638, "step": 232500 }, { "epoch": 0.44, "learning_rate": 4.889053116330824e-05, "loss": 2.8432, "step": 233000 }, { "epoch": 0.44, "learning_rate": 4.888814952138524e-05, "loss": 2.8588, "step": 233500 }, { "epoch": 0.45, "learning_rate": 4.888577264274609e-05, "loss": 2.8633, "step": 234000 }, { "epoch": 0.45, "learning_rate": 4.8883391000823095e-05, "loss": 2.8489, "step": 234500 }, { "epoch": 0.45, "learning_rate": 4.8881009358900104e-05, "loss": 2.8559, "step": 235000 }, { "epoch": 0.45, "learning_rate": 4.887862771697711e-05, "loss": 2.8523, "step": 235500 }, { "epoch": 0.45, "learning_rate": 4.887625083833796e-05, "loss": 2.8658, "step": 236000 }, { "epoch": 0.45, "learning_rate": 4.887386919641496e-05, "loss": 2.8777, "step": 236500 }, { "epoch": 0.45, "learning_rate": 4.887148755449197e-05, "loss": 2.8526, "step": 237000 }, { "epoch": 0.45, "learning_rate": 4.8869105912568974e-05, "loss": 2.8665, "step": 237500 }, { "epoch": 0.45, "learning_rate": 4.8866729033929826e-05, "loss": 2.8526, "step": 238000 }, { "epoch": 0.45, "learning_rate": 4.886434739200683e-05, "loss": 2.8565, "step": 238500 }, { "epoch": 0.46, "learning_rate": 4.886197051336768e-05, "loss": 2.8483, "step": 239000 }, { "epoch": 0.46, "learning_rate": 4.885958887144469e-05, "loss": 2.8477, "step": 239500 }, { "epoch": 0.46, "learning_rate": 4.885720722952169e-05, "loss": 2.8218, "step": 240000 }, { "epoch": 0.46, "eval_accuracy": 0.49425977617706335, "eval_loss": 2.7505943775177, "eval_runtime": 4203.7249, "eval_samples_per_second": 65.415, "eval_steps_per_second": 6.542, "step": 240000 }, { "epoch": 0.46, "learning_rate": 4.8854825587598696e-05, "loss": 2.8244, "step": 240500 }, { "epoch": 0.46, "learning_rate": 4.8852443945675705e-05, "loss": 2.8449, "step": 241000 }, { "epoch": 0.46, "learning_rate": 4.885006230375271e-05, "loss": 2.8438, "step": 241500 }, { "epoch": 0.46, "learning_rate": 4.884768066182972e-05, "loss": 2.8285, "step": 242000 }, { "epoch": 0.46, "learning_rate": 4.884529901990672e-05, "loss": 2.8541, "step": 242500 }, { "epoch": 0.46, "learning_rate": 4.884292214126757e-05, "loss": 2.8516, "step": 243000 }, { "epoch": 0.46, "learning_rate": 4.8840545262628424e-05, "loss": 2.8282, "step": 243500 }, { "epoch": 0.46, "learning_rate": 4.883816362070543e-05, "loss": 2.8558, "step": 244000 }, { "epoch": 0.47, "learning_rate": 4.883578197878243e-05, "loss": 2.8516, "step": 244500 }, { "epoch": 0.47, "learning_rate": 4.883340033685944e-05, "loss": 2.8527, "step": 245000 }, { "epoch": 0.47, "learning_rate": 4.883101869493644e-05, "loss": 2.8431, "step": 245500 }, { "epoch": 0.47, "learning_rate": 4.882863705301345e-05, "loss": 2.8627, "step": 246000 }, { "epoch": 0.47, "learning_rate": 4.882625541109045e-05, "loss": 2.843, "step": 246500 }, { "epoch": 0.47, "learning_rate": 4.8823873769167456e-05, "loss": 2.8415, "step": 247000 }, { "epoch": 0.47, "learning_rate": 4.882149689052831e-05, "loss": 2.8414, "step": 247500 }, { "epoch": 0.47, "learning_rate": 4.881911524860531e-05, "loss": 2.8278, "step": 248000 }, { "epoch": 0.47, "learning_rate": 4.881673360668232e-05, "loss": 2.8495, "step": 248500 }, { "epoch": 0.47, "learning_rate": 4.881435196475932e-05, "loss": 2.8635, "step": 249000 }, { "epoch": 0.48, "learning_rate": 4.8811970322836326e-05, "loss": 2.8482, "step": 249500 }, { "epoch": 0.48, "learning_rate": 4.8809588680913335e-05, "loss": 2.8395, "step": 250000 }, { "epoch": 0.48, "learning_rate": 4.880720703899034e-05, "loss": 2.8385, "step": 250500 }, { "epoch": 0.48, "learning_rate": 4.880483016035119e-05, "loss": 2.8411, "step": 251000 }, { "epoch": 0.48, "learning_rate": 4.880244851842819e-05, "loss": 2.8349, "step": 251500 }, { "epoch": 0.48, "learning_rate": 4.88000668765052e-05, "loss": 2.8378, "step": 252000 }, { "epoch": 0.48, "learning_rate": 4.8797685234582205e-05, "loss": 2.8516, "step": 252500 }, { "epoch": 0.48, "learning_rate": 4.879530359265921e-05, "loss": 2.8434, "step": 253000 }, { "epoch": 0.48, "learning_rate": 4.879292195073622e-05, "loss": 2.8206, "step": 253500 }, { "epoch": 0.48, "learning_rate": 4.879054030881322e-05, "loss": 2.8255, "step": 254000 }, { "epoch": 0.48, "learning_rate": 4.878816343017407e-05, "loss": 2.8323, "step": 254500 }, { "epoch": 0.49, "learning_rate": 4.878578178825108e-05, "loss": 2.827, "step": 255000 }, { "epoch": 0.49, "learning_rate": 4.8783404909611927e-05, "loss": 2.8372, "step": 255500 }, { "epoch": 0.49, "learning_rate": 4.8781023267688936e-05, "loss": 2.8374, "step": 256000 }, { "epoch": 0.49, "learning_rate": 4.877864162576594e-05, "loss": 2.8344, "step": 256500 }, { "epoch": 0.49, "learning_rate": 4.877625998384294e-05, "loss": 2.8446, "step": 257000 }, { "epoch": 0.49, "learning_rate": 4.877387834191995e-05, "loss": 2.8356, "step": 257500 }, { "epoch": 0.49, "learning_rate": 4.877149669999695e-05, "loss": 2.8469, "step": 258000 }, { "epoch": 0.49, "learning_rate": 4.876911505807396e-05, "loss": 2.8382, "step": 258500 }, { "epoch": 0.49, "learning_rate": 4.876673341615096e-05, "loss": 2.8439, "step": 259000 }, { "epoch": 0.49, "learning_rate": 4.876435177422797e-05, "loss": 2.8362, "step": 259500 }, { "epoch": 0.5, "learning_rate": 4.876197489558882e-05, "loss": 2.8284, "step": 260000 }, { "epoch": 0.5, "learning_rate": 4.875959325366582e-05, "loss": 2.8452, "step": 260500 }, { "epoch": 0.5, "learning_rate": 4.875721161174283e-05, "loss": 2.8049, "step": 261000 }, { "epoch": 0.5, "learning_rate": 4.8754829969819835e-05, "loss": 2.8301, "step": 261500 }, { "epoch": 0.5, "learning_rate": 4.8752448327896844e-05, "loss": 2.8383, "step": 262000 }, { "epoch": 0.5, "learning_rate": 4.875006668597385e-05, "loss": 2.8385, "step": 262500 }, { "epoch": 0.5, "learning_rate": 4.874768504405085e-05, "loss": 2.8262, "step": 263000 }, { "epoch": 0.5, "learning_rate": 4.874530340212786e-05, "loss": 2.8484, "step": 263500 }, { "epoch": 0.5, "learning_rate": 4.874292176020486e-05, "loss": 2.823, "step": 264000 }, { "epoch": 0.5, "learning_rate": 4.8740544881565714e-05, "loss": 2.8355, "step": 264500 }, { "epoch": 0.5, "learning_rate": 4.873816323964272e-05, "loss": 2.8497, "step": 265000 }, { "epoch": 0.51, "learning_rate": 4.873578636100357e-05, "loss": 2.8228, "step": 265500 }, { "epoch": 0.51, "learning_rate": 4.873340471908057e-05, "loss": 2.8205, "step": 266000 }, { "epoch": 0.51, "learning_rate": 4.873102307715758e-05, "loss": 2.8133, "step": 266500 }, { "epoch": 0.51, "learning_rate": 4.872864143523458e-05, "loss": 2.8296, "step": 267000 }, { "epoch": 0.51, "learning_rate": 4.872625979331159e-05, "loss": 2.8099, "step": 267500 }, { "epoch": 0.51, "learning_rate": 4.8723878151388595e-05, "loss": 2.8249, "step": 268000 }, { "epoch": 0.51, "learning_rate": 4.87214965094656e-05, "loss": 2.823, "step": 268500 }, { "epoch": 0.51, "learning_rate": 4.87191148675426e-05, "loss": 2.8376, "step": 269000 }, { "epoch": 0.51, "learning_rate": 4.871673322561961e-05, "loss": 2.8406, "step": 269500 }, { "epoch": 0.51, "learning_rate": 4.871435158369661e-05, "loss": 2.8271, "step": 270000 }, { "epoch": 0.51, "eval_accuracy": 0.4973672893658376, "eval_loss": 2.72774076461792, "eval_runtime": 4191.8433, "eval_samples_per_second": 65.601, "eval_steps_per_second": 6.56, "step": 270000 }, { "epoch": 0.52, "learning_rate": 4.8711974705057465e-05, "loss": 2.8431, "step": 270500 }, { "epoch": 0.52, "learning_rate": 4.8709593063134474e-05, "loss": 2.7967, "step": 271000 }, { "epoch": 0.52, "learning_rate": 4.870721142121148e-05, "loss": 2.8257, "step": 271500 }, { "epoch": 0.52, "learning_rate": 4.870482977928848e-05, "loss": 2.8253, "step": 272000 }, { "epoch": 0.52, "learning_rate": 4.870244813736549e-05, "loss": 2.8402, "step": 272500 }, { "epoch": 0.52, "learning_rate": 4.8700071258726335e-05, "loss": 2.8292, "step": 273000 }, { "epoch": 0.52, "learning_rate": 4.8697689616803344e-05, "loss": 2.8208, "step": 273500 }, { "epoch": 0.52, "learning_rate": 4.8695307974880347e-05, "loss": 2.8185, "step": 274000 }, { "epoch": 0.52, "learning_rate": 4.8692926332957356e-05, "loss": 2.7985, "step": 274500 }, { "epoch": 0.52, "learning_rate": 4.869054469103436e-05, "loss": 2.8334, "step": 275000 }, { "epoch": 0.52, "learning_rate": 4.868816781239521e-05, "loss": 2.8096, "step": 275500 }, { "epoch": 0.53, "learning_rate": 4.8685786170472213e-05, "loss": 2.8228, "step": 276000 }, { "epoch": 0.53, "learning_rate": 4.868340452854922e-05, "loss": 2.818, "step": 276500 }, { "epoch": 0.53, "learning_rate": 4.8681022886626226e-05, "loss": 2.8238, "step": 277000 }, { "epoch": 0.53, "learning_rate": 4.867864600798708e-05, "loss": 2.8391, "step": 277500 }, { "epoch": 0.53, "learning_rate": 4.867626912934793e-05, "loss": 2.8469, "step": 278000 }, { "epoch": 0.53, "learning_rate": 4.867388748742493e-05, "loss": 2.8091, "step": 278500 }, { "epoch": 0.53, "learning_rate": 4.8671505845501935e-05, "loss": 2.8445, "step": 279000 }, { "epoch": 0.53, "learning_rate": 4.866912896686279e-05, "loss": 2.8206, "step": 279500 }, { "epoch": 0.53, "learning_rate": 4.866674732493979e-05, "loss": 2.8077, "step": 280000 }, { "epoch": 0.53, "learning_rate": 4.86643656830168e-05, "loss": 2.8338, "step": 280500 }, { "epoch": 0.54, "learning_rate": 4.86619840410938e-05, "loss": 2.8354, "step": 281000 }, { "epoch": 0.54, "learning_rate": 4.865960239917081e-05, "loss": 2.8186, "step": 281500 }, { "epoch": 0.54, "learning_rate": 4.8657220757247814e-05, "loss": 2.8138, "step": 282000 }, { "epoch": 0.54, "learning_rate": 4.865483911532482e-05, "loss": 2.8095, "step": 282500 }, { "epoch": 0.54, "learning_rate": 4.8652457473401826e-05, "loss": 2.797, "step": 283000 }, { "epoch": 0.54, "learning_rate": 4.865008059476268e-05, "loss": 2.8117, "step": 283500 }, { "epoch": 0.54, "learning_rate": 4.864769895283968e-05, "loss": 2.8167, "step": 284000 }, { "epoch": 0.54, "learning_rate": 4.864531731091669e-05, "loss": 2.8184, "step": 284500 }, { "epoch": 0.54, "learning_rate": 4.864293566899369e-05, "loss": 2.8027, "step": 285000 }, { "epoch": 0.54, "learning_rate": 4.8640554027070696e-05, "loss": 2.8234, "step": 285500 }, { "epoch": 0.54, "learning_rate": 4.86381723851477e-05, "loss": 2.8145, "step": 286000 }, { "epoch": 0.55, "learning_rate": 4.863579074322471e-05, "loss": 2.8101, "step": 286500 }, { "epoch": 0.55, "learning_rate": 4.863340910130171e-05, "loss": 2.8219, "step": 287000 }, { "epoch": 0.55, "learning_rate": 4.863102745937872e-05, "loss": 2.8181, "step": 287500 }, { "epoch": 0.55, "learning_rate": 4.862865058073957e-05, "loss": 2.7895, "step": 288000 }, { "epoch": 0.55, "learning_rate": 4.8626273702100425e-05, "loss": 2.8217, "step": 288500 }, { "epoch": 0.55, "learning_rate": 4.862389206017742e-05, "loss": 2.8212, "step": 289000 }, { "epoch": 0.55, "learning_rate": 4.862151041825443e-05, "loss": 2.802, "step": 289500 }, { "epoch": 0.55, "learning_rate": 4.861912877633143e-05, "loss": 2.8141, "step": 290000 }, { "epoch": 0.55, "learning_rate": 4.861674713440844e-05, "loss": 2.8115, "step": 290500 }, { "epoch": 0.55, "learning_rate": 4.8614370255769294e-05, "loss": 2.8154, "step": 291000 }, { "epoch": 0.56, "learning_rate": 4.8611993377130146e-05, "loss": 2.8161, "step": 291500 }, { "epoch": 0.56, "learning_rate": 4.860961173520715e-05, "loss": 2.8066, "step": 292000 }, { "epoch": 0.56, "learning_rate": 4.860723009328415e-05, "loss": 2.8004, "step": 292500 }, { "epoch": 0.56, "learning_rate": 4.8604848451361154e-05, "loss": 2.7903, "step": 293000 }, { "epoch": 0.56, "learning_rate": 4.8602466809438164e-05, "loss": 2.8093, "step": 293500 }, { "epoch": 0.56, "learning_rate": 4.8600085167515166e-05, "loss": 2.8132, "step": 294000 }, { "epoch": 0.56, "learning_rate": 4.8597703525592176e-05, "loss": 2.7986, "step": 294500 }, { "epoch": 0.56, "learning_rate": 4.859532188366918e-05, "loss": 2.8011, "step": 295000 }, { "epoch": 0.56, "learning_rate": 4.859294024174618e-05, "loss": 2.829, "step": 295500 }, { "epoch": 0.56, "learning_rate": 4.859055859982319e-05, "loss": 2.8242, "step": 296000 }, { "epoch": 0.56, "learning_rate": 4.858817695790019e-05, "loss": 2.8085, "step": 296500 }, { "epoch": 0.57, "learning_rate": 4.85857953159772e-05, "loss": 2.8147, "step": 297000 }, { "epoch": 0.57, "learning_rate": 4.8583413674054205e-05, "loss": 2.7879, "step": 297500 }, { "epoch": 0.57, "learning_rate": 4.858103679541506e-05, "loss": 2.8046, "step": 298000 }, { "epoch": 0.57, "learning_rate": 4.857865515349206e-05, "loss": 2.7977, "step": 298500 }, { "epoch": 0.57, "learning_rate": 4.857627351156906e-05, "loss": 2.8018, "step": 299000 }, { "epoch": 0.57, "learning_rate": 4.8573896632929915e-05, "loss": 2.8201, "step": 299500 }, { "epoch": 0.57, "learning_rate": 4.8571514991006924e-05, "loss": 2.7841, "step": 300000 }, { "epoch": 0.57, "eval_accuracy": 0.500198121148619, "eval_loss": 2.706984519958496, "eval_runtime": 4175.95, "eval_samples_per_second": 65.85, "eval_steps_per_second": 6.585, "step": 300000 }, { "epoch": 0.57, "learning_rate": 4.856913334908393e-05, "loss": 2.7935, "step": 300500 }, { "epoch": 0.57, "learning_rate": 4.8566751707160936e-05, "loss": 2.7856, "step": 301000 }, { "epoch": 0.57, "learning_rate": 4.856437006523793e-05, "loss": 2.8056, "step": 301500 }, { "epoch": 0.58, "learning_rate": 4.856198842331494e-05, "loss": 2.8014, "step": 302000 }, { "epoch": 0.58, "learning_rate": 4.8559606781391944e-05, "loss": 2.7991, "step": 302500 }, { "epoch": 0.58, "learning_rate": 4.8557225139468954e-05, "loss": 2.8168, "step": 303000 }, { "epoch": 0.58, "learning_rate": 4.855484349754596e-05, "loss": 2.8249, "step": 303500 }, { "epoch": 0.58, "learning_rate": 4.855246661890681e-05, "loss": 2.8117, "step": 304000 }, { "epoch": 0.58, "learning_rate": 4.855008497698382e-05, "loss": 2.7977, "step": 304500 }, { "epoch": 0.58, "learning_rate": 4.854770333506082e-05, "loss": 2.8028, "step": 305000 }, { "epoch": 0.58, "learning_rate": 4.854532169313782e-05, "loss": 2.8074, "step": 305500 }, { "epoch": 0.58, "learning_rate": 4.8542944814498675e-05, "loss": 2.8115, "step": 306000 }, { "epoch": 0.58, "learning_rate": 4.8540563172575685e-05, "loss": 2.8118, "step": 306500 }, { "epoch": 0.58, "learning_rate": 4.853818153065269e-05, "loss": 2.8042, "step": 307000 }, { "epoch": 0.59, "learning_rate": 4.85357998887297e-05, "loss": 2.8004, "step": 307500 }, { "epoch": 0.59, "learning_rate": 4.853342301009054e-05, "loss": 2.8269, "step": 308000 }, { "epoch": 0.59, "learning_rate": 4.8531041368167545e-05, "loss": 2.7948, "step": 308500 }, { "epoch": 0.59, "learning_rate": 4.8528659726244554e-05, "loss": 2.8262, "step": 309000 }, { "epoch": 0.59, "learning_rate": 4.852627808432156e-05, "loss": 2.7992, "step": 309500 }, { "epoch": 0.59, "learning_rate": 4.8523896442398566e-05, "loss": 2.8108, "step": 310000 }, { "epoch": 0.59, "learning_rate": 4.852151956375942e-05, "loss": 2.7821, "step": 310500 }, { "epoch": 0.59, "learning_rate": 4.851913792183642e-05, "loss": 2.8114, "step": 311000 }, { "epoch": 0.59, "learning_rate": 4.8516756279913424e-05, "loss": 2.8033, "step": 311500 }, { "epoch": 0.59, "learning_rate": 4.8514374637990427e-05, "loss": 2.7979, "step": 312000 }, { "epoch": 0.6, "learning_rate": 4.851199775935128e-05, "loss": 2.794, "step": 312500 }, { "epoch": 0.6, "learning_rate": 4.850961611742829e-05, "loss": 2.7865, "step": 313000 }, { "epoch": 0.6, "learning_rate": 4.850723447550529e-05, "loss": 2.7929, "step": 313500 }, { "epoch": 0.6, "learning_rate": 4.85048528335823e-05, "loss": 2.7888, "step": 314000 }, { "epoch": 0.6, "learning_rate": 4.85024711916593e-05, "loss": 2.8074, "step": 314500 }, { "epoch": 0.6, "learning_rate": 4.8500089549736306e-05, "loss": 2.7982, "step": 315000 }, { "epoch": 0.6, "learning_rate": 4.849770790781331e-05, "loss": 2.7926, "step": 315500 }, { "epoch": 0.6, "learning_rate": 4.849532626589032e-05, "loss": 2.8047, "step": 316000 }, { "epoch": 0.6, "learning_rate": 4.849294462396732e-05, "loss": 2.7995, "step": 316500 }, { "epoch": 0.6, "learning_rate": 4.849056774532817e-05, "loss": 2.8089, "step": 317000 }, { "epoch": 0.6, "learning_rate": 4.848818610340518e-05, "loss": 2.8066, "step": 317500 }, { "epoch": 0.61, "learning_rate": 4.8485804461482185e-05, "loss": 2.8025, "step": 318000 }, { "epoch": 0.61, "learning_rate": 4.848342758284303e-05, "loss": 2.8031, "step": 318500 }, { "epoch": 0.61, "learning_rate": 4.848104594092004e-05, "loss": 2.7867, "step": 319000 }, { "epoch": 0.61, "learning_rate": 4.847866429899704e-05, "loss": 2.7925, "step": 319500 }, { "epoch": 0.61, "learning_rate": 4.847628265707405e-05, "loss": 2.8044, "step": 320000 }, { "epoch": 0.61, "learning_rate": 4.847390101515106e-05, "loss": 2.7977, "step": 320500 }, { "epoch": 0.61, "learning_rate": 4.8471524136511906e-05, "loss": 2.8039, "step": 321000 }, { "epoch": 0.61, "learning_rate": 4.846914249458891e-05, "loss": 2.77, "step": 321500 }, { "epoch": 0.61, "learning_rate": 4.846676085266592e-05, "loss": 2.8138, "step": 322000 }, { "epoch": 0.61, "learning_rate": 4.846437921074292e-05, "loss": 2.7847, "step": 322500 }, { "epoch": 0.62, "learning_rate": 4.846199756881993e-05, "loss": 2.8014, "step": 323000 }, { "epoch": 0.62, "learning_rate": 4.845961592689693e-05, "loss": 2.7839, "step": 323500 }, { "epoch": 0.62, "learning_rate": 4.845723428497394e-05, "loss": 2.8027, "step": 324000 }, { "epoch": 0.62, "learning_rate": 4.8454857406334795e-05, "loss": 2.8037, "step": 324500 }, { "epoch": 0.62, "learning_rate": 4.845247576441179e-05, "loss": 2.7796, "step": 325000 }, { "epoch": 0.62, "learning_rate": 4.84500941224888e-05, "loss": 2.7834, "step": 325500 }, { "epoch": 0.62, "learning_rate": 4.84477124805658e-05, "loss": 2.813, "step": 326000 }, { "epoch": 0.62, "learning_rate": 4.844533083864281e-05, "loss": 2.794, "step": 326500 }, { "epoch": 0.62, "learning_rate": 4.8442949196719815e-05, "loss": 2.7567, "step": 327000 }, { "epoch": 0.62, "learning_rate": 4.844057231808067e-05, "loss": 2.8017, "step": 327500 }, { "epoch": 0.62, "learning_rate": 4.843819067615767e-05, "loss": 2.7848, "step": 328000 }, { "epoch": 0.63, "learning_rate": 4.843580903423467e-05, "loss": 2.8048, "step": 328500 }, { "epoch": 0.63, "learning_rate": 4.843342739231168e-05, "loss": 2.7787, "step": 329000 }, { "epoch": 0.63, "learning_rate": 4.8431045750388684e-05, "loss": 2.7954, "step": 329500 }, { "epoch": 0.63, "learning_rate": 4.8428664108465694e-05, "loss": 2.8134, "step": 330000 }, { "epoch": 0.63, "eval_accuracy": 0.5023986686076809, "eval_loss": 2.6885826587677, "eval_runtime": 4173.4446, "eval_samples_per_second": 65.89, "eval_steps_per_second": 6.589, "step": 330000 }, { "epoch": 0.63, "learning_rate": 4.8426282466542696e-05, "loss": 2.7891, "step": 330500 }, { "epoch": 0.63, "learning_rate": 4.84239008246197e-05, "loss": 2.7639, "step": 331000 }, { "epoch": 0.63, "learning_rate": 4.842152394598055e-05, "loss": 2.7844, "step": 331500 }, { "epoch": 0.63, "learning_rate": 4.841914230405756e-05, "loss": 2.7777, "step": 332000 }, { "epoch": 0.63, "learning_rate": 4.841676066213456e-05, "loss": 2.7886, "step": 332500 }, { "epoch": 0.63, "learning_rate": 4.841437902021157e-05, "loss": 2.7717, "step": 333000 }, { "epoch": 0.64, "learning_rate": 4.8411997378288575e-05, "loss": 2.7962, "step": 333500 }, { "epoch": 0.64, "learning_rate": 4.840962049964943e-05, "loss": 2.7873, "step": 334000 }, { "epoch": 0.64, "learning_rate": 4.840723885772643e-05, "loss": 2.7883, "step": 334500 }, { "epoch": 0.64, "learning_rate": 4.840485721580343e-05, "loss": 2.788, "step": 335000 }, { "epoch": 0.64, "learning_rate": 4.840247557388044e-05, "loss": 2.775, "step": 335500 }, { "epoch": 0.64, "learning_rate": 4.8400093931957445e-05, "loss": 2.7683, "step": 336000 }, { "epoch": 0.64, "learning_rate": 4.8397712290034454e-05, "loss": 2.7829, "step": 336500 }, { "epoch": 0.64, "learning_rate": 4.839533064811145e-05, "loss": 2.7823, "step": 337000 }, { "epoch": 0.64, "learning_rate": 4.83929537694723e-05, "loss": 2.7774, "step": 337500 }, { "epoch": 0.64, "learning_rate": 4.839057212754931e-05, "loss": 2.7664, "step": 338000 }, { "epoch": 0.64, "learning_rate": 4.8388190485626314e-05, "loss": 2.7895, "step": 338500 }, { "epoch": 0.65, "learning_rate": 4.8385808843703324e-05, "loss": 2.7755, "step": 339000 }, { "epoch": 0.65, "learning_rate": 4.8383427201780326e-05, "loss": 2.8094, "step": 339500 }, { "epoch": 0.65, "learning_rate": 4.838105032314118e-05, "loss": 2.7654, "step": 340000 }, { "epoch": 0.65, "learning_rate": 4.837866868121818e-05, "loss": 2.78, "step": 340500 }, { "epoch": 0.65, "learning_rate": 4.8376287039295184e-05, "loss": 2.7846, "step": 341000 }, { "epoch": 0.65, "learning_rate": 4.837390539737219e-05, "loss": 2.7928, "step": 341500 }, { "epoch": 0.65, "learning_rate": 4.83715237554492e-05, "loss": 2.7882, "step": 342000 }, { "epoch": 0.65, "learning_rate": 4.8369142113526205e-05, "loss": 2.7776, "step": 342500 }, { "epoch": 0.65, "learning_rate": 4.8366760471603215e-05, "loss": 2.7804, "step": 343000 }, { "epoch": 0.65, "learning_rate": 4.836437882968021e-05, "loss": 2.7786, "step": 343500 }, { "epoch": 0.66, "learning_rate": 4.836199718775722e-05, "loss": 2.7706, "step": 344000 }, { "epoch": 0.66, "learning_rate": 4.835962030911807e-05, "loss": 2.778, "step": 344500 }, { "epoch": 0.66, "learning_rate": 4.8357238667195075e-05, "loss": 2.7803, "step": 345000 }, { "epoch": 0.66, "learning_rate": 4.8354857025272084e-05, "loss": 2.7755, "step": 345500 }, { "epoch": 0.66, "learning_rate": 4.835247538334909e-05, "loss": 2.7542, "step": 346000 }, { "epoch": 0.66, "learning_rate": 4.835009374142609e-05, "loss": 2.7657, "step": 346500 }, { "epoch": 0.66, "learning_rate": 4.834771686278694e-05, "loss": 2.7645, "step": 347000 }, { "epoch": 0.66, "learning_rate": 4.8345339984147794e-05, "loss": 2.7823, "step": 347500 }, { "epoch": 0.66, "learning_rate": 4.83429583422248e-05, "loss": 2.7666, "step": 348000 }, { "epoch": 0.66, "learning_rate": 4.8340576700301806e-05, "loss": 2.7749, "step": 348500 }, { "epoch": 0.66, "learning_rate": 4.833819505837881e-05, "loss": 2.753, "step": 349000 }, { "epoch": 0.67, "learning_rate": 4.833581341645582e-05, "loss": 2.781, "step": 349500 }, { "epoch": 0.67, "learning_rate": 4.833343653781667e-05, "loss": 2.7814, "step": 350000 }, { "epoch": 0.67, "learning_rate": 4.8331054895893666e-05, "loss": 2.7695, "step": 350500 }, { "epoch": 0.67, "learning_rate": 4.8328673253970676e-05, "loss": 2.7446, "step": 351000 }, { "epoch": 0.67, "learning_rate": 4.832629161204768e-05, "loss": 2.7676, "step": 351500 }, { "epoch": 0.67, "learning_rate": 4.832391473340853e-05, "loss": 2.7621, "step": 352000 }, { "epoch": 0.67, "learning_rate": 4.832153309148554e-05, "loss": 2.7819, "step": 352500 }, { "epoch": 0.67, "learning_rate": 4.831915144956254e-05, "loss": 2.7752, "step": 353000 }, { "epoch": 0.67, "learning_rate": 4.8316769807639545e-05, "loss": 2.771, "step": 353500 }, { "epoch": 0.67, "learning_rate": 4.831438816571655e-05, "loss": 2.7715, "step": 354000 }, { "epoch": 0.68, "learning_rate": 4.83120112870774e-05, "loss": 2.7628, "step": 354500 }, { "epoch": 0.68, "learning_rate": 4.830962964515441e-05, "loss": 2.7833, "step": 355000 }, { "epoch": 0.68, "learning_rate": 4.830724800323141e-05, "loss": 2.7831, "step": 355500 }, { "epoch": 0.68, "learning_rate": 4.830486636130842e-05, "loss": 2.7812, "step": 356000 }, { "epoch": 0.68, "learning_rate": 4.8302484719385424e-05, "loss": 2.7579, "step": 356500 }, { "epoch": 0.68, "learning_rate": 4.830010307746243e-05, "loss": 2.7735, "step": 357000 }, { "epoch": 0.68, "learning_rate": 4.8297721435539436e-05, "loss": 2.7532, "step": 357500 }, { "epoch": 0.68, "learning_rate": 4.829533979361644e-05, "loss": 2.7823, "step": 358000 }, { "epoch": 0.68, "learning_rate": 4.8292967678261134e-05, "loss": 2.7576, "step": 358500 }, { "epoch": 0.68, "learning_rate": 4.8290586036338144e-05, "loss": 2.7734, "step": 359000 }, { "epoch": 0.68, "learning_rate": 4.8288204394415146e-05, "loss": 2.7751, "step": 359500 }, { "epoch": 0.69, "learning_rate": 4.8285822752492156e-05, "loss": 2.7722, "step": 360000 }, { "epoch": 0.69, "eval_accuracy": 0.5047063886097096, "eval_loss": 2.6718878746032715, "eval_runtime": 4172.2329, "eval_samples_per_second": 65.909, "eval_steps_per_second": 6.591, "step": 360000 }, { "epoch": 0.69, "learning_rate": 4.828344111056916e-05, "loss": 2.7687, "step": 360500 }, { "epoch": 0.69, "learning_rate": 4.828105946864616e-05, "loss": 2.7759, "step": 361000 }, { "epoch": 0.69, "learning_rate": 4.827867782672317e-05, "loss": 2.7606, "step": 361500 }, { "epoch": 0.69, "learning_rate": 4.8276300948084016e-05, "loss": 2.7773, "step": 362000 }, { "epoch": 0.69, "learning_rate": 4.8273919306161025e-05, "loss": 2.7843, "step": 362500 }, { "epoch": 0.69, "learning_rate": 4.8271537664238035e-05, "loss": 2.7704, "step": 363000 }, { "epoch": 0.69, "learning_rate": 4.826915602231503e-05, "loss": 2.7626, "step": 363500 }, { "epoch": 0.69, "learning_rate": 4.826677438039204e-05, "loss": 2.7691, "step": 364000 }, { "epoch": 0.69, "learning_rate": 4.826439273846904e-05, "loss": 2.7629, "step": 364500 }, { "epoch": 0.7, "learning_rate": 4.826201109654605e-05, "loss": 2.7711, "step": 365000 }, { "epoch": 0.7, "learning_rate": 4.8259629454623054e-05, "loss": 2.7956, "step": 365500 }, { "epoch": 0.7, "learning_rate": 4.825725257598391e-05, "loss": 2.7615, "step": 366000 }, { "epoch": 0.7, "learning_rate": 4.8254870934060916e-05, "loss": 2.774, "step": 366500 }, { "epoch": 0.7, "learning_rate": 4.825248929213791e-05, "loss": 2.7567, "step": 367000 }, { "epoch": 0.7, "learning_rate": 4.825010765021492e-05, "loss": 2.7748, "step": 367500 }, { "epoch": 0.7, "learning_rate": 4.8247726008291924e-05, "loss": 2.7682, "step": 368000 }, { "epoch": 0.7, "learning_rate": 4.8245344366368933e-05, "loss": 2.7669, "step": 368500 }, { "epoch": 0.7, "learning_rate": 4.8242962724445936e-05, "loss": 2.7774, "step": 369000 }, { "epoch": 0.7, "learning_rate": 4.824058584580679e-05, "loss": 2.7809, "step": 369500 }, { "epoch": 0.7, "learning_rate": 4.823820420388379e-05, "loss": 2.7681, "step": 370000 }, { "epoch": 0.71, "learning_rate": 4.82358225619608e-05, "loss": 2.7933, "step": 370500 }, { "epoch": 0.71, "learning_rate": 4.82334409200378e-05, "loss": 2.7741, "step": 371000 }, { "epoch": 0.71, "learning_rate": 4.8231064041398655e-05, "loss": 2.7727, "step": 371500 }, { "epoch": 0.71, "learning_rate": 4.822868716275951e-05, "loss": 2.7705, "step": 372000 }, { "epoch": 0.71, "learning_rate": 4.822630552083651e-05, "loss": 2.765, "step": 372500 }, { "epoch": 0.71, "learning_rate": 4.822392387891352e-05, "loss": 2.7541, "step": 373000 }, { "epoch": 0.71, "learning_rate": 4.822154223699052e-05, "loss": 2.7461, "step": 373500 }, { "epoch": 0.71, "learning_rate": 4.8219160595067525e-05, "loss": 2.763, "step": 374000 }, { "epoch": 0.71, "learning_rate": 4.8216778953144534e-05, "loss": 2.783, "step": 374500 }, { "epoch": 0.71, "learning_rate": 4.821440207450538e-05, "loss": 2.7699, "step": 375000 }, { "epoch": 0.72, "learning_rate": 4.821202043258239e-05, "loss": 2.7568, "step": 375500 }, { "epoch": 0.72, "learning_rate": 4.820963879065939e-05, "loss": 2.7636, "step": 376000 }, { "epoch": 0.72, "learning_rate": 4.82072571487364e-05, "loss": 2.767, "step": 376500 }, { "epoch": 0.72, "learning_rate": 4.8204875506813404e-05, "loss": 2.7544, "step": 377000 }, { "epoch": 0.72, "learning_rate": 4.8202493864890406e-05, "loss": 2.7735, "step": 377500 }, { "epoch": 0.72, "learning_rate": 4.8200112222967416e-05, "loss": 2.7645, "step": 378000 }, { "epoch": 0.72, "learning_rate": 4.819773058104442e-05, "loss": 2.7735, "step": 378500 }, { "epoch": 0.72, "learning_rate": 4.819534893912143e-05, "loss": 2.7481, "step": 379000 }, { "epoch": 0.72, "learning_rate": 4.8192967297198424e-05, "loss": 2.7531, "step": 379500 }, { "epoch": 0.72, "learning_rate": 4.8190590418559276e-05, "loss": 2.7567, "step": 380000 }, { "epoch": 0.72, "learning_rate": 4.8188208776636285e-05, "loss": 2.7582, "step": 380500 }, { "epoch": 0.73, "learning_rate": 4.818582713471329e-05, "loss": 2.7676, "step": 381000 }, { "epoch": 0.73, "learning_rate": 4.81834454927903e-05, "loss": 2.7603, "step": 381500 }, { "epoch": 0.73, "learning_rate": 4.818106861415115e-05, "loss": 2.7491, "step": 382000 }, { "epoch": 0.73, "learning_rate": 4.817868697222815e-05, "loss": 2.7478, "step": 382500 }, { "epoch": 0.73, "learning_rate": 4.8176305330305155e-05, "loss": 2.7658, "step": 383000 }, { "epoch": 0.73, "learning_rate": 4.817392368838216e-05, "loss": 2.7579, "step": 383500 }, { "epoch": 0.73, "learning_rate": 4.817154680974301e-05, "loss": 2.7648, "step": 384000 }, { "epoch": 0.73, "learning_rate": 4.816916516782002e-05, "loss": 2.7474, "step": 384500 }, { "epoch": 0.73, "learning_rate": 4.816678828918087e-05, "loss": 2.757, "step": 385000 }, { "epoch": 0.73, "learning_rate": 4.8164406647257874e-05, "loss": 2.7492, "step": 385500 }, { "epoch": 0.74, "learning_rate": 4.8162025005334884e-05, "loss": 2.7793, "step": 386000 }, { "epoch": 0.74, "learning_rate": 4.815964336341188e-05, "loss": 2.7435, "step": 386500 }, { "epoch": 0.74, "learning_rate": 4.815726172148889e-05, "loss": 2.7668, "step": 387000 }, { "epoch": 0.74, "learning_rate": 4.81548800795659e-05, "loss": 2.7458, "step": 387500 }, { "epoch": 0.74, "learning_rate": 4.81524984376429e-05, "loss": 2.762, "step": 388000 }, { "epoch": 0.74, "learning_rate": 4.815012155900375e-05, "loss": 2.7411, "step": 388500 }, { "epoch": 0.74, "learning_rate": 4.8147739917080756e-05, "loss": 2.7492, "step": 389000 }, { "epoch": 0.74, "learning_rate": 4.8145358275157765e-05, "loss": 2.767, "step": 389500 }, { "epoch": 0.74, "learning_rate": 4.814297663323477e-05, "loss": 2.7593, "step": 390000 }, { "epoch": 0.74, "eval_accuracy": 0.5064444507671825, "eval_loss": 2.6576755046844482, "eval_runtime": 4177.589, "eval_samples_per_second": 65.825, "eval_steps_per_second": 6.583, "step": 390000 }, { "epoch": 0.74, "learning_rate": 4.814059499131177e-05, "loss": 2.7503, "step": 390500 }, { "epoch": 0.74, "learning_rate": 4.813821334938878e-05, "loss": 2.753, "step": 391000 }, { "epoch": 0.75, "learning_rate": 4.813583170746578e-05, "loss": 2.7593, "step": 391500 }, { "epoch": 0.75, "learning_rate": 4.813345006554279e-05, "loss": 2.7521, "step": 392000 }, { "epoch": 0.75, "learning_rate": 4.8131073186903644e-05, "loss": 2.7625, "step": 392500 }, { "epoch": 0.75, "learning_rate": 4.812869154498064e-05, "loss": 2.7699, "step": 393000 }, { "epoch": 0.75, "learning_rate": 4.812630990305765e-05, "loss": 2.7453, "step": 393500 }, { "epoch": 0.75, "learning_rate": 4.81239330244185e-05, "loss": 2.7504, "step": 394000 }, { "epoch": 0.75, "learning_rate": 4.8121556145779354e-05, "loss": 2.7526, "step": 394500 }, { "epoch": 0.75, "learning_rate": 4.8119174503856357e-05, "loss": 2.7593, "step": 395000 }, { "epoch": 0.75, "learning_rate": 4.8116792861933366e-05, "loss": 2.7597, "step": 395500 }, { "epoch": 0.75, "learning_rate": 4.811441122001037e-05, "loss": 2.7369, "step": 396000 }, { "epoch": 0.76, "learning_rate": 4.811202957808737e-05, "loss": 2.7491, "step": 396500 }, { "epoch": 0.76, "learning_rate": 4.8109647936164374e-05, "loss": 2.7435, "step": 397000 }, { "epoch": 0.76, "learning_rate": 4.810726629424138e-05, "loss": 2.7442, "step": 397500 }, { "epoch": 0.76, "learning_rate": 4.8104884652318386e-05, "loss": 2.7505, "step": 398000 }, { "epoch": 0.76, "learning_rate": 4.810250777367924e-05, "loss": 2.7656, "step": 398500 }, { "epoch": 0.76, "learning_rate": 4.810012613175625e-05, "loss": 2.7373, "step": 399000 }, { "epoch": 0.76, "learning_rate": 4.809774448983325e-05, "loss": 2.7536, "step": 399500 }, { "epoch": 0.76, "learning_rate": 4.809536284791025e-05, "loss": 2.7409, "step": 400000 }, { "epoch": 0.76, "learning_rate": 4.8092981205987255e-05, "loss": 2.7464, "step": 400500 }, { "epoch": 0.76, "learning_rate": 4.8090599564064265e-05, "loss": 2.7728, "step": 401000 }, { "epoch": 0.76, "learning_rate": 4.8088217922141274e-05, "loss": 2.7601, "step": 401500 }, { "epoch": 0.77, "learning_rate": 4.808583628021828e-05, "loss": 2.7271, "step": 402000 }, { "epoch": 0.77, "learning_rate": 4.808345940157913e-05, "loss": 2.745, "step": 402500 }, { "epoch": 0.77, "learning_rate": 4.808107775965613e-05, "loss": 2.7669, "step": 403000 }, { "epoch": 0.77, "learning_rate": 4.8078696117733134e-05, "loss": 2.7608, "step": 403500 }, { "epoch": 0.77, "learning_rate": 4.8076314475810144e-05, "loss": 2.727, "step": 404000 }, { "epoch": 0.77, "learning_rate": 4.8073932833887147e-05, "loss": 2.759, "step": 404500 }, { "epoch": 0.77, "learning_rate": 4.8071551191964156e-05, "loss": 2.7529, "step": 405000 }, { "epoch": 0.77, "learning_rate": 4.806916955004116e-05, "loss": 2.7402, "step": 405500 }, { "epoch": 0.77, "learning_rate": 4.806678790811816e-05, "loss": 2.7362, "step": 406000 }, { "epoch": 0.77, "learning_rate": 4.8064411029479013e-05, "loss": 2.7643, "step": 406500 }, { "epoch": 0.78, "learning_rate": 4.8062029387556016e-05, "loss": 2.7695, "step": 407000 }, { "epoch": 0.78, "learning_rate": 4.8059647745633025e-05, "loss": 2.756, "step": 407500 }, { "epoch": 0.78, "learning_rate": 4.805726610371003e-05, "loss": 2.7311, "step": 408000 }, { "epoch": 0.78, "learning_rate": 4.805488922507088e-05, "loss": 2.7209, "step": 408500 }, { "epoch": 0.78, "learning_rate": 4.805250758314789e-05, "loss": 2.7285, "step": 409000 }, { "epoch": 0.78, "learning_rate": 4.8050125941224886e-05, "loss": 2.7606, "step": 409500 }, { "epoch": 0.78, "learning_rate": 4.8047744299301895e-05, "loss": 2.7473, "step": 410000 }, { "epoch": 0.78, "learning_rate": 4.80453626573789e-05, "loss": 2.7492, "step": 410500 }, { "epoch": 0.78, "learning_rate": 4.804298101545591e-05, "loss": 2.7439, "step": 411000 }, { "epoch": 0.78, "learning_rate": 4.804060413681676e-05, "loss": 2.7685, "step": 411500 }, { "epoch": 0.78, "learning_rate": 4.803822249489376e-05, "loss": 2.7603, "step": 412000 }, { "epoch": 0.79, "learning_rate": 4.8035840852970765e-05, "loss": 2.7721, "step": 412500 }, { "epoch": 0.79, "learning_rate": 4.8033459211047774e-05, "loss": 2.7582, "step": 413000 }, { "epoch": 0.79, "learning_rate": 4.803107756912478e-05, "loss": 2.7515, "step": 413500 }, { "epoch": 0.79, "learning_rate": 4.8028695927201786e-05, "loss": 2.7588, "step": 414000 }, { "epoch": 0.79, "learning_rate": 4.802631428527879e-05, "loss": 2.7661, "step": 414500 }, { "epoch": 0.79, "learning_rate": 4.802393740663964e-05, "loss": 2.7528, "step": 415000 }, { "epoch": 0.79, "learning_rate": 4.8021555764716644e-05, "loss": 2.7596, "step": 415500 }, { "epoch": 0.79, "learning_rate": 4.8019174122793646e-05, "loss": 2.7261, "step": 416000 }, { "epoch": 0.79, "learning_rate": 4.8016792480870656e-05, "loss": 2.7564, "step": 416500 }, { "epoch": 0.79, "learning_rate": 4.801441083894766e-05, "loss": 2.7222, "step": 417000 }, { "epoch": 0.8, "learning_rate": 4.801202919702467e-05, "loss": 2.77, "step": 417500 }, { "epoch": 0.8, "learning_rate": 4.800964755510167e-05, "loss": 2.7293, "step": 418000 }, { "epoch": 0.8, "learning_rate": 4.800726591317867e-05, "loss": 2.7385, "step": 418500 }, { "epoch": 0.8, "learning_rate": 4.8004889034539525e-05, "loss": 2.7334, "step": 419000 }, { "epoch": 0.8, "learning_rate": 4.800250739261653e-05, "loss": 2.7317, "step": 419500 }, { "epoch": 0.8, "learning_rate": 4.800012575069354e-05, "loss": 2.7521, "step": 420000 }, { "epoch": 0.8, "eval_accuracy": 0.5083140640307475, "eval_loss": 2.643585681915283, "eval_runtime": 4178.2521, "eval_samples_per_second": 65.814, "eval_steps_per_second": 6.581, "step": 420000 }, { "epoch": 0.8, "learning_rate": 4.799774410877054e-05, "loss": 2.745, "step": 420500 }, { "epoch": 0.8, "learning_rate": 4.799536246684755e-05, "loss": 2.7598, "step": 421000 }, { "epoch": 0.8, "learning_rate": 4.79929855882084e-05, "loss": 2.7404, "step": 421500 }, { "epoch": 0.8, "learning_rate": 4.79906039462854e-05, "loss": 2.7466, "step": 422000 }, { "epoch": 0.8, "learning_rate": 4.798822230436241e-05, "loss": 2.7527, "step": 422500 }, { "epoch": 0.81, "learning_rate": 4.7985840662439416e-05, "loss": 2.7432, "step": 423000 }, { "epoch": 0.81, "learning_rate": 4.798346854708411e-05, "loss": 2.7455, "step": 423500 }, { "epoch": 0.81, "learning_rate": 4.7981086905161114e-05, "loss": 2.7307, "step": 424000 }, { "epoch": 0.81, "learning_rate": 4.797870526323812e-05, "loss": 2.7455, "step": 424500 }, { "epoch": 0.81, "learning_rate": 4.7976323621315126e-05, "loss": 2.7677, "step": 425000 }, { "epoch": 0.81, "learning_rate": 4.797394197939213e-05, "loss": 2.7538, "step": 425500 }, { "epoch": 0.81, "learning_rate": 4.797156033746914e-05, "loss": 2.7455, "step": 426000 }, { "epoch": 0.81, "learning_rate": 4.796917869554614e-05, "loss": 2.7378, "step": 426500 }, { "epoch": 0.81, "learning_rate": 4.796679705362315e-05, "loss": 2.7274, "step": 427000 }, { "epoch": 0.81, "learning_rate": 4.7964420174983996e-05, "loss": 2.7472, "step": 427500 }, { "epoch": 0.82, "learning_rate": 4.7962038533061005e-05, "loss": 2.7466, "step": 428000 }, { "epoch": 0.82, "learning_rate": 4.795965689113801e-05, "loss": 2.7461, "step": 428500 }, { "epoch": 0.82, "learning_rate": 4.795727524921501e-05, "loss": 2.7417, "step": 429000 }, { "epoch": 0.82, "learning_rate": 4.795489360729202e-05, "loss": 2.7433, "step": 429500 }, { "epoch": 0.82, "learning_rate": 4.795251672865287e-05, "loss": 2.7392, "step": 430000 }, { "epoch": 0.82, "learning_rate": 4.7950135086729875e-05, "loss": 2.7303, "step": 430500 }, { "epoch": 0.82, "learning_rate": 4.7947753444806884e-05, "loss": 2.7368, "step": 431000 }, { "epoch": 0.82, "learning_rate": 4.7945371802883887e-05, "loss": 2.7358, "step": 431500 }, { "epoch": 0.82, "learning_rate": 4.794299492424474e-05, "loss": 2.742, "step": 432000 }, { "epoch": 0.82, "learning_rate": 4.794061328232174e-05, "loss": 2.739, "step": 432500 }, { "epoch": 0.83, "learning_rate": 4.7938231640398744e-05, "loss": 2.7513, "step": 433000 }, { "epoch": 0.83, "learning_rate": 4.7935849998475754e-05, "loss": 2.7482, "step": 433500 }, { "epoch": 0.83, "learning_rate": 4.7933473119836606e-05, "loss": 2.7302, "step": 434000 }, { "epoch": 0.83, "learning_rate": 4.793109147791361e-05, "loss": 2.7423, "step": 434500 }, { "epoch": 0.83, "learning_rate": 4.792870983599062e-05, "loss": 2.7361, "step": 435000 }, { "epoch": 0.83, "learning_rate": 4.7926328194067614e-05, "loss": 2.7432, "step": 435500 }, { "epoch": 0.83, "learning_rate": 4.7923951315428466e-05, "loss": 2.7637, "step": 436000 }, { "epoch": 0.83, "learning_rate": 4.7921569673505475e-05, "loss": 2.7377, "step": 436500 }, { "epoch": 0.83, "learning_rate": 4.791918803158248e-05, "loss": 2.697, "step": 437000 }, { "epoch": 0.83, "learning_rate": 4.791680638965949e-05, "loss": 2.7247, "step": 437500 }, { "epoch": 0.83, "learning_rate": 4.791442474773649e-05, "loss": 2.7322, "step": 438000 }, { "epoch": 0.84, "learning_rate": 4.791204310581349e-05, "loss": 2.7595, "step": 438500 }, { "epoch": 0.84, "learning_rate": 4.7909661463890495e-05, "loss": 2.7616, "step": 439000 }, { "epoch": 0.84, "learning_rate": 4.7907279821967505e-05, "loss": 2.7307, "step": 439500 }, { "epoch": 0.84, "learning_rate": 4.790490294332836e-05, "loss": 2.7414, "step": 440000 }, { "epoch": 0.84, "learning_rate": 4.790252130140536e-05, "loss": 2.7468, "step": 440500 }, { "epoch": 0.84, "learning_rate": 4.790013965948237e-05, "loss": 2.7226, "step": 441000 }, { "epoch": 0.84, "learning_rate": 4.789776278084322e-05, "loss": 2.7268, "step": 441500 }, { "epoch": 0.84, "learning_rate": 4.7895381138920224e-05, "loss": 2.7457, "step": 442000 }, { "epoch": 0.84, "learning_rate": 4.7892999496997227e-05, "loss": 2.7279, "step": 442500 }, { "epoch": 0.84, "learning_rate": 4.789061785507423e-05, "loss": 2.7526, "step": 443000 }, { "epoch": 0.85, "learning_rate": 4.788823621315124e-05, "loss": 2.7482, "step": 443500 }, { "epoch": 0.85, "learning_rate": 4.788585457122825e-05, "loss": 2.7305, "step": 444000 }, { "epoch": 0.85, "learning_rate": 4.788347292930525e-05, "loss": 2.716, "step": 444500 }, { "epoch": 0.85, "learning_rate": 4.788109128738225e-05, "loss": 2.7294, "step": 445000 }, { "epoch": 0.85, "learning_rate": 4.7878714408743106e-05, "loss": 2.7266, "step": 445500 }, { "epoch": 0.85, "learning_rate": 4.787633276682011e-05, "loss": 2.7358, "step": 446000 }, { "epoch": 0.85, "learning_rate": 4.787395112489712e-05, "loss": 2.7469, "step": 446500 }, { "epoch": 0.85, "learning_rate": 4.787156948297412e-05, "loss": 2.7221, "step": 447000 }, { "epoch": 0.85, "learning_rate": 4.786918784105113e-05, "loss": 2.739, "step": 447500 }, { "epoch": 0.85, "learning_rate": 4.786681096241198e-05, "loss": 2.7245, "step": 448000 }, { "epoch": 0.85, "learning_rate": 4.786442932048898e-05, "loss": 2.7398, "step": 448500 }, { "epoch": 0.86, "learning_rate": 4.786204767856599e-05, "loss": 2.7258, "step": 449000 }, { "epoch": 0.86, "learning_rate": 4.785966603664299e-05, "loss": 2.7333, "step": 449500 }, { "epoch": 0.86, "learning_rate": 4.785728915800384e-05, "loss": 2.7293, "step": 450000 }, { "epoch": 0.86, "eval_accuracy": 0.5099124879968102, "eval_loss": 2.6340997219085693, "eval_runtime": 4180.1732, "eval_samples_per_second": 65.784, "eval_steps_per_second": 6.578, "step": 450000 }, { "epoch": 0.86, "learning_rate": 4.7854912279364694e-05, "loss": 2.7176, "step": 450500 }, { "epoch": 0.86, "learning_rate": 4.7852530637441704e-05, "loss": 2.7456, "step": 451000 }, { "epoch": 0.86, "learning_rate": 4.7850148995518706e-05, "loss": 2.7338, "step": 451500 }, { "epoch": 0.86, "learning_rate": 4.7847767353595716e-05, "loss": 2.7321, "step": 452000 }, { "epoch": 0.86, "learning_rate": 4.784538571167271e-05, "loss": 2.7339, "step": 452500 }, { "epoch": 0.86, "learning_rate": 4.784300406974972e-05, "loss": 2.7346, "step": 453000 }, { "epoch": 0.86, "learning_rate": 4.7840622427826724e-05, "loss": 2.728, "step": 453500 }, { "epoch": 0.87, "learning_rate": 4.783824078590373e-05, "loss": 2.7252, "step": 454000 }, { "epoch": 0.87, "learning_rate": 4.7835859143980736e-05, "loss": 2.7292, "step": 454500 }, { "epoch": 0.87, "learning_rate": 4.783348226534159e-05, "loss": 2.7064, "step": 455000 }, { "epoch": 0.87, "learning_rate": 4.783110538670244e-05, "loss": 2.7381, "step": 455500 }, { "epoch": 0.87, "learning_rate": 4.782872374477944e-05, "loss": 2.7256, "step": 456000 }, { "epoch": 0.87, "learning_rate": 4.7826342102856445e-05, "loss": 2.7205, "step": 456500 }, { "epoch": 0.87, "learning_rate": 4.7823960460933455e-05, "loss": 2.7384, "step": 457000 }, { "epoch": 0.87, "learning_rate": 4.782157881901046e-05, "loss": 2.728, "step": 457500 }, { "epoch": 0.87, "learning_rate": 4.781919717708747e-05, "loss": 2.7204, "step": 458000 }, { "epoch": 0.87, "learning_rate": 4.781682029844832e-05, "loss": 2.7063, "step": 458500 }, { "epoch": 0.87, "learning_rate": 4.7814438656525315e-05, "loss": 2.732, "step": 459000 }, { "epoch": 0.88, "learning_rate": 4.7812057014602324e-05, "loss": 2.7219, "step": 459500 }, { "epoch": 0.88, "learning_rate": 4.780967537267933e-05, "loss": 2.7291, "step": 460000 }, { "epoch": 0.88, "learning_rate": 4.7807293730756336e-05, "loss": 2.716, "step": 460500 }, { "epoch": 0.88, "learning_rate": 4.780491208883334e-05, "loss": 2.7414, "step": 461000 }, { "epoch": 0.88, "learning_rate": 4.780253044691035e-05, "loss": 2.7323, "step": 461500 }, { "epoch": 0.88, "learning_rate": 4.780014880498735e-05, "loss": 2.7508, "step": 462000 }, { "epoch": 0.88, "learning_rate": 4.7797771926348203e-05, "loss": 2.7341, "step": 462500 }, { "epoch": 0.88, "learning_rate": 4.7795390284425206e-05, "loss": 2.7164, "step": 463000 }, { "epoch": 0.88, "learning_rate": 4.779301340578606e-05, "loss": 2.7293, "step": 463500 }, { "epoch": 0.88, "learning_rate": 4.779063176386306e-05, "loss": 2.7404, "step": 464000 }, { "epoch": 0.89, "learning_rate": 4.778825012194007e-05, "loss": 2.7289, "step": 464500 }, { "epoch": 0.89, "learning_rate": 4.778586848001708e-05, "loss": 2.7359, "step": 465000 }, { "epoch": 0.89, "learning_rate": 4.7783486838094076e-05, "loss": 2.7256, "step": 465500 }, { "epoch": 0.89, "learning_rate": 4.778110995945493e-05, "loss": 2.7186, "step": 466000 }, { "epoch": 0.89, "learning_rate": 4.777872831753194e-05, "loss": 2.7209, "step": 466500 }, { "epoch": 0.89, "learning_rate": 4.777634667560894e-05, "loss": 2.7248, "step": 467000 }, { "epoch": 0.89, "learning_rate": 4.777396503368595e-05, "loss": 2.7097, "step": 467500 }, { "epoch": 0.89, "learning_rate": 4.777158339176295e-05, "loss": 2.7012, "step": 468000 }, { "epoch": 0.89, "learning_rate": 4.7769206513123804e-05, "loss": 2.7131, "step": 468500 }, { "epoch": 0.89, "learning_rate": 4.7766829634484657e-05, "loss": 2.7168, "step": 469000 }, { "epoch": 0.89, "learning_rate": 4.776444799256166e-05, "loss": 2.7249, "step": 469500 }, { "epoch": 0.9, "learning_rate": 4.776206635063866e-05, "loss": 2.7321, "step": 470000 }, { "epoch": 0.9, "learning_rate": 4.775968470871567e-05, "loss": 2.7188, "step": 470500 }, { "epoch": 0.9, "learning_rate": 4.7757303066792674e-05, "loss": 2.7528, "step": 471000 }, { "epoch": 0.9, "learning_rate": 4.775492142486968e-05, "loss": 2.7383, "step": 471500 }, { "epoch": 0.9, "learning_rate": 4.775253978294668e-05, "loss": 2.7228, "step": 472000 }, { "epoch": 0.9, "learning_rate": 4.775015814102369e-05, "loss": 2.6993, "step": 472500 }, { "epoch": 0.9, "learning_rate": 4.774777649910069e-05, "loss": 2.7284, "step": 473000 }, { "epoch": 0.9, "learning_rate": 4.77453948571777e-05, "loss": 2.7161, "step": 473500 }, { "epoch": 0.9, "learning_rate": 4.774301797853855e-05, "loss": 2.7222, "step": 474000 }, { "epoch": 0.9, "learning_rate": 4.7740636336615555e-05, "loss": 2.7166, "step": 474500 }, { "epoch": 0.91, "learning_rate": 4.7738254694692565e-05, "loss": 2.727, "step": 475000 }, { "epoch": 0.91, "learning_rate": 4.773587305276957e-05, "loss": 2.7339, "step": 475500 }, { "epoch": 0.91, "learning_rate": 4.773349141084657e-05, "loss": 2.7084, "step": 476000 }, { "epoch": 0.91, "learning_rate": 4.773110976892358e-05, "loss": 2.724, "step": 476500 }, { "epoch": 0.91, "learning_rate": 4.7728732890284425e-05, "loss": 2.7394, "step": 477000 }, { "epoch": 0.91, "learning_rate": 4.7726351248361434e-05, "loss": 2.7284, "step": 477500 }, { "epoch": 0.91, "learning_rate": 4.772396960643844e-05, "loss": 2.7414, "step": 478000 }, { "epoch": 0.91, "learning_rate": 4.772158796451544e-05, "loss": 2.7375, "step": 478500 }, { "epoch": 0.91, "learning_rate": 4.771920632259245e-05, "loss": 2.7253, "step": 479000 }, { "epoch": 0.91, "learning_rate": 4.771682468066945e-05, "loss": 2.724, "step": 479500 }, { "epoch": 0.91, "learning_rate": 4.7714447802030304e-05, "loss": 2.7123, "step": 480000 }, { "epoch": 0.91, "eval_accuracy": 0.5108100264956377, "eval_loss": 2.6254332065582275, "eval_runtime": 4173.1002, "eval_samples_per_second": 65.895, "eval_steps_per_second": 6.59, "step": 480000 }, { "epoch": 0.92, "learning_rate": 4.771206616010731e-05, "loss": 2.731, "step": 480500 }, { "epoch": 0.92, "learning_rate": 4.7709684518184316e-05, "loss": 2.7158, "step": 481000 }, { "epoch": 0.92, "learning_rate": 4.770730287626132e-05, "loss": 2.7145, "step": 481500 }, { "epoch": 0.92, "learning_rate": 4.770492599762217e-05, "loss": 2.707, "step": 482000 }, { "epoch": 0.92, "learning_rate": 4.770254911898302e-05, "loss": 2.729, "step": 482500 }, { "epoch": 0.92, "learning_rate": 4.7700167477060026e-05, "loss": 2.7155, "step": 483000 }, { "epoch": 0.92, "learning_rate": 4.7697785835137035e-05, "loss": 2.7178, "step": 483500 }, { "epoch": 0.92, "learning_rate": 4.769540419321404e-05, "loss": 2.7185, "step": 484000 }, { "epoch": 0.92, "learning_rate": 4.769302255129105e-05, "loss": 2.7339, "step": 484500 }, { "epoch": 0.92, "learning_rate": 4.769064090936805e-05, "loss": 2.706, "step": 485000 }, { "epoch": 0.93, "learning_rate": 4.768825926744505e-05, "loss": 2.7256, "step": 485500 }, { "epoch": 0.93, "learning_rate": 4.7685877625522055e-05, "loss": 2.7252, "step": 486000 }, { "epoch": 0.93, "learning_rate": 4.7683495983599065e-05, "loss": 2.7243, "step": 486500 }, { "epoch": 0.93, "learning_rate": 4.768111910495992e-05, "loss": 2.7143, "step": 487000 }, { "epoch": 0.93, "learning_rate": 4.767873746303692e-05, "loss": 2.7208, "step": 487500 }, { "epoch": 0.93, "learning_rate": 4.767635582111393e-05, "loss": 2.7145, "step": 488000 }, { "epoch": 0.93, "learning_rate": 4.7673974179190925e-05, "loss": 2.7276, "step": 488500 }, { "epoch": 0.93, "learning_rate": 4.7671592537267934e-05, "loss": 2.713, "step": 489000 }, { "epoch": 0.93, "learning_rate": 4.7669215658628786e-05, "loss": 2.7213, "step": 489500 }, { "epoch": 0.93, "learning_rate": 4.766683401670579e-05, "loss": 2.7337, "step": 490000 }, { "epoch": 0.93, "learning_rate": 4.76644523747828e-05, "loss": 2.7299, "step": 490500 }, { "epoch": 0.94, "learning_rate": 4.76620707328598e-05, "loss": 2.7069, "step": 491000 }, { "epoch": 0.94, "learning_rate": 4.7659689090936804e-05, "loss": 2.7238, "step": 491500 }, { "epoch": 0.94, "learning_rate": 4.765730744901381e-05, "loss": 2.73, "step": 492000 }, { "epoch": 0.94, "learning_rate": 4.7654925807090816e-05, "loss": 2.7257, "step": 492500 }, { "epoch": 0.94, "learning_rate": 4.7652544165167825e-05, "loss": 2.7145, "step": 493000 }, { "epoch": 0.94, "learning_rate": 4.765016728652868e-05, "loss": 2.7159, "step": 493500 }, { "epoch": 0.94, "learning_rate": 4.764778564460568e-05, "loss": 2.7324, "step": 494000 }, { "epoch": 0.94, "learning_rate": 4.764540400268269e-05, "loss": 2.6913, "step": 494500 }, { "epoch": 0.94, "learning_rate": 4.7643022360759685e-05, "loss": 2.7101, "step": 495000 }, { "epoch": 0.94, "learning_rate": 4.764064548212054e-05, "loss": 2.723, "step": 495500 }, { "epoch": 0.95, "learning_rate": 4.763826384019755e-05, "loss": 2.7109, "step": 496000 }, { "epoch": 0.95, "learning_rate": 4.763588219827455e-05, "loss": 2.726, "step": 496500 }, { "epoch": 0.95, "learning_rate": 4.763350055635156e-05, "loss": 2.7263, "step": 497000 }, { "epoch": 0.95, "learning_rate": 4.763112367771241e-05, "loss": 2.7163, "step": 497500 }, { "epoch": 0.95, "learning_rate": 4.7628742035789414e-05, "loss": 2.7243, "step": 498000 }, { "epoch": 0.95, "learning_rate": 4.7626365157150266e-05, "loss": 2.7335, "step": 498500 }, { "epoch": 0.95, "learning_rate": 4.762398351522727e-05, "loss": 2.7061, "step": 499000 }, { "epoch": 0.95, "learning_rate": 4.762160187330427e-05, "loss": 2.7184, "step": 499500 }, { "epoch": 0.95, "learning_rate": 4.761922023138128e-05, "loss": 2.7094, "step": 500000 }, { "epoch": 0.95, "learning_rate": 4.7616838589458283e-05, "loss": 2.7428, "step": 500500 }, { "epoch": 0.95, "learning_rate": 4.7614461710819136e-05, "loss": 2.7265, "step": 501000 }, { "epoch": 0.96, "learning_rate": 4.7612080068896145e-05, "loss": 2.7154, "step": 501500 }, { "epoch": 0.96, "learning_rate": 4.760969842697314e-05, "loss": 2.722, "step": 502000 }, { "epoch": 0.96, "learning_rate": 4.760731678505015e-05, "loss": 2.7179, "step": 502500 }, { "epoch": 0.96, "learning_rate": 4.7604939906411e-05, "loss": 2.7312, "step": 503000 }, { "epoch": 0.96, "learning_rate": 4.7602558264488005e-05, "loss": 2.7023, "step": 503500 }, { "epoch": 0.96, "learning_rate": 4.7600176622565015e-05, "loss": 2.7055, "step": 504000 }, { "epoch": 0.96, "learning_rate": 4.759779498064202e-05, "loss": 2.7109, "step": 504500 }, { "epoch": 0.96, "learning_rate": 4.759541333871902e-05, "loss": 2.7235, "step": 505000 }, { "epoch": 0.96, "learning_rate": 4.759303169679602e-05, "loss": 2.712, "step": 505500 }, { "epoch": 0.96, "learning_rate": 4.759065005487303e-05, "loss": 2.7299, "step": 506000 }, { "epoch": 0.97, "learning_rate": 4.7588268412950035e-05, "loss": 2.6988, "step": 506500 }, { "epoch": 0.97, "learning_rate": 4.758589153431089e-05, "loss": 2.7079, "step": 507000 }, { "epoch": 0.97, "learning_rate": 4.7583509892387896e-05, "loss": 2.7193, "step": 507500 }, { "epoch": 0.97, "learning_rate": 4.75811282504649e-05, "loss": 2.7229, "step": 508000 }, { "epoch": 0.97, "learning_rate": 4.75787466085419e-05, "loss": 2.7221, "step": 508500 }, { "epoch": 0.97, "learning_rate": 4.7576369729902754e-05, "loss": 2.7074, "step": 509000 }, { "epoch": 0.97, "learning_rate": 4.7573988087979756e-05, "loss": 2.7176, "step": 509500 }, { "epoch": 0.97, "learning_rate": 4.7571606446056766e-05, "loss": 2.6937, "step": 510000 }, { "epoch": 0.97, "eval_accuracy": 0.5123424029693019, "eval_loss": 2.615777015686035, "eval_runtime": 4181.8303, "eval_samples_per_second": 65.758, "eval_steps_per_second": 6.576, "step": 510000 }, { "epoch": 0.97, "learning_rate": 4.756922480413377e-05, "loss": 2.7134, "step": 510500 }, { "epoch": 0.97, "learning_rate": 4.756684792549462e-05, "loss": 2.7366, "step": 511000 }, { "epoch": 0.97, "learning_rate": 4.756446628357163e-05, "loss": 2.6927, "step": 511500 }, { "epoch": 0.98, "learning_rate": 4.756208464164863e-05, "loss": 2.7338, "step": 512000 }, { "epoch": 0.98, "learning_rate": 4.7559702999725635e-05, "loss": 2.6851, "step": 512500 }, { "epoch": 0.98, "learning_rate": 4.7557321357802645e-05, "loss": 2.7204, "step": 513000 }, { "epoch": 0.98, "learning_rate": 4.755493971587965e-05, "loss": 2.7034, "step": 513500 }, { "epoch": 0.98, "learning_rate": 4.755255807395666e-05, "loss": 2.7243, "step": 514000 }, { "epoch": 0.98, "learning_rate": 4.755017643203365e-05, "loss": 2.7126, "step": 514500 }, { "epoch": 0.98, "learning_rate": 4.7547799553394505e-05, "loss": 2.7134, "step": 515000 }, { "epoch": 0.98, "learning_rate": 4.7545417911471514e-05, "loss": 2.7088, "step": 515500 }, { "epoch": 0.98, "learning_rate": 4.754303626954852e-05, "loss": 2.7193, "step": 516000 }, { "epoch": 0.98, "learning_rate": 4.754065939090937e-05, "loss": 2.7064, "step": 516500 }, { "epoch": 0.99, "learning_rate": 4.753827774898638e-05, "loss": 2.7045, "step": 517000 }, { "epoch": 0.99, "learning_rate": 4.753589610706338e-05, "loss": 2.7157, "step": 517500 }, { "epoch": 0.99, "learning_rate": 4.753351446514039e-05, "loss": 2.7032, "step": 518000 }, { "epoch": 0.99, "learning_rate": 4.7531132823217387e-05, "loss": 2.6976, "step": 518500 }, { "epoch": 0.99, "learning_rate": 4.7528751181294396e-05, "loss": 2.6985, "step": 519000 }, { "epoch": 0.99, "learning_rate": 4.75263695393714e-05, "loss": 2.7063, "step": 519500 }, { "epoch": 0.99, "learning_rate": 4.752398789744841e-05, "loss": 2.6913, "step": 520000 }, { "epoch": 0.99, "learning_rate": 4.752161101880926e-05, "loss": 2.7031, "step": 520500 }, { "epoch": 0.99, "learning_rate": 4.751922937688626e-05, "loss": 2.7449, "step": 521000 }, { "epoch": 0.99, "learning_rate": 4.7516852498247115e-05, "loss": 2.7057, "step": 521500 }, { "epoch": 0.99, "learning_rate": 4.751447085632412e-05, "loss": 2.6962, "step": 522000 }, { "epoch": 1.0, "learning_rate": 4.751208921440112e-05, "loss": 2.7176, "step": 522500 }, { "epoch": 1.0, "learning_rate": 4.750970757247813e-05, "loss": 2.7054, "step": 523000 }, { "epoch": 1.0, "learning_rate": 4.750732593055513e-05, "loss": 2.7184, "step": 523500 }, { "epoch": 1.0, "learning_rate": 4.7504949051915985e-05, "loss": 2.7179, "step": 524000 }, { "epoch": 1.0, "learning_rate": 4.7502567409992994e-05, "loss": 2.7183, "step": 524500 }, { "epoch": 1.0, "learning_rate": 4.7500190531353846e-05, "loss": 2.7197, "step": 525000 }, { "epoch": 1.0, "learning_rate": 4.749780888943084e-05, "loss": 2.7167, "step": 525500 }, { "epoch": 1.0, "learning_rate": 4.749542724750785e-05, "loss": 2.6978, "step": 526000 }, { "epoch": 1.0, "learning_rate": 4.7493045605584854e-05, "loss": 2.6899, "step": 526500 }, { "epoch": 1.0, "learning_rate": 4.7490663963661864e-05, "loss": 2.683, "step": 527000 }, { "epoch": 1.01, "learning_rate": 4.7488282321738866e-05, "loss": 2.7136, "step": 527500 }, { "epoch": 1.01, "learning_rate": 4.748590067981587e-05, "loss": 2.6816, "step": 528000 }, { "epoch": 1.01, "learning_rate": 4.748351903789288e-05, "loss": 2.6918, "step": 528500 }, { "epoch": 1.01, "learning_rate": 4.748113739596988e-05, "loss": 2.6812, "step": 529000 }, { "epoch": 1.01, "learning_rate": 4.747875575404689e-05, "loss": 2.6945, "step": 529500 }, { "epoch": 1.01, "learning_rate": 4.747637411212389e-05, "loss": 2.706, "step": 530000 }, { "epoch": 1.01, "learning_rate": 4.74739924702009e-05, "loss": 2.6945, "step": 530500 }, { "epoch": 1.01, "learning_rate": 4.7471615591561755e-05, "loss": 2.6826, "step": 531000 }, { "epoch": 1.01, "learning_rate": 4.746923394963875e-05, "loss": 2.7121, "step": 531500 }, { "epoch": 1.01, "learning_rate": 4.746685230771576e-05, "loss": 2.6961, "step": 532000 }, { "epoch": 1.01, "learning_rate": 4.746447542907661e-05, "loss": 2.7043, "step": 532500 }, { "epoch": 1.02, "learning_rate": 4.7462093787153615e-05, "loss": 2.6904, "step": 533000 }, { "epoch": 1.02, "learning_rate": 4.7459712145230624e-05, "loss": 2.717, "step": 533500 }, { "epoch": 1.02, "learning_rate": 4.745733050330763e-05, "loss": 2.6907, "step": 534000 }, { "epoch": 1.02, "learning_rate": 4.745494886138463e-05, "loss": 2.6793, "step": 534500 }, { "epoch": 1.02, "learning_rate": 4.745256721946163e-05, "loss": 2.7047, "step": 535000 }, { "epoch": 1.02, "learning_rate": 4.745018557753864e-05, "loss": 2.6999, "step": 535500 }, { "epoch": 1.02, "learning_rate": 4.744780393561565e-05, "loss": 2.6879, "step": 536000 }, { "epoch": 1.02, "learning_rate": 4.7445427056976497e-05, "loss": 2.6967, "step": 536500 }, { "epoch": 1.02, "learning_rate": 4.744305017833735e-05, "loss": 2.7139, "step": 537000 }, { "epoch": 1.02, "learning_rate": 4.744066853641436e-05, "loss": 2.7033, "step": 537500 }, { "epoch": 1.03, "learning_rate": 4.7438286894491354e-05, "loss": 2.7086, "step": 538000 }, { "epoch": 1.03, "learning_rate": 4.7435905252568363e-05, "loss": 2.6763, "step": 538500 }, { "epoch": 1.03, "learning_rate": 4.743352361064537e-05, "loss": 2.6994, "step": 539000 }, { "epoch": 1.03, "learning_rate": 4.7431141968722375e-05, "loss": 2.6935, "step": 539500 }, { "epoch": 1.03, "learning_rate": 4.7428760326799385e-05, "loss": 2.6882, "step": 540000 }, { "epoch": 1.03, "eval_accuracy": 0.5136683863613649, "eval_loss": 2.60585355758667, "eval_runtime": 4215.7719, "eval_samples_per_second": 65.228, "eval_steps_per_second": 6.523, "step": 540000 }, { "epoch": 1.03, "learning_rate": 4.742638344816023e-05, "loss": 2.6903, "step": 540500 }, { "epoch": 1.03, "learning_rate": 4.742400180623724e-05, "loss": 2.6851, "step": 541000 }, { "epoch": 1.03, "learning_rate": 4.742162016431424e-05, "loss": 2.7033, "step": 541500 }, { "epoch": 1.03, "learning_rate": 4.7419238522391245e-05, "loss": 2.6977, "step": 542000 }, { "epoch": 1.03, "learning_rate": 4.7416856880468254e-05, "loss": 2.696, "step": 542500 }, { "epoch": 1.03, "learning_rate": 4.741447523854526e-05, "loss": 2.6966, "step": 543000 }, { "epoch": 1.04, "learning_rate": 4.7412093596622266e-05, "loss": 2.6978, "step": 543500 }, { "epoch": 1.04, "learning_rate": 4.740971195469926e-05, "loss": 2.709, "step": 544000 }, { "epoch": 1.04, "learning_rate": 4.7407335076060115e-05, "loss": 2.6953, "step": 544500 }, { "epoch": 1.04, "learning_rate": 4.740495819742097e-05, "loss": 2.7006, "step": 545000 }, { "epoch": 1.04, "learning_rate": 4.7402576555497976e-05, "loss": 2.6797, "step": 545500 }, { "epoch": 1.04, "learning_rate": 4.740019967685883e-05, "loss": 2.672, "step": 546000 }, { "epoch": 1.04, "learning_rate": 4.739781803493583e-05, "loss": 2.699, "step": 546500 }, { "epoch": 1.04, "learning_rate": 4.739543639301284e-05, "loss": 2.6933, "step": 547000 }, { "epoch": 1.04, "learning_rate": 4.739305475108984e-05, "loss": 2.69, "step": 547500 }, { "epoch": 1.04, "learning_rate": 4.7390673109166846e-05, "loss": 2.6963, "step": 548000 }, { "epoch": 1.05, "learning_rate": 4.738829146724385e-05, "loss": 2.6921, "step": 548500 }, { "epoch": 1.05, "learning_rate": 4.73859145886047e-05, "loss": 2.7074, "step": 549000 }, { "epoch": 1.05, "learning_rate": 4.738353294668171e-05, "loss": 2.7033, "step": 549500 }, { "epoch": 1.05, "learning_rate": 4.738115130475871e-05, "loss": 2.6733, "step": 550000 }, { "epoch": 1.05, "learning_rate": 4.737876966283572e-05, "loss": 2.7237, "step": 550500 }, { "epoch": 1.05, "learning_rate": 4.7376388020912725e-05, "loss": 2.7024, "step": 551000 }, { "epoch": 1.05, "learning_rate": 4.737400637898973e-05, "loss": 2.6763, "step": 551500 }, { "epoch": 1.05, "learning_rate": 4.737162473706673e-05, "loss": 2.6922, "step": 552000 }, { "epoch": 1.05, "learning_rate": 4.736924309514374e-05, "loss": 2.7002, "step": 552500 }, { "epoch": 1.05, "learning_rate": 4.736686145322074e-05, "loss": 2.6896, "step": 553000 }, { "epoch": 1.05, "learning_rate": 4.736447981129775e-05, "loss": 2.6949, "step": 553500 }, { "epoch": 1.06, "learning_rate": 4.7362098169374754e-05, "loss": 2.6985, "step": 554000 }, { "epoch": 1.06, "learning_rate": 4.735971652745176e-05, "loss": 2.695, "step": 554500 }, { "epoch": 1.06, "learning_rate": 4.735734441209645e-05, "loss": 2.7002, "step": 555000 }, { "epoch": 1.06, "learning_rate": 4.735496277017346e-05, "loss": 2.7102, "step": 555500 }, { "epoch": 1.06, "learning_rate": 4.7352581128250464e-05, "loss": 2.699, "step": 556000 }, { "epoch": 1.06, "learning_rate": 4.7350199486327473e-05, "loss": 2.6916, "step": 556500 }, { "epoch": 1.06, "learning_rate": 4.734781784440448e-05, "loss": 2.6903, "step": 557000 }, { "epoch": 1.06, "learning_rate": 4.734543620248148e-05, "loss": 2.7008, "step": 557500 }, { "epoch": 1.06, "learning_rate": 4.734305456055849e-05, "loss": 2.6993, "step": 558000 }, { "epoch": 1.06, "learning_rate": 4.734067291863549e-05, "loss": 2.714, "step": 558500 }, { "epoch": 1.07, "learning_rate": 4.7338300803280186e-05, "loss": 2.6997, "step": 559000 }, { "epoch": 1.07, "learning_rate": 4.7335919161357195e-05, "loss": 2.7058, "step": 559500 }, { "epoch": 1.07, "learning_rate": 4.73335375194342e-05, "loss": 2.6785, "step": 560000 }, { "epoch": 1.07, "learning_rate": 4.733115587751121e-05, "loss": 2.7141, "step": 560500 }, { "epoch": 1.07, "learning_rate": 4.732877423558821e-05, "loss": 2.7016, "step": 561000 }, { "epoch": 1.07, "learning_rate": 4.732639259366521e-05, "loss": 2.6998, "step": 561500 }, { "epoch": 1.07, "learning_rate": 4.732401095174222e-05, "loss": 2.6952, "step": 562000 }, { "epoch": 1.07, "learning_rate": 4.7321629309819225e-05, "loss": 2.6901, "step": 562500 }, { "epoch": 1.07, "learning_rate": 4.7319247667896234e-05, "loss": 2.6995, "step": 563000 }, { "epoch": 1.07, "learning_rate": 4.7316870789257086e-05, "loss": 2.6919, "step": 563500 }, { "epoch": 1.07, "learning_rate": 4.731448914733409e-05, "loss": 2.6878, "step": 564000 }, { "epoch": 1.08, "learning_rate": 4.731211226869494e-05, "loss": 2.69, "step": 564500 }, { "epoch": 1.08, "learning_rate": 4.7309730626771944e-05, "loss": 2.7081, "step": 565000 }, { "epoch": 1.08, "learning_rate": 4.7307348984848946e-05, "loss": 2.7075, "step": 565500 }, { "epoch": 1.08, "learning_rate": 4.73049721062098e-05, "loss": 2.7029, "step": 566000 }, { "epoch": 1.08, "learning_rate": 4.730259522757065e-05, "loss": 2.694, "step": 566500 }, { "epoch": 1.08, "learning_rate": 4.7300213585647654e-05, "loss": 2.7108, "step": 567000 }, { "epoch": 1.08, "learning_rate": 4.729783194372466e-05, "loss": 2.7039, "step": 567500 }, { "epoch": 1.08, "learning_rate": 4.729545030180167e-05, "loss": 2.7011, "step": 568000 }, { "epoch": 1.08, "learning_rate": 4.729306865987867e-05, "loss": 2.6752, "step": 568500 }, { "epoch": 1.08, "learning_rate": 4.729068701795568e-05, "loss": 2.7047, "step": 569000 }, { "epoch": 1.09, "learning_rate": 4.728830537603268e-05, "loss": 2.6728, "step": 569500 }, { "epoch": 1.09, "learning_rate": 4.728592373410969e-05, "loss": 2.6875, "step": 570000 }, { "epoch": 1.09, "eval_accuracy": 0.5145584456393092, "eval_loss": 2.5994176864624023, "eval_runtime": 4198.6604, "eval_samples_per_second": 65.494, "eval_steps_per_second": 6.549, "step": 570000 }, { "epoch": 1.09, "learning_rate": 4.728354209218669e-05, "loss": 2.6705, "step": 570500 }, { "epoch": 1.09, "learning_rate": 4.7281160450263695e-05, "loss": 2.6811, "step": 571000 }, { "epoch": 1.09, "learning_rate": 4.7278778808340704e-05, "loss": 2.687, "step": 571500 }, { "epoch": 1.09, "learning_rate": 4.727639716641771e-05, "loss": 2.6834, "step": 572000 }, { "epoch": 1.09, "learning_rate": 4.7274015524494716e-05, "loss": 2.695, "step": 572500 }, { "epoch": 1.09, "learning_rate": 4.727163388257172e-05, "loss": 2.6994, "step": 573000 }, { "epoch": 1.09, "learning_rate": 4.7269261767216414e-05, "loss": 2.6848, "step": 573500 }, { "epoch": 1.09, "learning_rate": 4.7266880125293424e-05, "loss": 2.6631, "step": 574000 }, { "epoch": 1.09, "learning_rate": 4.7264498483370426e-05, "loss": 2.7024, "step": 574500 }, { "epoch": 1.1, "learning_rate": 4.726211684144743e-05, "loss": 2.688, "step": 575000 }, { "epoch": 1.1, "learning_rate": 4.725973519952444e-05, "loss": 2.7094, "step": 575500 }, { "epoch": 1.1, "learning_rate": 4.725735355760144e-05, "loss": 2.7032, "step": 576000 }, { "epoch": 1.1, "learning_rate": 4.725497191567845e-05, "loss": 2.6972, "step": 576500 }, { "epoch": 1.1, "learning_rate": 4.725259027375545e-05, "loss": 2.6888, "step": 577000 }, { "epoch": 1.1, "learning_rate": 4.7250208631832456e-05, "loss": 2.7194, "step": 577500 }, { "epoch": 1.1, "learning_rate": 4.724782698990946e-05, "loss": 2.6903, "step": 578000 }, { "epoch": 1.1, "learning_rate": 4.724544534798647e-05, "loss": 2.6972, "step": 578500 }, { "epoch": 1.1, "learning_rate": 4.724306370606347e-05, "loss": 2.678, "step": 579000 }, { "epoch": 1.1, "learning_rate": 4.724068682742432e-05, "loss": 2.6936, "step": 579500 }, { "epoch": 1.11, "learning_rate": 4.723830518550133e-05, "loss": 2.7016, "step": 580000 }, { "epoch": 1.11, "learning_rate": 4.723592354357833e-05, "loss": 2.6894, "step": 580500 }, { "epoch": 1.11, "learning_rate": 4.723354190165534e-05, "loss": 2.6809, "step": 581000 }, { "epoch": 1.11, "learning_rate": 4.723116502301619e-05, "loss": 2.6941, "step": 581500 }, { "epoch": 1.11, "learning_rate": 4.722878338109319e-05, "loss": 2.6788, "step": 582000 }, { "epoch": 1.11, "learning_rate": 4.72264017391702e-05, "loss": 2.7075, "step": 582500 }, { "epoch": 1.11, "learning_rate": 4.7224020097247204e-05, "loss": 2.6903, "step": 583000 }, { "epoch": 1.11, "learning_rate": 4.7221643218608056e-05, "loss": 2.6933, "step": 583500 }, { "epoch": 1.11, "learning_rate": 4.721926157668506e-05, "loss": 2.6798, "step": 584000 }, { "epoch": 1.11, "learning_rate": 4.721688469804591e-05, "loss": 2.6878, "step": 584500 }, { "epoch": 1.11, "learning_rate": 4.7214503056122914e-05, "loss": 2.6794, "step": 585000 }, { "epoch": 1.12, "learning_rate": 4.721212141419992e-05, "loss": 2.6992, "step": 585500 }, { "epoch": 1.12, "learning_rate": 4.7209739772276926e-05, "loss": 2.7117, "step": 586000 }, { "epoch": 1.12, "learning_rate": 4.7207358130353935e-05, "loss": 2.6932, "step": 586500 }, { "epoch": 1.12, "learning_rate": 4.720497648843094e-05, "loss": 2.6792, "step": 587000 }, { "epoch": 1.12, "learning_rate": 4.720259484650794e-05, "loss": 2.7139, "step": 587500 }, { "epoch": 1.12, "learning_rate": 4.720021320458495e-05, "loss": 2.7016, "step": 588000 }, { "epoch": 1.12, "learning_rate": 4.7197841089229645e-05, "loss": 2.6932, "step": 588500 }, { "epoch": 1.12, "learning_rate": 4.71954642105905e-05, "loss": 2.6807, "step": 589000 }, { "epoch": 1.12, "learning_rate": 4.71930825686675e-05, "loss": 2.7042, "step": 589500 }, { "epoch": 1.12, "learning_rate": 4.719070569002835e-05, "loss": 2.7131, "step": 590000 }, { "epoch": 1.13, "learning_rate": 4.718832404810536e-05, "loss": 2.6878, "step": 590500 }, { "epoch": 1.13, "learning_rate": 4.7185942406182364e-05, "loss": 2.6843, "step": 591000 }, { "epoch": 1.13, "learning_rate": 4.7183560764259374e-05, "loss": 2.6736, "step": 591500 }, { "epoch": 1.13, "learning_rate": 4.718117912233637e-05, "loss": 2.6902, "step": 592000 }, { "epoch": 1.13, "learning_rate": 4.717879748041338e-05, "loss": 2.6992, "step": 592500 }, { "epoch": 1.13, "learning_rate": 4.717641583849038e-05, "loss": 2.68, "step": 593000 }, { "epoch": 1.13, "learning_rate": 4.717403419656739e-05, "loss": 2.6816, "step": 593500 }, { "epoch": 1.13, "learning_rate": 4.7171652554644394e-05, "loss": 2.6932, "step": 594000 }, { "epoch": 1.13, "learning_rate": 4.7169270912721396e-05, "loss": 2.6888, "step": 594500 }, { "epoch": 1.13, "learning_rate": 4.7166889270798406e-05, "loss": 2.6985, "step": 595000 }, { "epoch": 1.13, "learning_rate": 4.716450762887541e-05, "loss": 2.6678, "step": 595500 }, { "epoch": 1.14, "learning_rate": 4.716212598695242e-05, "loss": 2.694, "step": 596000 }, { "epoch": 1.14, "learning_rate": 4.715974434502942e-05, "loss": 2.6825, "step": 596500 }, { "epoch": 1.14, "learning_rate": 4.715736270310643e-05, "loss": 2.6752, "step": 597000 }, { "epoch": 1.14, "learning_rate": 4.715498582446728e-05, "loss": 2.6781, "step": 597500 }, { "epoch": 1.14, "learning_rate": 4.715260418254428e-05, "loss": 2.6855, "step": 598000 }, { "epoch": 1.14, "learning_rate": 4.715022254062129e-05, "loss": 2.7066, "step": 598500 }, { "epoch": 1.14, "learning_rate": 4.714784566198214e-05, "loss": 2.68, "step": 599000 }, { "epoch": 1.14, "learning_rate": 4.714546402005914e-05, "loss": 2.7226, "step": 599500 }, { "epoch": 1.14, "learning_rate": 4.714308237813615e-05, "loss": 2.6787, "step": 600000 }, { "epoch": 1.14, "eval_accuracy": 0.5154796162010787, "eval_loss": 2.592280864715576, "eval_runtime": 4195.3477, "eval_samples_per_second": 65.546, "eval_steps_per_second": 6.555, "step": 600000 }, { "epoch": 1.14, "learning_rate": 4.7140700736213154e-05, "loss": 2.6742, "step": 600500 }, { "epoch": 1.15, "learning_rate": 4.713831909429016e-05, "loss": 2.6712, "step": 601000 }, { "epoch": 1.15, "learning_rate": 4.713593745236716e-05, "loss": 2.6777, "step": 601500 }, { "epoch": 1.15, "learning_rate": 4.713355581044417e-05, "loss": 2.6927, "step": 602000 }, { "epoch": 1.15, "learning_rate": 4.713117416852118e-05, "loss": 2.6851, "step": 602500 }, { "epoch": 1.15, "learning_rate": 4.712879252659818e-05, "loss": 2.6849, "step": 603000 }, { "epoch": 1.15, "learning_rate": 4.7126410884675184e-05, "loss": 2.6926, "step": 603500 }, { "epoch": 1.15, "learning_rate": 4.7124029242752186e-05, "loss": 2.6917, "step": 604000 }, { "epoch": 1.15, "learning_rate": 4.7121647600829196e-05, "loss": 2.6922, "step": 604500 }, { "epoch": 1.15, "learning_rate": 4.711927072219005e-05, "loss": 2.674, "step": 605000 }, { "epoch": 1.15, "learning_rate": 4.711689384355089e-05, "loss": 2.6723, "step": 605500 }, { "epoch": 1.15, "learning_rate": 4.71145122016279e-05, "loss": 2.6981, "step": 606000 }, { "epoch": 1.16, "learning_rate": 4.711213055970491e-05, "loss": 2.6657, "step": 606500 }, { "epoch": 1.16, "learning_rate": 4.7109748917781915e-05, "loss": 2.68, "step": 607000 }, { "epoch": 1.16, "learning_rate": 4.710736727585892e-05, "loss": 2.686, "step": 607500 }, { "epoch": 1.16, "learning_rate": 4.710498563393592e-05, "loss": 2.695, "step": 608000 }, { "epoch": 1.16, "learning_rate": 4.710260399201293e-05, "loss": 2.6643, "step": 608500 }, { "epoch": 1.16, "learning_rate": 4.710022235008993e-05, "loss": 2.6762, "step": 609000 }, { "epoch": 1.16, "learning_rate": 4.7097845471450784e-05, "loss": 2.6624, "step": 609500 }, { "epoch": 1.16, "learning_rate": 4.709546859281164e-05, "loss": 2.6669, "step": 610000 }, { "epoch": 1.16, "learning_rate": 4.709309171417249e-05, "loss": 2.6744, "step": 610500 }, { "epoch": 1.16, "learning_rate": 4.709071007224949e-05, "loss": 2.6766, "step": 611000 }, { "epoch": 1.17, "learning_rate": 4.7088328430326494e-05, "loss": 2.6918, "step": 611500 }, { "epoch": 1.17, "learning_rate": 4.7085946788403504e-05, "loss": 2.6906, "step": 612000 }, { "epoch": 1.17, "learning_rate": 4.7083565146480506e-05, "loss": 2.7058, "step": 612500 }, { "epoch": 1.17, "learning_rate": 4.7081183504557516e-05, "loss": 2.6782, "step": 613000 }, { "epoch": 1.17, "learning_rate": 4.707880186263452e-05, "loss": 2.6982, "step": 613500 }, { "epoch": 1.17, "learning_rate": 4.707642022071152e-05, "loss": 2.6735, "step": 614000 }, { "epoch": 1.17, "learning_rate": 4.7074038578788524e-05, "loss": 2.7012, "step": 614500 }, { "epoch": 1.17, "learning_rate": 4.707165693686553e-05, "loss": 2.6834, "step": 615000 }, { "epoch": 1.17, "learning_rate": 4.7069275294942536e-05, "loss": 2.6918, "step": 615500 }, { "epoch": 1.17, "learning_rate": 4.7066893653019545e-05, "loss": 2.6952, "step": 616000 }, { "epoch": 1.17, "learning_rate": 4.70645167743804e-05, "loss": 2.6852, "step": 616500 }, { "epoch": 1.18, "learning_rate": 4.706213989574125e-05, "loss": 2.6824, "step": 617000 }, { "epoch": 1.18, "learning_rate": 4.7059758253818245e-05, "loss": 2.6601, "step": 617500 }, { "epoch": 1.18, "learning_rate": 4.7057376611895255e-05, "loss": 2.6737, "step": 618000 }, { "epoch": 1.18, "learning_rate": 4.705499496997226e-05, "loss": 2.6672, "step": 618500 }, { "epoch": 1.18, "learning_rate": 4.705261332804927e-05, "loss": 2.6526, "step": 619000 }, { "epoch": 1.18, "learning_rate": 4.705023168612627e-05, "loss": 2.656, "step": 619500 }, { "epoch": 1.18, "learning_rate": 4.704785004420328e-05, "loss": 2.6934, "step": 620000 }, { "epoch": 1.18, "learning_rate": 4.704546840228028e-05, "loss": 2.6566, "step": 620500 }, { "epoch": 1.18, "learning_rate": 4.7043096286924983e-05, "loss": 2.6799, "step": 621000 }, { "epoch": 1.18, "learning_rate": 4.704071464500198e-05, "loss": 2.6945, "step": 621500 }, { "epoch": 1.19, "learning_rate": 4.703833300307899e-05, "loss": 2.6831, "step": 622000 }, { "epoch": 1.19, "learning_rate": 4.703595136115599e-05, "loss": 2.6868, "step": 622500 }, { "epoch": 1.19, "learning_rate": 4.7033569719233e-05, "loss": 2.6593, "step": 623000 }, { "epoch": 1.19, "learning_rate": 4.703118807731e-05, "loss": 2.6886, "step": 623500 }, { "epoch": 1.19, "learning_rate": 4.7028806435387006e-05, "loss": 2.6741, "step": 624000 }, { "epoch": 1.19, "learning_rate": 4.7026424793464015e-05, "loss": 2.6841, "step": 624500 }, { "epoch": 1.19, "learning_rate": 4.702404791482487e-05, "loss": 2.699, "step": 625000 }, { "epoch": 1.19, "learning_rate": 4.702166627290187e-05, "loss": 2.6761, "step": 625500 }, { "epoch": 1.19, "learning_rate": 4.701928463097888e-05, "loss": 2.6783, "step": 626000 }, { "epoch": 1.19, "learning_rate": 4.701690298905588e-05, "loss": 2.689, "step": 626500 }, { "epoch": 1.19, "learning_rate": 4.7014526110416735e-05, "loss": 2.6889, "step": 627000 }, { "epoch": 1.2, "learning_rate": 4.701214446849374e-05, "loss": 2.6981, "step": 627500 }, { "epoch": 1.2, "learning_rate": 4.700976282657074e-05, "loss": 2.6787, "step": 628000 }, { "epoch": 1.2, "learning_rate": 4.700738118464775e-05, "loss": 2.7018, "step": 628500 }, { "epoch": 1.2, "learning_rate": 4.70050043060086e-05, "loss": 2.6734, "step": 629000 }, { "epoch": 1.2, "learning_rate": 4.7002622664085604e-05, "loss": 2.6783, "step": 629500 }, { "epoch": 1.2, "learning_rate": 4.7000241022162614e-05, "loss": 2.6724, "step": 630000 }, { "epoch": 1.2, "eval_accuracy": 0.5167217000820766, "eval_loss": 2.5831358432769775, "eval_runtime": 4199.0371, "eval_samples_per_second": 65.488, "eval_steps_per_second": 6.549, "step": 630000 }, { "epoch": 1.2, "learning_rate": 4.6997859380239616e-05, "loss": 2.6889, "step": 630500 }, { "epoch": 1.2, "learning_rate": 4.699547773831662e-05, "loss": 2.6906, "step": 631000 }, { "epoch": 1.2, "learning_rate": 4.699309609639362e-05, "loss": 2.6861, "step": 631500 }, { "epoch": 1.2, "learning_rate": 4.699071445447063e-05, "loss": 2.6842, "step": 632000 }, { "epoch": 1.21, "learning_rate": 4.6988332812547633e-05, "loss": 2.6648, "step": 632500 }, { "epoch": 1.21, "learning_rate": 4.698595117062464e-05, "loss": 2.6737, "step": 633000 }, { "epoch": 1.21, "learning_rate": 4.6983574291985495e-05, "loss": 2.7084, "step": 633500 }, { "epoch": 1.21, "learning_rate": 4.698119265006249e-05, "loss": 2.6892, "step": 634000 }, { "epoch": 1.21, "learning_rate": 4.69788110081395e-05, "loss": 2.6759, "step": 634500 }, { "epoch": 1.21, "learning_rate": 4.697642936621651e-05, "loss": 2.6819, "step": 635000 }, { "epoch": 1.21, "learning_rate": 4.6974052487577355e-05, "loss": 2.6863, "step": 635500 }, { "epoch": 1.21, "learning_rate": 4.6971670845654365e-05, "loss": 2.6888, "step": 636000 }, { "epoch": 1.21, "learning_rate": 4.696929396701522e-05, "loss": 2.6727, "step": 636500 }, { "epoch": 1.21, "learning_rate": 4.696691232509222e-05, "loss": 2.694, "step": 637000 }, { "epoch": 1.21, "learning_rate": 4.696453068316922e-05, "loss": 2.6636, "step": 637500 }, { "epoch": 1.22, "learning_rate": 4.696214904124623e-05, "loss": 2.6783, "step": 638000 }, { "epoch": 1.22, "learning_rate": 4.6959767399323234e-05, "loss": 2.6461, "step": 638500 }, { "epoch": 1.22, "learning_rate": 4.6957390520684087e-05, "loss": 2.6709, "step": 639000 }, { "epoch": 1.22, "learning_rate": 4.695500887876109e-05, "loss": 2.683, "step": 639500 }, { "epoch": 1.22, "learning_rate": 4.69526272368381e-05, "loss": 2.6934, "step": 640000 }, { "epoch": 1.22, "learning_rate": 4.69502455949151e-05, "loss": 2.67, "step": 640500 }, { "epoch": 1.22, "learning_rate": 4.6947863952992104e-05, "loss": 2.6988, "step": 641000 }, { "epoch": 1.22, "learning_rate": 4.694548231106911e-05, "loss": 2.6719, "step": 641500 }, { "epoch": 1.22, "learning_rate": 4.6943105432429966e-05, "loss": 2.6553, "step": 642000 }, { "epoch": 1.22, "learning_rate": 4.694072379050697e-05, "loss": 2.691, "step": 642500 }, { "epoch": 1.23, "learning_rate": 4.693834214858398e-05, "loss": 2.6904, "step": 643000 }, { "epoch": 1.23, "learning_rate": 4.693596050666098e-05, "loss": 2.6773, "step": 643500 }, { "epoch": 1.23, "learning_rate": 4.693358362802183e-05, "loss": 2.6711, "step": 644000 }, { "epoch": 1.23, "learning_rate": 4.6931201986098835e-05, "loss": 2.6848, "step": 644500 }, { "epoch": 1.23, "learning_rate": 4.692882034417584e-05, "loss": 2.6695, "step": 645000 }, { "epoch": 1.23, "learning_rate": 4.692643870225285e-05, "loss": 2.6906, "step": 645500 }, { "epoch": 1.23, "learning_rate": 4.692405706032985e-05, "loss": 2.6696, "step": 646000 }, { "epoch": 1.23, "learning_rate": 4.692167541840686e-05, "loss": 2.6818, "step": 646500 }, { "epoch": 1.23, "learning_rate": 4.6919293776483855e-05, "loss": 2.685, "step": 647000 }, { "epoch": 1.23, "learning_rate": 4.6916912134560864e-05, "loss": 2.6744, "step": 647500 }, { "epoch": 1.23, "learning_rate": 4.691453525592172e-05, "loss": 2.6852, "step": 648000 }, { "epoch": 1.24, "learning_rate": 4.691215361399872e-05, "loss": 2.6739, "step": 648500 }, { "epoch": 1.24, "learning_rate": 4.690977197207573e-05, "loss": 2.6814, "step": 649000 }, { "epoch": 1.24, "learning_rate": 4.690739509343658e-05, "loss": 2.6918, "step": 649500 }, { "epoch": 1.24, "learning_rate": 4.6905013451513584e-05, "loss": 2.689, "step": 650000 }, { "epoch": 1.24, "learning_rate": 4.6902631809590586e-05, "loss": 2.6828, "step": 650500 }, { "epoch": 1.24, "learning_rate": 4.690025016766759e-05, "loss": 2.6676, "step": 651000 }, { "epoch": 1.24, "learning_rate": 4.689787328902844e-05, "loss": 2.6808, "step": 651500 }, { "epoch": 1.24, "learning_rate": 4.689549164710545e-05, "loss": 2.6865, "step": 652000 }, { "epoch": 1.24, "learning_rate": 4.689311000518245e-05, "loss": 2.679, "step": 652500 }, { "epoch": 1.24, "learning_rate": 4.689072836325946e-05, "loss": 2.6955, "step": 653000 }, { "epoch": 1.25, "learning_rate": 4.6888346721336465e-05, "loss": 2.6715, "step": 653500 }, { "epoch": 1.25, "learning_rate": 4.688596507941347e-05, "loss": 2.7001, "step": 654000 }, { "epoch": 1.25, "learning_rate": 4.688358343749048e-05, "loss": 2.6724, "step": 654500 }, { "epoch": 1.25, "learning_rate": 4.688120179556748e-05, "loss": 2.6896, "step": 655000 }, { "epoch": 1.25, "learning_rate": 4.687882491692833e-05, "loss": 2.6809, "step": 655500 }, { "epoch": 1.25, "learning_rate": 4.687644327500534e-05, "loss": 2.6632, "step": 656000 }, { "epoch": 1.25, "learning_rate": 4.6874061633082344e-05, "loss": 2.6904, "step": 656500 }, { "epoch": 1.25, "learning_rate": 4.687167999115935e-05, "loss": 2.6593, "step": 657000 }, { "epoch": 1.25, "learning_rate": 4.68693031125202e-05, "loss": 2.6668, "step": 657500 }, { "epoch": 1.25, "learning_rate": 4.68669214705972e-05, "loss": 2.6775, "step": 658000 }, { "epoch": 1.25, "learning_rate": 4.686453982867421e-05, "loss": 2.6632, "step": 658500 }, { "epoch": 1.26, "learning_rate": 4.6862162950035063e-05, "loss": 2.6836, "step": 659000 }, { "epoch": 1.26, "learning_rate": 4.6859781308112066e-05, "loss": 2.6874, "step": 659500 }, { "epoch": 1.26, "learning_rate": 4.685740442947292e-05, "loss": 2.6833, "step": 660000 }, { "epoch": 1.26, "eval_accuracy": 0.5175295910184283, "eval_loss": 2.578192710876465, "eval_runtime": 4208.5343, "eval_samples_per_second": 65.341, "eval_steps_per_second": 6.534, "step": 660000 }, { "epoch": 1.26, "learning_rate": 4.685502278754992e-05, "loss": 2.7071, "step": 660500 }, { "epoch": 1.26, "learning_rate": 4.6852641145626924e-05, "loss": 2.6969, "step": 661000 }, { "epoch": 1.26, "learning_rate": 4.685025950370393e-05, "loss": 2.6778, "step": 661500 }, { "epoch": 1.26, "learning_rate": 4.6847877861780936e-05, "loss": 2.6719, "step": 662000 }, { "epoch": 1.26, "learning_rate": 4.6845496219857945e-05, "loss": 2.6781, "step": 662500 }, { "epoch": 1.26, "learning_rate": 4.684311457793495e-05, "loss": 2.6805, "step": 663000 }, { "epoch": 1.26, "learning_rate": 4.684073293601196e-05, "loss": 2.6777, "step": 663500 }, { "epoch": 1.27, "learning_rate": 4.683835129408895e-05, "loss": 2.6687, "step": 664000 }, { "epoch": 1.27, "learning_rate": 4.683596965216596e-05, "loss": 2.6797, "step": 664500 }, { "epoch": 1.27, "learning_rate": 4.6833588010242965e-05, "loss": 2.6883, "step": 665000 }, { "epoch": 1.27, "learning_rate": 4.683121113160382e-05, "loss": 2.6685, "step": 665500 }, { "epoch": 1.27, "learning_rate": 4.682882948968083e-05, "loss": 2.6609, "step": 666000 }, { "epoch": 1.27, "learning_rate": 4.682644784775783e-05, "loss": 2.6707, "step": 666500 }, { "epoch": 1.27, "learning_rate": 4.682406620583483e-05, "loss": 2.6774, "step": 667000 }, { "epoch": 1.27, "learning_rate": 4.682168456391184e-05, "loss": 2.6726, "step": 667500 }, { "epoch": 1.27, "learning_rate": 4.6819302921988844e-05, "loss": 2.6712, "step": 668000 }, { "epoch": 1.27, "learning_rate": 4.6816926043349696e-05, "loss": 2.6857, "step": 668500 }, { "epoch": 1.27, "learning_rate": 4.68145444014267e-05, "loss": 2.6772, "step": 669000 }, { "epoch": 1.28, "learning_rate": 4.681216275950371e-05, "loss": 2.6645, "step": 669500 }, { "epoch": 1.28, "learning_rate": 4.680978111758071e-05, "loss": 2.6689, "step": 670000 }, { "epoch": 1.28, "learning_rate": 4.680740423894156e-05, "loss": 2.662, "step": 670500 }, { "epoch": 1.28, "learning_rate": 4.6805022597018566e-05, "loss": 2.6611, "step": 671000 }, { "epoch": 1.28, "learning_rate": 4.6802640955095575e-05, "loss": 2.6651, "step": 671500 }, { "epoch": 1.28, "learning_rate": 4.680025931317258e-05, "loss": 2.6842, "step": 672000 }, { "epoch": 1.28, "learning_rate": 4.679787767124959e-05, "loss": 2.6692, "step": 672500 }, { "epoch": 1.28, "learning_rate": 4.679549602932659e-05, "loss": 2.6849, "step": 673000 }, { "epoch": 1.28, "learning_rate": 4.679311438740359e-05, "loss": 2.6853, "step": 673500 }, { "epoch": 1.28, "learning_rate": 4.6790737508764445e-05, "loss": 2.6645, "step": 674000 }, { "epoch": 1.29, "learning_rate": 4.678835586684145e-05, "loss": 2.6657, "step": 674500 }, { "epoch": 1.29, "learning_rate": 4.678597422491846e-05, "loss": 2.6575, "step": 675000 }, { "epoch": 1.29, "learning_rate": 4.678359258299546e-05, "loss": 2.6815, "step": 675500 }, { "epoch": 1.29, "learning_rate": 4.678121094107247e-05, "loss": 2.6751, "step": 676000 }, { "epoch": 1.29, "learning_rate": 4.677882929914947e-05, "loss": 2.6689, "step": 676500 }, { "epoch": 1.29, "learning_rate": 4.6776447657226474e-05, "loss": 2.6607, "step": 677000 }, { "epoch": 1.29, "learning_rate": 4.6774066015303483e-05, "loss": 2.6943, "step": 677500 }, { "epoch": 1.29, "learning_rate": 4.677168913666433e-05, "loss": 2.6556, "step": 678000 }, { "epoch": 1.29, "learning_rate": 4.676930749474134e-05, "loss": 2.6801, "step": 678500 }, { "epoch": 1.29, "learning_rate": 4.676693061610219e-05, "loss": 2.6758, "step": 679000 }, { "epoch": 1.29, "learning_rate": 4.676455373746304e-05, "loss": 2.6664, "step": 679500 }, { "epoch": 1.3, "learning_rate": 4.6762172095540046e-05, "loss": 2.7001, "step": 680000 }, { "epoch": 1.3, "learning_rate": 4.675979045361705e-05, "loss": 2.6827, "step": 680500 }, { "epoch": 1.3, "learning_rate": 4.675740881169405e-05, "loss": 2.6867, "step": 681000 }, { "epoch": 1.3, "learning_rate": 4.675502716977106e-05, "loss": 2.6689, "step": 681500 }, { "epoch": 1.3, "learning_rate": 4.675264552784806e-05, "loss": 2.6761, "step": 682000 }, { "epoch": 1.3, "learning_rate": 4.675026388592507e-05, "loss": 2.6756, "step": 682500 }, { "epoch": 1.3, "learning_rate": 4.6747882244002075e-05, "loss": 2.6693, "step": 683000 }, { "epoch": 1.3, "learning_rate": 4.674550060207908e-05, "loss": 2.6688, "step": 683500 }, { "epoch": 1.3, "learning_rate": 4.674312372343993e-05, "loss": 2.6681, "step": 684000 }, { "epoch": 1.3, "learning_rate": 4.674074208151694e-05, "loss": 2.6762, "step": 684500 }, { "epoch": 1.31, "learning_rate": 4.673836043959394e-05, "loss": 2.6512, "step": 685000 }, { "epoch": 1.31, "learning_rate": 4.673597879767095e-05, "loss": 2.6555, "step": 685500 }, { "epoch": 1.31, "learning_rate": 4.6733597155747954e-05, "loss": 2.6852, "step": 686000 }, { "epoch": 1.31, "learning_rate": 4.6731220277108806e-05, "loss": 2.6895, "step": 686500 }, { "epoch": 1.31, "learning_rate": 4.672883863518581e-05, "loss": 2.6495, "step": 687000 }, { "epoch": 1.31, "learning_rate": 4.672645699326281e-05, "loss": 2.6774, "step": 687500 }, { "epoch": 1.31, "learning_rate": 4.672407535133982e-05, "loss": 2.6613, "step": 688000 }, { "epoch": 1.31, "learning_rate": 4.6721693709416823e-05, "loss": 2.6691, "step": 688500 }, { "epoch": 1.31, "learning_rate": 4.671931206749383e-05, "loss": 2.6703, "step": 689000 }, { "epoch": 1.31, "learning_rate": 4.671693042557083e-05, "loss": 2.683, "step": 689500 }, { "epoch": 1.31, "learning_rate": 4.671455354693168e-05, "loss": 2.679, "step": 690000 }, { "epoch": 1.31, "eval_accuracy": 0.5181796256041372, "eval_loss": 2.5731215476989746, "eval_runtime": 4204.9585, "eval_samples_per_second": 65.396, "eval_steps_per_second": 6.54, "step": 690000 }, { "epoch": 1.32, "learning_rate": 4.671217190500869e-05, "loss": 2.6649, "step": 690500 }, { "epoch": 1.32, "learning_rate": 4.670979026308569e-05, "loss": 2.6883, "step": 691000 }, { "epoch": 1.32, "learning_rate": 4.67074086211627e-05, "loss": 2.6826, "step": 691500 }, { "epoch": 1.32, "learning_rate": 4.6705031742523555e-05, "loss": 2.6683, "step": 692000 }, { "epoch": 1.32, "learning_rate": 4.670265010060056e-05, "loss": 2.6645, "step": 692500 }, { "epoch": 1.32, "learning_rate": 4.670026845867756e-05, "loss": 2.6495, "step": 693000 }, { "epoch": 1.32, "learning_rate": 4.669789158003841e-05, "loss": 2.6813, "step": 693500 }, { "epoch": 1.32, "learning_rate": 4.6695509938115415e-05, "loss": 2.6609, "step": 694000 }, { "epoch": 1.32, "learning_rate": 4.6693128296192424e-05, "loss": 2.6441, "step": 694500 }, { "epoch": 1.32, "learning_rate": 4.669074665426943e-05, "loss": 2.6683, "step": 695000 }, { "epoch": 1.33, "learning_rate": 4.6688365012346436e-05, "loss": 2.6599, "step": 695500 }, { "epoch": 1.33, "learning_rate": 4.668598337042344e-05, "loss": 2.6728, "step": 696000 }, { "epoch": 1.33, "learning_rate": 4.668360172850044e-05, "loss": 2.6579, "step": 696500 }, { "epoch": 1.33, "learning_rate": 4.668122008657745e-05, "loss": 2.6537, "step": 697000 }, { "epoch": 1.33, "learning_rate": 4.6678838444654454e-05, "loss": 2.6613, "step": 697500 }, { "epoch": 1.33, "learning_rate": 4.667645680273146e-05, "loss": 2.6736, "step": 698000 }, { "epoch": 1.33, "learning_rate": 4.6674079924092315e-05, "loss": 2.6609, "step": 698500 }, { "epoch": 1.33, "learning_rate": 4.667169828216932e-05, "loss": 2.6514, "step": 699000 }, { "epoch": 1.33, "learning_rate": 4.666931664024632e-05, "loss": 2.6639, "step": 699500 }, { "epoch": 1.33, "learning_rate": 4.666693976160717e-05, "loss": 2.6667, "step": 700000 }, { "epoch": 1.33, "learning_rate": 4.6664558119684175e-05, "loss": 2.6677, "step": 700500 }, { "epoch": 1.34, "learning_rate": 4.6662176477761185e-05, "loss": 2.6807, "step": 701000 }, { "epoch": 1.34, "learning_rate": 4.665979483583819e-05, "loss": 2.6624, "step": 701500 }, { "epoch": 1.34, "learning_rate": 4.66574131939152e-05, "loss": 2.6547, "step": 702000 }, { "epoch": 1.34, "learning_rate": 4.665503631527605e-05, "loss": 2.6697, "step": 702500 }, { "epoch": 1.34, "learning_rate": 4.6652654673353045e-05, "loss": 2.6788, "step": 703000 }, { "epoch": 1.34, "learning_rate": 4.6650273031430054e-05, "loss": 2.6668, "step": 703500 }, { "epoch": 1.34, "learning_rate": 4.664789138950706e-05, "loss": 2.6626, "step": 704000 }, { "epoch": 1.34, "learning_rate": 4.6645509747584066e-05, "loss": 2.6711, "step": 704500 }, { "epoch": 1.34, "learning_rate": 4.664312810566107e-05, "loss": 2.6566, "step": 705000 }, { "epoch": 1.34, "learning_rate": 4.664074646373808e-05, "loss": 2.667, "step": 705500 }, { "epoch": 1.35, "learning_rate": 4.663836482181508e-05, "loss": 2.6713, "step": 706000 }, { "epoch": 1.35, "learning_rate": 4.6635983179892084e-05, "loss": 2.6746, "step": 706500 }, { "epoch": 1.35, "learning_rate": 4.6633606301252936e-05, "loss": 2.6791, "step": 707000 }, { "epoch": 1.35, "learning_rate": 4.663122465932994e-05, "loss": 2.6773, "step": 707500 }, { "epoch": 1.35, "learning_rate": 4.662884778069079e-05, "loss": 2.6877, "step": 708000 }, { "epoch": 1.35, "learning_rate": 4.66264661387678e-05, "loss": 2.6506, "step": 708500 }, { "epoch": 1.35, "learning_rate": 4.66240844968448e-05, "loss": 2.6694, "step": 709000 }, { "epoch": 1.35, "learning_rate": 4.6621702854921806e-05, "loss": 2.644, "step": 709500 }, { "epoch": 1.35, "learning_rate": 4.6619321212998815e-05, "loss": 2.6732, "step": 710000 }, { "epoch": 1.35, "learning_rate": 4.661693957107582e-05, "loss": 2.6439, "step": 710500 }, { "epoch": 1.35, "learning_rate": 4.661455792915283e-05, "loss": 2.6872, "step": 711000 }, { "epoch": 1.36, "learning_rate": 4.661217628722983e-05, "loss": 2.6825, "step": 711500 }, { "epoch": 1.36, "learning_rate": 4.660979464530683e-05, "loss": 2.6632, "step": 712000 }, { "epoch": 1.36, "learning_rate": 4.6607413003383835e-05, "loss": 2.6697, "step": 712500 }, { "epoch": 1.36, "learning_rate": 4.660503612474469e-05, "loss": 2.6768, "step": 713000 }, { "epoch": 1.36, "learning_rate": 4.6602654482821697e-05, "loss": 2.6531, "step": 713500 }, { "epoch": 1.36, "learning_rate": 4.66002728408987e-05, "loss": 2.6727, "step": 714000 }, { "epoch": 1.36, "learning_rate": 4.659789119897571e-05, "loss": 2.6763, "step": 714500 }, { "epoch": 1.36, "learning_rate": 4.659550955705271e-05, "loss": 2.6554, "step": 715000 }, { "epoch": 1.36, "learning_rate": 4.6593127915129714e-05, "loss": 2.6734, "step": 715500 }, { "epoch": 1.36, "learning_rate": 4.659074627320672e-05, "loss": 2.6626, "step": 716000 }, { "epoch": 1.37, "learning_rate": 4.6588364631283726e-05, "loss": 2.6632, "step": 716500 }, { "epoch": 1.37, "learning_rate": 4.658598775264458e-05, "loss": 2.6693, "step": 717000 }, { "epoch": 1.37, "learning_rate": 4.658360611072158e-05, "loss": 2.6519, "step": 717500 }, { "epoch": 1.37, "learning_rate": 4.658122446879859e-05, "loss": 2.6619, "step": 718000 }, { "epoch": 1.37, "learning_rate": 4.657884282687559e-05, "loss": 2.675, "step": 718500 }, { "epoch": 1.37, "learning_rate": 4.6576465948236445e-05, "loss": 2.669, "step": 719000 }, { "epoch": 1.37, "learning_rate": 4.657408430631345e-05, "loss": 2.6529, "step": 719500 }, { "epoch": 1.37, "learning_rate": 4.657170266439046e-05, "loss": 2.6649, "step": 720000 }, { "epoch": 1.37, "eval_accuracy": 0.519216208648991, "eval_loss": 2.5663814544677734, "eval_runtime": 4195.7549, "eval_samples_per_second": 65.54, "eval_steps_per_second": 6.554, "step": 720000 }, { "epoch": 1.37, "learning_rate": 4.656932102246746e-05, "loss": 2.6647, "step": 720500 }, { "epoch": 1.37, "learning_rate": 4.656693938054447e-05, "loss": 2.653, "step": 721000 }, { "epoch": 1.37, "learning_rate": 4.6564557738621465e-05, "loss": 2.6594, "step": 721500 }, { "epoch": 1.38, "learning_rate": 4.656218085998232e-05, "loss": 2.671, "step": 722000 }, { "epoch": 1.38, "learning_rate": 4.655979921805933e-05, "loss": 2.6469, "step": 722500 }, { "epoch": 1.38, "learning_rate": 4.655741757613633e-05, "loss": 2.6641, "step": 723000 }, { "epoch": 1.38, "learning_rate": 4.655503593421334e-05, "loss": 2.6475, "step": 723500 }, { "epoch": 1.38, "learning_rate": 4.655265429229034e-05, "loss": 2.6646, "step": 724000 }, { "epoch": 1.38, "learning_rate": 4.655027265036735e-05, "loss": 2.6565, "step": 724500 }, { "epoch": 1.38, "learning_rate": 4.6547891008444347e-05, "loss": 2.6755, "step": 725000 }, { "epoch": 1.38, "learning_rate": 4.65455141298052e-05, "loss": 2.6546, "step": 725500 }, { "epoch": 1.38, "learning_rate": 4.654313248788221e-05, "loss": 2.6693, "step": 726000 }, { "epoch": 1.38, "learning_rate": 4.654075084595921e-05, "loss": 2.6527, "step": 726500 }, { "epoch": 1.39, "learning_rate": 4.653836920403622e-05, "loss": 2.6624, "step": 727000 }, { "epoch": 1.39, "learning_rate": 4.653598756211322e-05, "loss": 2.6636, "step": 727500 }, { "epoch": 1.39, "learning_rate": 4.6533605920190226e-05, "loss": 2.6644, "step": 728000 }, { "epoch": 1.39, "learning_rate": 4.6531224278267235e-05, "loss": 2.6675, "step": 728500 }, { "epoch": 1.39, "learning_rate": 4.652884263634424e-05, "loss": 2.6777, "step": 729000 }, { "epoch": 1.39, "learning_rate": 4.652646575770509e-05, "loss": 2.6509, "step": 729500 }, { "epoch": 1.39, "learning_rate": 4.652408887906594e-05, "loss": 2.6562, "step": 730000 }, { "epoch": 1.39, "learning_rate": 4.6521707237142945e-05, "loss": 2.6461, "step": 730500 }, { "epoch": 1.39, "learning_rate": 4.6519325595219954e-05, "loss": 2.6746, "step": 731000 }, { "epoch": 1.39, "learning_rate": 4.651694395329696e-05, "loss": 2.6458, "step": 731500 }, { "epoch": 1.39, "learning_rate": 4.651456231137396e-05, "loss": 2.6602, "step": 732000 }, { "epoch": 1.4, "learning_rate": 4.651218543273481e-05, "loss": 2.6424, "step": 732500 }, { "epoch": 1.4, "learning_rate": 4.650980379081182e-05, "loss": 2.6661, "step": 733000 }, { "epoch": 1.4, "learning_rate": 4.6507422148888824e-05, "loss": 2.6513, "step": 733500 }, { "epoch": 1.4, "learning_rate": 4.6505045270249676e-05, "loss": 2.6552, "step": 734000 }, { "epoch": 1.4, "learning_rate": 4.650266362832668e-05, "loss": 2.68, "step": 734500 }, { "epoch": 1.4, "learning_rate": 4.650028198640368e-05, "loss": 2.6488, "step": 735000 }, { "epoch": 1.4, "learning_rate": 4.649790034448069e-05, "loss": 2.6505, "step": 735500 }, { "epoch": 1.4, "learning_rate": 4.649551870255769e-05, "loss": 2.6689, "step": 736000 }, { "epoch": 1.4, "learning_rate": 4.64931370606347e-05, "loss": 2.6544, "step": 736500 }, { "epoch": 1.4, "learning_rate": 4.6490755418711705e-05, "loss": 2.6526, "step": 737000 }, { "epoch": 1.41, "learning_rate": 4.6488373776788715e-05, "loss": 2.667, "step": 737500 }, { "epoch": 1.41, "learning_rate": 4.648599213486571e-05, "loss": 2.6608, "step": 738000 }, { "epoch": 1.41, "learning_rate": 4.648361049294272e-05, "loss": 2.6784, "step": 738500 }, { "epoch": 1.41, "learning_rate": 4.648122885101972e-05, "loss": 2.666, "step": 739000 }, { "epoch": 1.41, "learning_rate": 4.647884720909673e-05, "loss": 2.6436, "step": 739500 }, { "epoch": 1.41, "learning_rate": 4.6476470330457584e-05, "loss": 2.6606, "step": 740000 }, { "epoch": 1.41, "learning_rate": 4.647409345181844e-05, "loss": 2.6647, "step": 740500 }, { "epoch": 1.41, "learning_rate": 4.647171180989544e-05, "loss": 2.6588, "step": 741000 }, { "epoch": 1.41, "learning_rate": 4.646933493125629e-05, "loss": 2.6743, "step": 741500 }, { "epoch": 1.41, "learning_rate": 4.6466953289333294e-05, "loss": 2.6607, "step": 742000 }, { "epoch": 1.41, "learning_rate": 4.64645716474103e-05, "loss": 2.6531, "step": 742500 }, { "epoch": 1.42, "learning_rate": 4.6462190005487306e-05, "loss": 2.658, "step": 743000 }, { "epoch": 1.42, "learning_rate": 4.645980836356431e-05, "loss": 2.6232, "step": 743500 }, { "epoch": 1.42, "learning_rate": 4.645742672164132e-05, "loss": 2.6679, "step": 744000 }, { "epoch": 1.42, "learning_rate": 4.645504507971832e-05, "loss": 2.6629, "step": 744500 }, { "epoch": 1.42, "learning_rate": 4.6452663437795323e-05, "loss": 2.6738, "step": 745000 }, { "epoch": 1.42, "learning_rate": 4.645028179587233e-05, "loss": 2.6553, "step": 745500 }, { "epoch": 1.42, "learning_rate": 4.6447900153949336e-05, "loss": 2.6642, "step": 746000 }, { "epoch": 1.42, "learning_rate": 4.6445518512026345e-05, "loss": 2.6691, "step": 746500 }, { "epoch": 1.42, "learning_rate": 4.644314639667104e-05, "loss": 2.6621, "step": 747000 }, { "epoch": 1.42, "learning_rate": 4.644076475474804e-05, "loss": 2.6515, "step": 747500 }, { "epoch": 1.43, "learning_rate": 4.643838311282505e-05, "loss": 2.6532, "step": 748000 }, { "epoch": 1.43, "learning_rate": 4.6436006234185904e-05, "loss": 2.6568, "step": 748500 }, { "epoch": 1.43, "learning_rate": 4.64336245922629e-05, "loss": 2.6439, "step": 749000 }, { "epoch": 1.43, "learning_rate": 4.643124295033991e-05, "loss": 2.6758, "step": 749500 }, { "epoch": 1.43, "learning_rate": 4.642886130841691e-05, "loss": 2.6692, "step": 750000 }, { "epoch": 1.43, "eval_accuracy": 0.519575879546778, "eval_loss": 2.561831474304199, "eval_runtime": 4202.889, "eval_samples_per_second": 65.428, "eval_steps_per_second": 6.543, "step": 750000 }, { "epoch": 1.43, "learning_rate": 4.642647966649392e-05, "loss": 2.6532, "step": 750500 }, { "epoch": 1.43, "learning_rate": 4.642409802457093e-05, "loss": 2.656, "step": 751000 }, { "epoch": 1.43, "learning_rate": 4.642171638264793e-05, "loss": 2.6645, "step": 751500 }, { "epoch": 1.43, "learning_rate": 4.6419334740724936e-05, "loss": 2.6729, "step": 752000 }, { "epoch": 1.43, "learning_rate": 4.641695309880194e-05, "loss": 2.6568, "step": 752500 }, { "epoch": 1.43, "learning_rate": 4.641457145687895e-05, "loss": 2.6821, "step": 753000 }, { "epoch": 1.44, "learning_rate": 4.641218981495595e-05, "loss": 2.6567, "step": 753500 }, { "epoch": 1.44, "learning_rate": 4.640980817303296e-05, "loss": 2.6492, "step": 754000 }, { "epoch": 1.44, "learning_rate": 4.640742653110996e-05, "loss": 2.6394, "step": 754500 }, { "epoch": 1.44, "learning_rate": 4.6405044889186966e-05, "loss": 2.6421, "step": 755000 }, { "epoch": 1.44, "learning_rate": 4.6402663247263975e-05, "loss": 2.6417, "step": 755500 }, { "epoch": 1.44, "learning_rate": 4.640028160534098e-05, "loss": 2.6616, "step": 756000 }, { "epoch": 1.44, "learning_rate": 4.639790472670183e-05, "loss": 2.6696, "step": 756500 }, { "epoch": 1.44, "learning_rate": 4.639552308477883e-05, "loss": 2.6591, "step": 757000 }, { "epoch": 1.44, "learning_rate": 4.6393141442855835e-05, "loss": 2.6651, "step": 757500 }, { "epoch": 1.44, "learning_rate": 4.6390759800932845e-05, "loss": 2.6613, "step": 758000 }, { "epoch": 1.45, "learning_rate": 4.63883829222937e-05, "loss": 2.6485, "step": 758500 }, { "epoch": 1.45, "learning_rate": 4.63860012803707e-05, "loss": 2.6532, "step": 759000 }, { "epoch": 1.45, "learning_rate": 4.638361963844771e-05, "loss": 2.6562, "step": 759500 }, { "epoch": 1.45, "learning_rate": 4.638123799652471e-05, "loss": 2.6596, "step": 760000 }, { "epoch": 1.45, "learning_rate": 4.6378861117885564e-05, "loss": 2.6691, "step": 760500 }, { "epoch": 1.45, "learning_rate": 4.6376479475962566e-05, "loss": 2.6625, "step": 761000 }, { "epoch": 1.45, "learning_rate": 4.637409783403957e-05, "loss": 2.6426, "step": 761500 }, { "epoch": 1.45, "learning_rate": 4.637172095540042e-05, "loss": 2.6541, "step": 762000 }, { "epoch": 1.45, "learning_rate": 4.636933931347743e-05, "loss": 2.6449, "step": 762500 }, { "epoch": 1.45, "learning_rate": 4.6366957671554433e-05, "loss": 2.6468, "step": 763000 }, { "epoch": 1.45, "learning_rate": 4.636457602963144e-05, "loss": 2.6709, "step": 763500 }, { "epoch": 1.46, "learning_rate": 4.636219438770844e-05, "loss": 2.6792, "step": 764000 }, { "epoch": 1.46, "learning_rate": 4.635981274578545e-05, "loss": 2.6569, "step": 764500 }, { "epoch": 1.46, "learning_rate": 4.635743110386245e-05, "loss": 2.6512, "step": 765000 }, { "epoch": 1.46, "learning_rate": 4.635504946193946e-05, "loss": 2.6528, "step": 765500 }, { "epoch": 1.46, "learning_rate": 4.635266782001646e-05, "loss": 2.6661, "step": 766000 }, { "epoch": 1.46, "learning_rate": 4.635028617809347e-05, "loss": 2.6584, "step": 766500 }, { "epoch": 1.46, "learning_rate": 4.6347904536170475e-05, "loss": 2.6904, "step": 767000 }, { "epoch": 1.46, "learning_rate": 4.634552765753132e-05, "loss": 2.6702, "step": 767500 }, { "epoch": 1.46, "learning_rate": 4.634314601560833e-05, "loss": 2.6316, "step": 768000 }, { "epoch": 1.46, "learning_rate": 4.634076437368534e-05, "loss": 2.6479, "step": 768500 }, { "epoch": 1.47, "learning_rate": 4.633838273176234e-05, "loss": 2.6672, "step": 769000 }, { "epoch": 1.47, "learning_rate": 4.633600108983935e-05, "loss": 2.6563, "step": 769500 }, { "epoch": 1.47, "learning_rate": 4.633361944791635e-05, "loss": 2.6475, "step": 770000 }, { "epoch": 1.47, "learning_rate": 4.6331237805993356e-05, "loss": 2.6337, "step": 770500 }, { "epoch": 1.47, "learning_rate": 4.632886092735421e-05, "loss": 2.6532, "step": 771000 }, { "epoch": 1.47, "learning_rate": 4.632647928543121e-05, "loss": 2.6647, "step": 771500 }, { "epoch": 1.47, "learning_rate": 4.632409764350822e-05, "loss": 2.6667, "step": 772000 }, { "epoch": 1.47, "learning_rate": 4.632171600158522e-05, "loss": 2.6511, "step": 772500 }, { "epoch": 1.47, "learning_rate": 4.631933435966223e-05, "loss": 2.6679, "step": 773000 }, { "epoch": 1.47, "learning_rate": 4.631695271773923e-05, "loss": 2.6599, "step": 773500 }, { "epoch": 1.47, "learning_rate": 4.631457107581624e-05, "loss": 2.6608, "step": 774000 }, { "epoch": 1.48, "learning_rate": 4.631218943389324e-05, "loss": 2.6646, "step": 774500 }, { "epoch": 1.48, "learning_rate": 4.630981255525409e-05, "loss": 2.6642, "step": 775000 }, { "epoch": 1.48, "learning_rate": 4.63074309133311e-05, "loss": 2.6642, "step": 775500 }, { "epoch": 1.48, "learning_rate": 4.6305049271408105e-05, "loss": 2.6539, "step": 776000 }, { "epoch": 1.48, "learning_rate": 4.630266762948511e-05, "loss": 2.6405, "step": 776500 }, { "epoch": 1.48, "learning_rate": 4.630028598756212e-05, "loss": 2.648, "step": 777000 }, { "epoch": 1.48, "learning_rate": 4.629790910892296e-05, "loss": 2.6366, "step": 777500 }, { "epoch": 1.48, "learning_rate": 4.629552746699997e-05, "loss": 2.6424, "step": 778000 }, { "epoch": 1.48, "learning_rate": 4.6293145825076974e-05, "loss": 2.6559, "step": 778500 }, { "epoch": 1.48, "learning_rate": 4.6290764183153984e-05, "loss": 2.668, "step": 779000 }, { "epoch": 1.49, "learning_rate": 4.6288387304514836e-05, "loss": 2.6642, "step": 779500 }, { "epoch": 1.49, "learning_rate": 4.628600566259184e-05, "loss": 2.6571, "step": 780000 }, { "epoch": 1.49, "eval_accuracy": 0.5205963310781492, "eval_loss": 2.555237293243408, "eval_runtime": 4194.1295, "eval_samples_per_second": 65.565, "eval_steps_per_second": 6.557, "step": 780000 }, { "epoch": 1.49, "learning_rate": 4.628362402066884e-05, "loss": 2.6507, "step": 780500 }, { "epoch": 1.49, "learning_rate": 4.628124237874585e-05, "loss": 2.6613, "step": 781000 }, { "epoch": 1.49, "learning_rate": 4.6278860736822853e-05, "loss": 2.6677, "step": 781500 }, { "epoch": 1.49, "learning_rate": 4.627647909489986e-05, "loss": 2.6643, "step": 782000 }, { "epoch": 1.49, "learning_rate": 4.6274097452976865e-05, "loss": 2.6741, "step": 782500 }, { "epoch": 1.49, "learning_rate": 4.627172057433771e-05, "loss": 2.6659, "step": 783000 }, { "epoch": 1.49, "learning_rate": 4.626933893241472e-05, "loss": 2.6316, "step": 783500 }, { "epoch": 1.49, "learning_rate": 4.626695729049172e-05, "loss": 2.6609, "step": 784000 }, { "epoch": 1.49, "learning_rate": 4.626457564856873e-05, "loss": 2.6652, "step": 784500 }, { "epoch": 1.5, "learning_rate": 4.6262194006645735e-05, "loss": 2.6696, "step": 785000 }, { "epoch": 1.5, "learning_rate": 4.6259812364722744e-05, "loss": 2.6729, "step": 785500 }, { "epoch": 1.5, "learning_rate": 4.625743072279975e-05, "loss": 2.6426, "step": 786000 }, { "epoch": 1.5, "learning_rate": 4.625505384416059e-05, "loss": 2.673, "step": 786500 }, { "epoch": 1.5, "learning_rate": 4.6252676965521445e-05, "loss": 2.6467, "step": 787000 }, { "epoch": 1.5, "learning_rate": 4.6250295323598454e-05, "loss": 2.6582, "step": 787500 }, { "epoch": 1.5, "learning_rate": 4.624791368167546e-05, "loss": 2.646, "step": 788000 }, { "epoch": 1.5, "learning_rate": 4.6245532039752466e-05, "loss": 2.6471, "step": 788500 }, { "epoch": 1.5, "learning_rate": 4.624315039782947e-05, "loss": 2.6643, "step": 789000 }, { "epoch": 1.5, "learning_rate": 4.624076875590647e-05, "loss": 2.6373, "step": 789500 }, { "epoch": 1.51, "learning_rate": 4.623838711398348e-05, "loss": 2.6665, "step": 790000 }, { "epoch": 1.51, "learning_rate": 4.6236005472060484e-05, "loss": 2.6646, "step": 790500 }, { "epoch": 1.51, "learning_rate": 4.623362383013749e-05, "loss": 2.6573, "step": 791000 }, { "epoch": 1.51, "learning_rate": 4.6231242188214496e-05, "loss": 2.6577, "step": 791500 }, { "epoch": 1.51, "learning_rate": 4.6228860546291505e-05, "loss": 2.6574, "step": 792000 }, { "epoch": 1.51, "learning_rate": 4.622648366765235e-05, "loss": 2.6596, "step": 792500 }, { "epoch": 1.51, "learning_rate": 4.622410202572935e-05, "loss": 2.6495, "step": 793000 }, { "epoch": 1.51, "learning_rate": 4.622172038380636e-05, "loss": 2.6498, "step": 793500 }, { "epoch": 1.51, "learning_rate": 4.6219338741883365e-05, "loss": 2.6517, "step": 794000 }, { "epoch": 1.51, "learning_rate": 4.6216957099960375e-05, "loss": 2.6733, "step": 794500 }, { "epoch": 1.51, "learning_rate": 4.621457545803738e-05, "loss": 2.6422, "step": 795000 }, { "epoch": 1.52, "learning_rate": 4.621219381611438e-05, "loss": 2.6582, "step": 795500 }, { "epoch": 1.52, "learning_rate": 4.620981217419139e-05, "loss": 2.643, "step": 796000 }, { "epoch": 1.52, "learning_rate": 4.6207435295552235e-05, "loss": 2.659, "step": 796500 }, { "epoch": 1.52, "learning_rate": 4.620505841691309e-05, "loss": 2.6589, "step": 797000 }, { "epoch": 1.52, "learning_rate": 4.6202676774990096e-05, "loss": 2.6421, "step": 797500 }, { "epoch": 1.52, "learning_rate": 4.62002951330671e-05, "loss": 2.6547, "step": 798000 }, { "epoch": 1.52, "learning_rate": 4.619791349114411e-05, "loss": 2.6632, "step": 798500 }, { "epoch": 1.52, "learning_rate": 4.6195531849221104e-05, "loss": 2.6557, "step": 799000 }, { "epoch": 1.52, "learning_rate": 4.6193150207298114e-05, "loss": 2.6485, "step": 799500 }, { "epoch": 1.52, "learning_rate": 4.619076856537512e-05, "loss": 2.6548, "step": 800000 }, { "epoch": 1.53, "learning_rate": 4.6188386923452126e-05, "loss": 2.6524, "step": 800500 }, { "epoch": 1.53, "learning_rate": 4.618601004481298e-05, "loss": 2.6403, "step": 801000 }, { "epoch": 1.53, "learning_rate": 4.618363316617383e-05, "loss": 2.6505, "step": 801500 }, { "epoch": 1.53, "learning_rate": 4.618125152425083e-05, "loss": 2.6582, "step": 802000 }, { "epoch": 1.53, "learning_rate": 4.6178869882327836e-05, "loss": 2.6704, "step": 802500 }, { "epoch": 1.53, "learning_rate": 4.617648824040484e-05, "loss": 2.6488, "step": 803000 }, { "epoch": 1.53, "learning_rate": 4.617410659848185e-05, "loss": 2.6615, "step": 803500 }, { "epoch": 1.53, "learning_rate": 4.61717297198427e-05, "loss": 2.6378, "step": 804000 }, { "epoch": 1.53, "learning_rate": 4.616935284120355e-05, "loss": 2.6576, "step": 804500 }, { "epoch": 1.53, "learning_rate": 4.6166971199280555e-05, "loss": 2.6463, "step": 805000 }, { "epoch": 1.53, "learning_rate": 4.6164589557357564e-05, "loss": 2.641, "step": 805500 }, { "epoch": 1.54, "learning_rate": 4.616220791543457e-05, "loss": 2.6411, "step": 806000 }, { "epoch": 1.54, "learning_rate": 4.615983103679541e-05, "loss": 2.6419, "step": 806500 }, { "epoch": 1.54, "learning_rate": 4.615744939487242e-05, "loss": 2.6489, "step": 807000 }, { "epoch": 1.54, "learning_rate": 4.6155067752949424e-05, "loss": 2.6415, "step": 807500 }, { "epoch": 1.54, "learning_rate": 4.6152686111026434e-05, "loss": 2.6503, "step": 808000 }, { "epoch": 1.54, "learning_rate": 4.6150304469103436e-05, "loss": 2.6278, "step": 808500 }, { "epoch": 1.54, "learning_rate": 4.6147922827180446e-05, "loss": 2.6498, "step": 809000 }, { "epoch": 1.54, "learning_rate": 4.614554118525745e-05, "loss": 2.6128, "step": 809500 }, { "epoch": 1.54, "learning_rate": 4.614315954333445e-05, "loss": 2.6632, "step": 810000 }, { "epoch": 1.54, "eval_accuracy": 0.5212842025588575, "eval_loss": 2.5512163639068604, "eval_runtime": 4198.9475, "eval_samples_per_second": 65.49, "eval_steps_per_second": 6.549, "step": 810000 }, { "epoch": 1.54, "learning_rate": 4.614077790141146e-05, "loss": 2.6393, "step": 810500 }, { "epoch": 1.55, "learning_rate": 4.613839625948846e-05, "loss": 2.6454, "step": 811000 }, { "epoch": 1.55, "learning_rate": 4.613601461756547e-05, "loss": 2.6385, "step": 811500 }, { "epoch": 1.55, "learning_rate": 4.613363297564247e-05, "loss": 2.6713, "step": 812000 }, { "epoch": 1.55, "learning_rate": 4.613125609700332e-05, "loss": 2.6524, "step": 812500 }, { "epoch": 1.55, "learning_rate": 4.612887921836417e-05, "loss": 2.6497, "step": 813000 }, { "epoch": 1.55, "learning_rate": 4.612649757644118e-05, "loss": 2.63, "step": 813500 }, { "epoch": 1.55, "learning_rate": 4.6124115934518185e-05, "loss": 2.6415, "step": 814000 }, { "epoch": 1.55, "learning_rate": 4.6121734292595194e-05, "loss": 2.6325, "step": 814500 }, { "epoch": 1.55, "learning_rate": 4.61193526506722e-05, "loss": 2.6554, "step": 815000 }, { "epoch": 1.55, "learning_rate": 4.6116971008749206e-05, "loss": 2.6367, "step": 815500 }, { "epoch": 1.55, "learning_rate": 4.61145893668262e-05, "loss": 2.6499, "step": 816000 }, { "epoch": 1.56, "learning_rate": 4.611220772490321e-05, "loss": 2.6478, "step": 816500 }, { "epoch": 1.56, "learning_rate": 4.6109826082980214e-05, "loss": 2.6837, "step": 817000 }, { "epoch": 1.56, "learning_rate": 4.6107444441057224e-05, "loss": 2.649, "step": 817500 }, { "epoch": 1.56, "learning_rate": 4.610506279913423e-05, "loss": 2.6584, "step": 818000 }, { "epoch": 1.56, "learning_rate": 4.610268115721123e-05, "loss": 2.6687, "step": 818500 }, { "epoch": 1.56, "learning_rate": 4.610030427857208e-05, "loss": 2.655, "step": 819000 }, { "epoch": 1.56, "learning_rate": 4.609792263664909e-05, "loss": 2.6542, "step": 819500 }, { "epoch": 1.56, "learning_rate": 4.6095545758009936e-05, "loss": 2.6591, "step": 820000 }, { "epoch": 1.56, "learning_rate": 4.6093164116086946e-05, "loss": 2.6334, "step": 820500 }, { "epoch": 1.56, "learning_rate": 4.6090782474163955e-05, "loss": 2.6479, "step": 821000 }, { "epoch": 1.57, "learning_rate": 4.608840083224096e-05, "loss": 2.6419, "step": 821500 }, { "epoch": 1.57, "learning_rate": 4.608601919031796e-05, "loss": 2.6535, "step": 822000 }, { "epoch": 1.57, "learning_rate": 4.608363754839496e-05, "loss": 2.6558, "step": 822500 }, { "epoch": 1.57, "learning_rate": 4.608125590647197e-05, "loss": 2.6503, "step": 823000 }, { "epoch": 1.57, "learning_rate": 4.6078879027832824e-05, "loss": 2.6613, "step": 823500 }, { "epoch": 1.57, "learning_rate": 4.607649738590983e-05, "loss": 2.644, "step": 824000 }, { "epoch": 1.57, "learning_rate": 4.6074115743986837e-05, "loss": 2.6487, "step": 824500 }, { "epoch": 1.57, "learning_rate": 4.607173410206384e-05, "loss": 2.6497, "step": 825000 }, { "epoch": 1.57, "learning_rate": 4.606935246014084e-05, "loss": 2.6707, "step": 825500 }, { "epoch": 1.57, "learning_rate": 4.6066970818217844e-05, "loss": 2.6534, "step": 826000 }, { "epoch": 1.57, "learning_rate": 4.6064589176294854e-05, "loss": 2.6433, "step": 826500 }, { "epoch": 1.58, "learning_rate": 4.606221706093955e-05, "loss": 2.66, "step": 827000 }, { "epoch": 1.58, "learning_rate": 4.605983541901656e-05, "loss": 2.63, "step": 827500 }, { "epoch": 1.58, "learning_rate": 4.605745377709356e-05, "loss": 2.6388, "step": 828000 }, { "epoch": 1.58, "learning_rate": 4.605507213517057e-05, "loss": 2.6394, "step": 828500 }, { "epoch": 1.58, "learning_rate": 4.6052690493247566e-05, "loss": 2.6559, "step": 829000 }, { "epoch": 1.58, "learning_rate": 4.6050308851324576e-05, "loss": 2.6643, "step": 829500 }, { "epoch": 1.58, "learning_rate": 4.604792720940158e-05, "loss": 2.6451, "step": 830000 }, { "epoch": 1.58, "learning_rate": 4.604555033076243e-05, "loss": 2.6413, "step": 830500 }, { "epoch": 1.58, "learning_rate": 4.604316868883944e-05, "loss": 2.6234, "step": 831000 }, { "epoch": 1.58, "learning_rate": 4.604078704691644e-05, "loss": 2.6437, "step": 831500 }, { "epoch": 1.59, "learning_rate": 4.6038405404993445e-05, "loss": 2.6386, "step": 832000 }, { "epoch": 1.59, "learning_rate": 4.6036023763070455e-05, "loss": 2.652, "step": 832500 }, { "epoch": 1.59, "learning_rate": 4.603364212114746e-05, "loss": 2.661, "step": 833000 }, { "epoch": 1.59, "learning_rate": 4.603126047922447e-05, "loss": 2.6371, "step": 833500 }, { "epoch": 1.59, "learning_rate": 4.602887883730147e-05, "loss": 2.6346, "step": 834000 }, { "epoch": 1.59, "learning_rate": 4.602649719537848e-05, "loss": 2.6487, "step": 834500 }, { "epoch": 1.59, "learning_rate": 4.6024120316739324e-05, "loss": 2.6388, "step": 835000 }, { "epoch": 1.59, "learning_rate": 4.602173867481633e-05, "loss": 2.6519, "step": 835500 }, { "epoch": 1.59, "learning_rate": 4.6019357032893336e-05, "loss": 2.6428, "step": 836000 }, { "epoch": 1.59, "learning_rate": 4.601697539097034e-05, "loss": 2.6301, "step": 836500 }, { "epoch": 1.59, "learning_rate": 4.601459374904735e-05, "loss": 2.6703, "step": 837000 }, { "epoch": 1.6, "learning_rate": 4.601221210712435e-05, "loss": 2.6492, "step": 837500 }, { "epoch": 1.6, "learning_rate": 4.6009830465201354e-05, "loss": 2.6537, "step": 838000 }, { "epoch": 1.6, "learning_rate": 4.600744882327836e-05, "loss": 2.6435, "step": 838500 }, { "epoch": 1.6, "learning_rate": 4.600507194463921e-05, "loss": 2.6413, "step": 839000 }, { "epoch": 1.6, "learning_rate": 4.600269030271622e-05, "loss": 2.6356, "step": 839500 }, { "epoch": 1.6, "learning_rate": 4.600031342407707e-05, "loss": 2.6711, "step": 840000 }, { "epoch": 1.6, "eval_accuracy": 0.5215292536856598, "eval_loss": 2.547076463699341, "eval_runtime": 4203.2407, "eval_samples_per_second": 65.423, "eval_steps_per_second": 6.542, "step": 840000 }, { "epoch": 1.6, "learning_rate": 4.599793178215407e-05, "loss": 2.6455, "step": 840500 }, { "epoch": 1.6, "learning_rate": 4.599555014023108e-05, "loss": 2.6561, "step": 841000 }, { "epoch": 1.6, "learning_rate": 4.599316849830808e-05, "loss": 2.6514, "step": 841500 }, { "epoch": 1.6, "learning_rate": 4.599079161966893e-05, "loss": 2.6379, "step": 842000 }, { "epoch": 1.61, "learning_rate": 4.598840997774594e-05, "loss": 2.6327, "step": 842500 }, { "epoch": 1.61, "learning_rate": 4.598602833582294e-05, "loss": 2.6426, "step": 843000 }, { "epoch": 1.61, "learning_rate": 4.598364669389995e-05, "loss": 2.6639, "step": 843500 }, { "epoch": 1.61, "learning_rate": 4.5981265051976954e-05, "loss": 2.659, "step": 844000 }, { "epoch": 1.61, "learning_rate": 4.5978892936621656e-05, "loss": 2.6453, "step": 844500 }, { "epoch": 1.61, "learning_rate": 4.597651129469866e-05, "loss": 2.6368, "step": 845000 }, { "epoch": 1.61, "learning_rate": 4.597412965277566e-05, "loss": 2.634, "step": 845500 }, { "epoch": 1.61, "learning_rate": 4.5971748010852664e-05, "loss": 2.6488, "step": 846000 }, { "epoch": 1.61, "learning_rate": 4.5969366368929674e-05, "loss": 2.6481, "step": 846500 }, { "epoch": 1.61, "learning_rate": 4.5966984727006676e-05, "loss": 2.6337, "step": 847000 }, { "epoch": 1.61, "learning_rate": 4.5964603085083686e-05, "loss": 2.6434, "step": 847500 }, { "epoch": 1.62, "learning_rate": 4.596222144316069e-05, "loss": 2.6376, "step": 848000 }, { "epoch": 1.62, "learning_rate": 4.595983980123769e-05, "loss": 2.6331, "step": 848500 }, { "epoch": 1.62, "learning_rate": 4.59574581593147e-05, "loss": 2.6423, "step": 849000 }, { "epoch": 1.62, "learning_rate": 4.595508128067555e-05, "loss": 2.6465, "step": 849500 }, { "epoch": 1.62, "learning_rate": 4.5952699638752555e-05, "loss": 2.6263, "step": 850000 }, { "epoch": 1.62, "learning_rate": 4.5950317996829565e-05, "loss": 2.6585, "step": 850500 }, { "epoch": 1.62, "learning_rate": 4.594793635490657e-05, "loss": 2.6492, "step": 851000 }, { "epoch": 1.62, "learning_rate": 4.594555471298357e-05, "loss": 2.6421, "step": 851500 }, { "epoch": 1.62, "learning_rate": 4.594317307106057e-05, "loss": 2.6424, "step": 852000 }, { "epoch": 1.62, "learning_rate": 4.5940796192421425e-05, "loss": 2.659, "step": 852500 }, { "epoch": 1.63, "learning_rate": 4.5938414550498434e-05, "loss": 2.6545, "step": 853000 }, { "epoch": 1.63, "learning_rate": 4.593603290857544e-05, "loss": 2.6425, "step": 853500 }, { "epoch": 1.63, "learning_rate": 4.5933651266652446e-05, "loss": 2.6619, "step": 854000 }, { "epoch": 1.63, "learning_rate": 4.593126962472944e-05, "loss": 2.6647, "step": 854500 }, { "epoch": 1.63, "learning_rate": 4.592888798280645e-05, "loss": 2.6587, "step": 855000 }, { "epoch": 1.63, "learning_rate": 4.5926506340883454e-05, "loss": 2.6476, "step": 855500 }, { "epoch": 1.63, "learning_rate": 4.5924124698960463e-05, "loss": 2.632, "step": 856000 }, { "epoch": 1.63, "learning_rate": 4.5921747820321316e-05, "loss": 2.6434, "step": 856500 }, { "epoch": 1.63, "learning_rate": 4.591936617839832e-05, "loss": 2.6544, "step": 857000 }, { "epoch": 1.63, "learning_rate": 4.591698453647533e-05, "loss": 2.6625, "step": 857500 }, { "epoch": 1.63, "learning_rate": 4.591460289455233e-05, "loss": 2.6209, "step": 858000 }, { "epoch": 1.64, "learning_rate": 4.5912226015913176e-05, "loss": 2.6423, "step": 858500 }, { "epoch": 1.64, "learning_rate": 4.5909844373990185e-05, "loss": 2.6334, "step": 859000 }, { "epoch": 1.64, "learning_rate": 4.590746273206719e-05, "loss": 2.6243, "step": 859500 }, { "epoch": 1.64, "learning_rate": 4.59050810901442e-05, "loss": 2.6518, "step": 860000 }, { "epoch": 1.64, "learning_rate": 4.590270421150505e-05, "loss": 2.6566, "step": 860500 }, { "epoch": 1.64, "learning_rate": 4.590032256958205e-05, "loss": 2.6445, "step": 861000 }, { "epoch": 1.64, "learning_rate": 4.5897940927659055e-05, "loss": 2.6461, "step": 861500 }, { "epoch": 1.64, "learning_rate": 4.5895559285736064e-05, "loss": 2.6482, "step": 862000 }, { "epoch": 1.64, "learning_rate": 4.589317764381307e-05, "loss": 2.6509, "step": 862500 }, { "epoch": 1.64, "learning_rate": 4.589080076517392e-05, "loss": 2.6632, "step": 863000 }, { "epoch": 1.65, "learning_rate": 4.588841912325093e-05, "loss": 2.6588, "step": 863500 }, { "epoch": 1.65, "learning_rate": 4.588603748132793e-05, "loss": 2.6495, "step": 864000 }, { "epoch": 1.65, "learning_rate": 4.5883655839404934e-05, "loss": 2.637, "step": 864500 }, { "epoch": 1.65, "learning_rate": 4.5881274197481936e-05, "loss": 2.6486, "step": 865000 }, { "epoch": 1.65, "learning_rate": 4.5878892555558946e-05, "loss": 2.6428, "step": 865500 }, { "epoch": 1.65, "learning_rate": 4.587651091363595e-05, "loss": 2.6531, "step": 866000 }, { "epoch": 1.65, "learning_rate": 4.58741340349968e-05, "loss": 2.625, "step": 866500 }, { "epoch": 1.65, "learning_rate": 4.587175239307381e-05, "loss": 2.6644, "step": 867000 }, { "epoch": 1.65, "learning_rate": 4.586937075115081e-05, "loss": 2.6268, "step": 867500 }, { "epoch": 1.65, "learning_rate": 4.5866989109227815e-05, "loss": 2.6456, "step": 868000 }, { "epoch": 1.65, "learning_rate": 4.586460746730482e-05, "loss": 2.638, "step": 868500 }, { "epoch": 1.66, "learning_rate": 4.586223058866567e-05, "loss": 2.6585, "step": 869000 }, { "epoch": 1.66, "learning_rate": 4.585984894674268e-05, "loss": 2.6613, "step": 869500 }, { "epoch": 1.66, "learning_rate": 4.585746730481968e-05, "loss": 2.6516, "step": 870000 }, { "epoch": 1.66, "eval_accuracy": 0.5221764231325704, "eval_loss": 2.5439419746398926, "eval_runtime": 4199.2725, "eval_samples_per_second": 65.485, "eval_steps_per_second": 6.549, "step": 870000 }, { "epoch": 1.66, "learning_rate": 4.585508566289669e-05, "loss": 2.6325, "step": 870500 }, { "epoch": 1.66, "learning_rate": 4.5852704020973694e-05, "loss": 2.6576, "step": 871000 }, { "epoch": 1.66, "learning_rate": 4.585032714233454e-05, "loss": 2.6467, "step": 871500 }, { "epoch": 1.66, "learning_rate": 4.584794550041155e-05, "loss": 2.6576, "step": 872000 }, { "epoch": 1.66, "learning_rate": 4.584556385848855e-05, "loss": 2.6344, "step": 872500 }, { "epoch": 1.66, "learning_rate": 4.584318221656556e-05, "loss": 2.6487, "step": 873000 }, { "epoch": 1.66, "learning_rate": 4.5840800574642564e-05, "loss": 2.6451, "step": 873500 }, { "epoch": 1.67, "learning_rate": 4.5838423696003416e-05, "loss": 2.6619, "step": 874000 }, { "epoch": 1.67, "learning_rate": 4.583604205408042e-05, "loss": 2.6424, "step": 874500 }, { "epoch": 1.67, "learning_rate": 4.583366041215743e-05, "loss": 2.6423, "step": 875000 }, { "epoch": 1.67, "learning_rate": 4.583127877023443e-05, "loss": 2.6651, "step": 875500 }, { "epoch": 1.67, "learning_rate": 4.582890189159528e-05, "loss": 2.6406, "step": 876000 }, { "epoch": 1.67, "learning_rate": 4.5826520249672286e-05, "loss": 2.6392, "step": 876500 }, { "epoch": 1.67, "learning_rate": 4.5824138607749295e-05, "loss": 2.6139, "step": 877000 }, { "epoch": 1.67, "learning_rate": 4.5821756965826305e-05, "loss": 2.6397, "step": 877500 }, { "epoch": 1.67, "learning_rate": 4.58193753239033e-05, "loss": 2.6429, "step": 878000 }, { "epoch": 1.67, "learning_rate": 4.581699368198031e-05, "loss": 2.6447, "step": 878500 }, { "epoch": 1.67, "learning_rate": 4.581461204005731e-05, "loss": 2.6416, "step": 879000 }, { "epoch": 1.68, "learning_rate": 4.581223039813432e-05, "loss": 2.647, "step": 879500 }, { "epoch": 1.68, "learning_rate": 4.5809853519495174e-05, "loss": 2.6543, "step": 880000 }, { "epoch": 1.68, "learning_rate": 4.580747664085602e-05, "loss": 2.6487, "step": 880500 }, { "epoch": 1.68, "learning_rate": 4.580509499893303e-05, "loss": 2.6198, "step": 881000 }, { "epoch": 1.68, "learning_rate": 4.580271335701003e-05, "loss": 2.6442, "step": 881500 }, { "epoch": 1.68, "learning_rate": 4.5800331715087034e-05, "loss": 2.636, "step": 882000 }, { "epoch": 1.68, "learning_rate": 4.5797950073164044e-05, "loss": 2.6515, "step": 882500 }, { "epoch": 1.68, "learning_rate": 4.5795568431241046e-05, "loss": 2.6151, "step": 883000 }, { "epoch": 1.68, "learning_rate": 4.5793186789318056e-05, "loss": 2.6427, "step": 883500 }, { "epoch": 1.68, "learning_rate": 4.579080514739505e-05, "loss": 2.6402, "step": 884000 }, { "epoch": 1.69, "learning_rate": 4.578842350547206e-05, "loss": 2.6468, "step": 884500 }, { "epoch": 1.69, "learning_rate": 4.578604662683291e-05, "loss": 2.6257, "step": 885000 }, { "epoch": 1.69, "learning_rate": 4.5783664984909916e-05, "loss": 2.6453, "step": 885500 }, { "epoch": 1.69, "learning_rate": 4.5781283342986925e-05, "loss": 2.6352, "step": 886000 }, { "epoch": 1.69, "learning_rate": 4.577890170106393e-05, "loss": 2.6461, "step": 886500 }, { "epoch": 1.69, "learning_rate": 4.577652005914094e-05, "loss": 2.6444, "step": 887000 }, { "epoch": 1.69, "learning_rate": 4.577414318050178e-05, "loss": 2.6475, "step": 887500 }, { "epoch": 1.69, "learning_rate": 4.577176153857879e-05, "loss": 2.6408, "step": 888000 }, { "epoch": 1.69, "learning_rate": 4.5769379896655795e-05, "loss": 2.6437, "step": 888500 }, { "epoch": 1.69, "learning_rate": 4.5766998254732804e-05, "loss": 2.6185, "step": 889000 }, { "epoch": 1.69, "learning_rate": 4.576461661280981e-05, "loss": 2.6385, "step": 889500 }, { "epoch": 1.7, "learning_rate": 4.576223973417066e-05, "loss": 2.6497, "step": 890000 }, { "epoch": 1.7, "learning_rate": 4.575985809224766e-05, "loss": 2.6348, "step": 890500 }, { "epoch": 1.7, "learning_rate": 4.5757476450324664e-05, "loss": 2.6311, "step": 891000 }, { "epoch": 1.7, "learning_rate": 4.5755094808401674e-05, "loss": 2.6409, "step": 891500 }, { "epoch": 1.7, "learning_rate": 4.5752713166478677e-05, "loss": 2.6628, "step": 892000 }, { "epoch": 1.7, "learning_rate": 4.5750331524555686e-05, "loss": 2.6351, "step": 892500 }, { "epoch": 1.7, "learning_rate": 4.574794988263269e-05, "loss": 2.6309, "step": 893000 }, { "epoch": 1.7, "learning_rate": 4.574557300399354e-05, "loss": 2.6338, "step": 893500 }, { "epoch": 1.7, "learning_rate": 4.5743191362070543e-05, "loss": 2.6364, "step": 894000 }, { "epoch": 1.7, "learning_rate": 4.5740809720147546e-05, "loss": 2.6446, "step": 894500 }, { "epoch": 1.71, "learning_rate": 4.57384328415084e-05, "loss": 2.6259, "step": 895000 }, { "epoch": 1.71, "learning_rate": 4.573605119958541e-05, "loss": 2.6268, "step": 895500 }, { "epoch": 1.71, "learning_rate": 4.573366955766241e-05, "loss": 2.6348, "step": 896000 }, { "epoch": 1.71, "learning_rate": 4.573128791573942e-05, "loss": 2.6322, "step": 896500 }, { "epoch": 1.71, "learning_rate": 4.5728906273816416e-05, "loss": 2.6245, "step": 897000 }, { "epoch": 1.71, "learning_rate": 4.5726524631893425e-05, "loss": 2.6347, "step": 897500 }, { "epoch": 1.71, "learning_rate": 4.572414298997043e-05, "loss": 2.6389, "step": 898000 }, { "epoch": 1.71, "learning_rate": 4.572176134804744e-05, "loss": 2.6502, "step": 898500 }, { "epoch": 1.71, "learning_rate": 4.571938446940829e-05, "loss": 2.6657, "step": 899000 }, { "epoch": 1.71, "learning_rate": 4.571700282748529e-05, "loss": 2.6406, "step": 899500 }, { "epoch": 1.71, "learning_rate": 4.57146211855623e-05, "loss": 2.6564, "step": 900000 }, { "epoch": 1.71, "eval_accuracy": 0.5228933867918466, "eval_loss": 2.538137197494507, "eval_runtime": 4197.9338, "eval_samples_per_second": 65.506, "eval_steps_per_second": 6.551, "step": 900000 }, { "epoch": 1.72, "learning_rate": 4.5712239543639304e-05, "loss": 2.6507, "step": 900500 }, { "epoch": 1.72, "learning_rate": 4.570985790171631e-05, "loss": 2.639, "step": 901000 }, { "epoch": 1.72, "learning_rate": 4.5707476259793316e-05, "loss": 2.6245, "step": 901500 }, { "epoch": 1.72, "learning_rate": 4.570509461787032e-05, "loss": 2.6424, "step": 902000 }, { "epoch": 1.72, "learning_rate": 4.570271297594733e-05, "loss": 2.6247, "step": 902500 }, { "epoch": 1.72, "learning_rate": 4.5700331334024324e-05, "loss": 2.6257, "step": 903000 }, { "epoch": 1.72, "learning_rate": 4.5697954455385176e-05, "loss": 2.6264, "step": 903500 }, { "epoch": 1.72, "learning_rate": 4.5695572813462186e-05, "loss": 2.6502, "step": 904000 }, { "epoch": 1.72, "learning_rate": 4.569319117153919e-05, "loss": 2.6579, "step": 904500 }, { "epoch": 1.72, "learning_rate": 4.56908095296162e-05, "loss": 2.6435, "step": 905000 }, { "epoch": 1.73, "learning_rate": 4.56884278876932e-05, "loss": 2.6326, "step": 905500 }, { "epoch": 1.73, "learning_rate": 4.568604624577021e-05, "loss": 2.6405, "step": 906000 }, { "epoch": 1.73, "learning_rate": 4.568366460384721e-05, "loss": 2.64, "step": 906500 }, { "epoch": 1.73, "learning_rate": 4.5681282961924215e-05, "loss": 2.6552, "step": 907000 }, { "epoch": 1.73, "learning_rate": 4.567890608328507e-05, "loss": 2.6356, "step": 907500 }, { "epoch": 1.73, "learning_rate": 4.567652444136207e-05, "loss": 2.648, "step": 908000 }, { "epoch": 1.73, "learning_rate": 4.567414279943908e-05, "loss": 2.6293, "step": 908500 }, { "epoch": 1.73, "learning_rate": 4.567176115751609e-05, "loss": 2.6429, "step": 909000 }, { "epoch": 1.73, "learning_rate": 4.5669384278876934e-05, "loss": 2.621, "step": 909500 }, { "epoch": 1.73, "learning_rate": 4.566700263695394e-05, "loss": 2.6368, "step": 910000 }, { "epoch": 1.73, "learning_rate": 4.5664620995030946e-05, "loss": 2.6286, "step": 910500 }, { "epoch": 1.74, "learning_rate": 4.566223935310795e-05, "loss": 2.6368, "step": 911000 }, { "epoch": 1.74, "learning_rate": 4.565985771118496e-05, "loss": 2.6464, "step": 911500 }, { "epoch": 1.74, "learning_rate": 4.5657480832545804e-05, "loss": 2.6243, "step": 912000 }, { "epoch": 1.74, "learning_rate": 4.565509919062281e-05, "loss": 2.6449, "step": 912500 }, { "epoch": 1.74, "learning_rate": 4.5652717548699816e-05, "loss": 2.6463, "step": 913000 }, { "epoch": 1.74, "learning_rate": 4.565033590677682e-05, "loss": 2.6303, "step": 913500 }, { "epoch": 1.74, "learning_rate": 4.564795902813767e-05, "loss": 2.6351, "step": 914000 }, { "epoch": 1.74, "learning_rate": 4.564557738621468e-05, "loss": 2.6327, "step": 914500 }, { "epoch": 1.74, "learning_rate": 4.564319574429168e-05, "loss": 2.6438, "step": 915000 }, { "epoch": 1.74, "learning_rate": 4.564081410236869e-05, "loss": 2.6299, "step": 915500 }, { "epoch": 1.75, "learning_rate": 4.5638432460445695e-05, "loss": 2.6327, "step": 916000 }, { "epoch": 1.75, "learning_rate": 4.563605558180654e-05, "loss": 2.6286, "step": 916500 }, { "epoch": 1.75, "learning_rate": 4.563367393988355e-05, "loss": 2.6307, "step": 917000 }, { "epoch": 1.75, "learning_rate": 4.563129229796055e-05, "loss": 2.6205, "step": 917500 }, { "epoch": 1.75, "learning_rate": 4.562891065603756e-05, "loss": 2.6251, "step": 918000 }, { "epoch": 1.75, "learning_rate": 4.5626533777398414e-05, "loss": 2.6108, "step": 918500 }, { "epoch": 1.75, "learning_rate": 4.5624152135475417e-05, "loss": 2.6427, "step": 919000 }, { "epoch": 1.75, "learning_rate": 4.5621770493552426e-05, "loss": 2.6276, "step": 919500 }, { "epoch": 1.75, "learning_rate": 4.561938885162942e-05, "loss": 2.6562, "step": 920000 }, { "epoch": 1.75, "learning_rate": 4.561700720970643e-05, "loss": 2.6316, "step": 920500 }, { "epoch": 1.75, "learning_rate": 4.5614625567783434e-05, "loss": 2.6309, "step": 921000 }, { "epoch": 1.76, "learning_rate": 4.561224392586044e-05, "loss": 2.6425, "step": 921500 }, { "epoch": 1.76, "learning_rate": 4.5609862283937446e-05, "loss": 2.6472, "step": 922000 }, { "epoch": 1.76, "learning_rate": 4.56074854052983e-05, "loss": 2.6422, "step": 922500 }, { "epoch": 1.76, "learning_rate": 4.56051037633753e-05, "loss": 2.6389, "step": 923000 }, { "epoch": 1.76, "learning_rate": 4.560272212145231e-05, "loss": 2.635, "step": 923500 }, { "epoch": 1.76, "learning_rate": 4.560034047952931e-05, "loss": 2.6257, "step": 924000 }, { "epoch": 1.76, "learning_rate": 4.559795883760632e-05, "loss": 2.6387, "step": 924500 }, { "epoch": 1.76, "learning_rate": 4.5595577195683325e-05, "loss": 2.6428, "step": 925000 }, { "epoch": 1.76, "learning_rate": 4.5593195553760334e-05, "loss": 2.6389, "step": 925500 }, { "epoch": 1.76, "learning_rate": 4.559081867512118e-05, "loss": 2.6411, "step": 926000 }, { "epoch": 1.77, "learning_rate": 4.558843703319818e-05, "loss": 2.6349, "step": 926500 }, { "epoch": 1.77, "learning_rate": 4.558605539127519e-05, "loss": 2.6403, "step": 927000 }, { "epoch": 1.77, "learning_rate": 4.5583673749352194e-05, "loss": 2.6237, "step": 927500 }, { "epoch": 1.77, "learning_rate": 4.5581292107429204e-05, "loss": 2.6266, "step": 928000 }, { "epoch": 1.77, "learning_rate": 4.5578910465506206e-05, "loss": 2.6426, "step": 928500 }, { "epoch": 1.77, "learning_rate": 4.557652882358321e-05, "loss": 2.6406, "step": 929000 }, { "epoch": 1.77, "learning_rate": 4.557414718166021e-05, "loss": 2.6381, "step": 929500 }, { "epoch": 1.77, "learning_rate": 4.5571770303021064e-05, "loss": 2.6339, "step": 930000 }, { "epoch": 1.77, "eval_accuracy": 0.5233222369280388, "eval_loss": 2.5349619388580322, "eval_runtime": 4196.4562, "eval_samples_per_second": 65.529, "eval_steps_per_second": 6.553, "step": 930000 }, { "epoch": 1.77, "learning_rate": 4.5569388661098073e-05, "loss": 2.6354, "step": 930500 }, { "epoch": 1.77, "learning_rate": 4.5567007019175076e-05, "loss": 2.6417, "step": 931000 }, { "epoch": 1.77, "learning_rate": 4.5564625377252085e-05, "loss": 2.6495, "step": 931500 }, { "epoch": 1.78, "learning_rate": 4.556224849861294e-05, "loss": 2.6465, "step": 932000 }, { "epoch": 1.78, "learning_rate": 4.5559866856689934e-05, "loss": 2.6402, "step": 932500 }, { "epoch": 1.78, "learning_rate": 4.555748521476694e-05, "loss": 2.6236, "step": 933000 }, { "epoch": 1.78, "learning_rate": 4.555510357284395e-05, "loss": 2.6224, "step": 933500 }, { "epoch": 1.78, "learning_rate": 4.5552721930920955e-05, "loss": 2.6272, "step": 934000 }, { "epoch": 1.78, "learning_rate": 4.555034505228181e-05, "loss": 2.6347, "step": 934500 }, { "epoch": 1.78, "learning_rate": 4.554796341035881e-05, "loss": 2.6362, "step": 935000 }, { "epoch": 1.78, "learning_rate": 4.554558176843581e-05, "loss": 2.6358, "step": 935500 }, { "epoch": 1.78, "learning_rate": 4.554320012651282e-05, "loss": 2.6376, "step": 936000 }, { "epoch": 1.78, "learning_rate": 4.5540818484589825e-05, "loss": 2.6286, "step": 936500 }, { "epoch": 1.79, "learning_rate": 4.553844160595068e-05, "loss": 2.6369, "step": 937000 }, { "epoch": 1.79, "learning_rate": 4.5536059964027686e-05, "loss": 2.626, "step": 937500 }, { "epoch": 1.79, "learning_rate": 4.553367832210469e-05, "loss": 2.6344, "step": 938000 }, { "epoch": 1.79, "learning_rate": 4.553130144346554e-05, "loss": 2.6325, "step": 938500 }, { "epoch": 1.79, "learning_rate": 4.5528919801542544e-05, "loss": 2.6301, "step": 939000 }, { "epoch": 1.79, "learning_rate": 4.5526538159619546e-05, "loss": 2.6241, "step": 939500 }, { "epoch": 1.79, "learning_rate": 4.5524156517696556e-05, "loss": 2.6396, "step": 940000 }, { "epoch": 1.79, "learning_rate": 4.552177487577356e-05, "loss": 2.6361, "step": 940500 }, { "epoch": 1.79, "learning_rate": 4.551939323385057e-05, "loss": 2.6466, "step": 941000 }, { "epoch": 1.79, "learning_rate": 4.551701159192757e-05, "loss": 2.6255, "step": 941500 }, { "epoch": 1.79, "learning_rate": 4.551462995000457e-05, "loss": 2.6345, "step": 942000 }, { "epoch": 1.8, "learning_rate": 4.5512253071365425e-05, "loss": 2.6284, "step": 942500 }, { "epoch": 1.8, "learning_rate": 4.550987142944243e-05, "loss": 2.6426, "step": 943000 }, { "epoch": 1.8, "learning_rate": 4.550748978751944e-05, "loss": 2.6468, "step": 943500 }, { "epoch": 1.8, "learning_rate": 4.550510814559644e-05, "loss": 2.6389, "step": 944000 }, { "epoch": 1.8, "learning_rate": 4.550272650367345e-05, "loss": 2.6401, "step": 944500 }, { "epoch": 1.8, "learning_rate": 4.550034486175045e-05, "loss": 2.6252, "step": 945000 }, { "epoch": 1.8, "learning_rate": 4.5497963219827455e-05, "loss": 2.6455, "step": 945500 }, { "epoch": 1.8, "learning_rate": 4.5495581577904464e-05, "loss": 2.6443, "step": 946000 }, { "epoch": 1.8, "learning_rate": 4.549319993598147e-05, "loss": 2.6194, "step": 946500 }, { "epoch": 1.8, "learning_rate": 4.549082305734232e-05, "loss": 2.64, "step": 947000 }, { "epoch": 1.81, "learning_rate": 4.548844141541932e-05, "loss": 2.628, "step": 947500 }, { "epoch": 1.81, "learning_rate": 4.548605977349633e-05, "loss": 2.625, "step": 948000 }, { "epoch": 1.81, "learning_rate": 4.548368289485718e-05, "loss": 2.6493, "step": 948500 }, { "epoch": 1.81, "learning_rate": 4.5481301252934186e-05, "loss": 2.6317, "step": 949000 }, { "epoch": 1.81, "learning_rate": 4.547891961101119e-05, "loss": 2.6345, "step": 949500 }, { "epoch": 1.81, "learning_rate": 4.547654273237204e-05, "loss": 2.6452, "step": 950000 }, { "epoch": 1.81, "learning_rate": 4.5474161090449044e-05, "loss": 2.6333, "step": 950500 }, { "epoch": 1.81, "learning_rate": 4.547177944852605e-05, "loss": 2.6085, "step": 951000 }, { "epoch": 1.81, "learning_rate": 4.546939780660306e-05, "loss": 2.6143, "step": 951500 }, { "epoch": 1.81, "learning_rate": 4.546701616468006e-05, "loss": 2.6452, "step": 952000 }, { "epoch": 1.81, "learning_rate": 4.546463928604091e-05, "loss": 2.6467, "step": 952500 }, { "epoch": 1.82, "learning_rate": 4.546225764411792e-05, "loss": 2.6071, "step": 953000 }, { "epoch": 1.82, "learning_rate": 4.545987600219492e-05, "loss": 2.6436, "step": 953500 }, { "epoch": 1.82, "learning_rate": 4.545749436027193e-05, "loss": 2.6311, "step": 954000 }, { "epoch": 1.82, "learning_rate": 4.5455112718348935e-05, "loss": 2.6401, "step": 954500 }, { "epoch": 1.82, "learning_rate": 4.545273107642594e-05, "loss": 2.629, "step": 955000 }, { "epoch": 1.82, "learning_rate": 4.545035419778679e-05, "loss": 2.6076, "step": 955500 }, { "epoch": 1.82, "learning_rate": 4.544797255586379e-05, "loss": 2.655, "step": 956000 }, { "epoch": 1.82, "learning_rate": 4.54455909139408e-05, "loss": 2.6291, "step": 956500 }, { "epoch": 1.82, "learning_rate": 4.5443209272017804e-05, "loss": 2.6287, "step": 957000 }, { "epoch": 1.82, "learning_rate": 4.5440827630094813e-05, "loss": 2.6338, "step": 957500 }, { "epoch": 1.83, "learning_rate": 4.5438445988171816e-05, "loss": 2.6184, "step": 958000 }, { "epoch": 1.83, "learning_rate": 4.543606910953267e-05, "loss": 2.6526, "step": 958500 }, { "epoch": 1.83, "learning_rate": 4.543368746760967e-05, "loss": 2.6162, "step": 959000 }, { "epoch": 1.83, "learning_rate": 4.5431305825686674e-05, "loss": 2.6358, "step": 959500 }, { "epoch": 1.83, "learning_rate": 4.542892418376368e-05, "loss": 2.6278, "step": 960000 }, { "epoch": 1.83, "eval_accuracy": 0.5241299501262516, "eval_loss": 2.5304858684539795, "eval_runtime": 4203.7069, "eval_samples_per_second": 65.416, "eval_steps_per_second": 6.542, "step": 960000 }, { "epoch": 1.83, "learning_rate": 4.5426542541840686e-05, "loss": 2.6331, "step": 960500 }, { "epoch": 1.83, "learning_rate": 4.5424160899917695e-05, "loss": 2.6423, "step": 961000 }, { "epoch": 1.83, "learning_rate": 4.54217792579947e-05, "loss": 2.6397, "step": 961500 }, { "epoch": 1.83, "learning_rate": 4.54193976160717e-05, "loss": 2.6339, "step": 962000 }, { "epoch": 1.83, "learning_rate": 4.541701597414871e-05, "loss": 2.6388, "step": 962500 }, { "epoch": 1.83, "learning_rate": 4.541463909550956e-05, "loss": 2.6294, "step": 963000 }, { "epoch": 1.84, "learning_rate": 4.5412257453586565e-05, "loss": 2.635, "step": 963500 }, { "epoch": 1.84, "learning_rate": 4.5409875811663574e-05, "loss": 2.6219, "step": 964000 }, { "epoch": 1.84, "learning_rate": 4.540749416974057e-05, "loss": 2.6278, "step": 964500 }, { "epoch": 1.84, "learning_rate": 4.540511252781758e-05, "loss": 2.6343, "step": 965000 }, { "epoch": 1.84, "learning_rate": 4.540273564917843e-05, "loss": 2.6143, "step": 965500 }, { "epoch": 1.84, "learning_rate": 4.5400354007255434e-05, "loss": 2.632, "step": 966000 }, { "epoch": 1.84, "learning_rate": 4.5397972365332444e-05, "loss": 2.623, "step": 966500 }, { "epoch": 1.84, "learning_rate": 4.5395590723409446e-05, "loss": 2.6286, "step": 967000 }, { "epoch": 1.84, "learning_rate": 4.53932138447703e-05, "loss": 2.6296, "step": 967500 }, { "epoch": 1.84, "learning_rate": 4.539083220284731e-05, "loss": 2.6275, "step": 968000 }, { "epoch": 1.85, "learning_rate": 4.5388455324208153e-05, "loss": 2.6296, "step": 968500 }, { "epoch": 1.85, "learning_rate": 4.5386073682285156e-05, "loss": 2.6378, "step": 969000 }, { "epoch": 1.85, "learning_rate": 4.5383692040362165e-05, "loss": 2.6431, "step": 969500 }, { "epoch": 1.85, "learning_rate": 4.538131039843917e-05, "loss": 2.622, "step": 970000 }, { "epoch": 1.85, "learning_rate": 4.537892875651618e-05, "loss": 2.634, "step": 970500 }, { "epoch": 1.85, "learning_rate": 4.537655187787703e-05, "loss": 2.6162, "step": 971000 }, { "epoch": 1.85, "learning_rate": 4.537417023595403e-05, "loss": 2.6288, "step": 971500 }, { "epoch": 1.85, "learning_rate": 4.5371788594031035e-05, "loss": 2.6521, "step": 972000 }, { "epoch": 1.85, "learning_rate": 4.536940695210804e-05, "loss": 2.6383, "step": 972500 }, { "epoch": 1.85, "learning_rate": 4.536702531018505e-05, "loss": 2.6606, "step": 973000 }, { "epoch": 1.85, "learning_rate": 4.536464366826205e-05, "loss": 2.6421, "step": 973500 }, { "epoch": 1.86, "learning_rate": 4.536226202633906e-05, "loss": 2.6342, "step": 974000 }, { "epoch": 1.86, "learning_rate": 4.535988038441606e-05, "loss": 2.6156, "step": 974500 }, { "epoch": 1.86, "learning_rate": 4.5357498742493064e-05, "loss": 2.6379, "step": 975000 }, { "epoch": 1.86, "learning_rate": 4.535512186385392e-05, "loss": 2.6443, "step": 975500 }, { "epoch": 1.86, "learning_rate": 4.5352740221930926e-05, "loss": 2.6298, "step": 976000 }, { "epoch": 1.86, "learning_rate": 4.535035858000793e-05, "loss": 2.6294, "step": 976500 }, { "epoch": 1.86, "learning_rate": 4.534798170136878e-05, "loss": 2.6162, "step": 977000 }, { "epoch": 1.86, "learning_rate": 4.5345600059445784e-05, "loss": 2.6377, "step": 977500 }, { "epoch": 1.86, "learning_rate": 4.5343218417522786e-05, "loss": 2.638, "step": 978000 }, { "epoch": 1.86, "learning_rate": 4.5340836775599796e-05, "loss": 2.6345, "step": 978500 }, { "epoch": 1.87, "learning_rate": 4.53384551336768e-05, "loss": 2.6403, "step": 979000 }, { "epoch": 1.87, "learning_rate": 4.533607349175381e-05, "loss": 2.6391, "step": 979500 }, { "epoch": 1.87, "learning_rate": 4.533369184983081e-05, "loss": 2.6254, "step": 980000 }, { "epoch": 1.87, "learning_rate": 4.533131020790782e-05, "loss": 2.6248, "step": 980500 }, { "epoch": 1.87, "learning_rate": 4.532893332926867e-05, "loss": 2.6347, "step": 981000 }, { "epoch": 1.87, "learning_rate": 4.532655168734567e-05, "loss": 2.6175, "step": 981500 }, { "epoch": 1.87, "learning_rate": 4.532417004542268e-05, "loss": 2.6098, "step": 982000 }, { "epoch": 1.87, "learning_rate": 4.532178840349968e-05, "loss": 2.6245, "step": 982500 }, { "epoch": 1.87, "learning_rate": 4.531940676157669e-05, "loss": 2.6215, "step": 983000 }, { "epoch": 1.87, "learning_rate": 4.531702988293754e-05, "loss": 2.6236, "step": 983500 }, { "epoch": 1.87, "learning_rate": 4.5314648241014544e-05, "loss": 2.6306, "step": 984000 }, { "epoch": 1.88, "learning_rate": 4.531226659909155e-05, "loss": 2.6265, "step": 984500 }, { "epoch": 1.88, "learning_rate": 4.530988495716855e-05, "loss": 2.6218, "step": 985000 }, { "epoch": 1.88, "learning_rate": 4.53075080785294e-05, "loss": 2.6099, "step": 985500 }, { "epoch": 1.88, "learning_rate": 4.530512643660641e-05, "loss": 2.6415, "step": 986000 }, { "epoch": 1.88, "learning_rate": 4.5302744794683414e-05, "loss": 2.6352, "step": 986500 }, { "epoch": 1.88, "learning_rate": 4.530036315276042e-05, "loss": 2.6388, "step": 987000 }, { "epoch": 1.88, "learning_rate": 4.5297981510837426e-05, "loss": 2.6288, "step": 987500 }, { "epoch": 1.88, "learning_rate": 4.529559986891443e-05, "loss": 2.6311, "step": 988000 }, { "epoch": 1.88, "learning_rate": 4.529322299027528e-05, "loss": 2.6368, "step": 988500 }, { "epoch": 1.88, "learning_rate": 4.529084134835228e-05, "loss": 2.637, "step": 989000 }, { "epoch": 1.89, "learning_rate": 4.528845970642929e-05, "loss": 2.6381, "step": 989500 }, { "epoch": 1.89, "learning_rate": 4.52860780645063e-05, "loss": 2.6433, "step": 990000 }, { "epoch": 1.89, "eval_accuracy": 0.5244361680561594, "eval_loss": 2.5283498764038086, "eval_runtime": 4200.8134, "eval_samples_per_second": 65.461, "eval_steps_per_second": 6.546, "step": 990000 }, { "epoch": 1.89, "learning_rate": 4.528370118586715e-05, "loss": 2.6137, "step": 990500 }, { "epoch": 1.89, "learning_rate": 4.528131954394416e-05, "loss": 2.6421, "step": 991000 }, { "epoch": 1.89, "learning_rate": 4.527893790202116e-05, "loss": 2.6384, "step": 991500 }, { "epoch": 1.89, "learning_rate": 4.527655626009816e-05, "loss": 2.6138, "step": 992000 }, { "epoch": 1.89, "learning_rate": 4.527417461817517e-05, "loss": 2.6331, "step": 992500 }, { "epoch": 1.89, "learning_rate": 4.5271792976252174e-05, "loss": 2.6217, "step": 993000 }, { "epoch": 1.89, "learning_rate": 4.5269411334329184e-05, "loss": 2.6306, "step": 993500 }, { "epoch": 1.89, "learning_rate": 4.5267034455690036e-05, "loss": 2.6403, "step": 994000 }, { "epoch": 1.89, "learning_rate": 4.526465281376703e-05, "loss": 2.6132, "step": 994500 }, { "epoch": 1.9, "learning_rate": 4.526227117184404e-05, "loss": 2.6089, "step": 995000 }, { "epoch": 1.9, "learning_rate": 4.5259894293204894e-05, "loss": 2.6177, "step": 995500 }, { "epoch": 1.9, "learning_rate": 4.5257512651281896e-05, "loss": 2.6329, "step": 996000 }, { "epoch": 1.9, "learning_rate": 4.525513577264275e-05, "loss": 2.6064, "step": 996500 }, { "epoch": 1.9, "learning_rate": 4.525275413071975e-05, "loss": 2.6247, "step": 997000 }, { "epoch": 1.9, "learning_rate": 4.525037248879676e-05, "loss": 2.6344, "step": 997500 }, { "epoch": 1.9, "learning_rate": 4.524799084687376e-05, "loss": 2.6296, "step": 998000 }, { "epoch": 1.9, "learning_rate": 4.5245609204950766e-05, "loss": 2.6273, "step": 998500 }, { "epoch": 1.9, "learning_rate": 4.5243227563027775e-05, "loss": 2.6268, "step": 999000 }, { "epoch": 1.9, "learning_rate": 4.524084592110478e-05, "loss": 2.6213, "step": 999500 }, { "epoch": 1.91, "learning_rate": 4.523846427918179e-05, "loss": 2.6282, "step": 1000000 }, { "epoch": 1.91, "learning_rate": 4.523608263725879e-05, "loss": 2.6244, "step": 1000500 }, { "epoch": 1.91, "learning_rate": 4.523370099533579e-05, "loss": 2.6336, "step": 1001000 }, { "epoch": 1.91, "learning_rate": 4.52313193534128e-05, "loss": 2.6181, "step": 1001500 }, { "epoch": 1.91, "learning_rate": 4.5228937711489804e-05, "loss": 2.6157, "step": 1002000 }, { "epoch": 1.91, "learning_rate": 4.5226556069566814e-05, "loss": 2.6362, "step": 1002500 }, { "epoch": 1.91, "learning_rate": 4.522417919092766e-05, "loss": 2.633, "step": 1003000 }, { "epoch": 1.91, "learning_rate": 4.522179754900467e-05, "loss": 2.6345, "step": 1003500 }, { "epoch": 1.91, "learning_rate": 4.521942067036552e-05, "loss": 2.6257, "step": 1004000 }, { "epoch": 1.91, "learning_rate": 4.5217039028442524e-05, "loss": 2.628, "step": 1004500 }, { "epoch": 1.91, "learning_rate": 4.5214657386519526e-05, "loss": 2.6284, "step": 1005000 }, { "epoch": 1.92, "learning_rate": 4.5212275744596536e-05, "loss": 2.6479, "step": 1005500 }, { "epoch": 1.92, "learning_rate": 4.520989410267354e-05, "loss": 2.6278, "step": 1006000 }, { "epoch": 1.92, "learning_rate": 4.520751246075055e-05, "loss": 2.6266, "step": 1006500 }, { "epoch": 1.92, "learning_rate": 4.5205130818827544e-05, "loss": 2.6381, "step": 1007000 }, { "epoch": 1.92, "learning_rate": 4.520274917690455e-05, "loss": 2.6162, "step": 1007500 }, { "epoch": 1.92, "learning_rate": 4.5200372298265405e-05, "loss": 2.632, "step": 1008000 }, { "epoch": 1.92, "learning_rate": 4.519799065634241e-05, "loss": 2.6316, "step": 1008500 }, { "epoch": 1.92, "learning_rate": 4.519560901441942e-05, "loss": 2.6234, "step": 1009000 }, { "epoch": 1.92, "learning_rate": 4.519322737249642e-05, "loss": 2.6309, "step": 1009500 }, { "epoch": 1.92, "learning_rate": 4.519084573057343e-05, "loss": 2.6234, "step": 1010000 }, { "epoch": 1.93, "learning_rate": 4.5188464088650425e-05, "loss": 2.6156, "step": 1010500 }, { "epoch": 1.93, "learning_rate": 4.5186082446727435e-05, "loss": 2.6501, "step": 1011000 }, { "epoch": 1.93, "learning_rate": 4.5183700804804444e-05, "loss": 2.6232, "step": 1011500 }, { "epoch": 1.93, "learning_rate": 4.518132392616529e-05, "loss": 2.6276, "step": 1012000 }, { "epoch": 1.93, "learning_rate": 4.51789422842423e-05, "loss": 2.6071, "step": 1012500 }, { "epoch": 1.93, "learning_rate": 4.51765606423193e-05, "loss": 2.6131, "step": 1013000 }, { "epoch": 1.93, "learning_rate": 4.5174179000396304e-05, "loss": 2.6189, "step": 1013500 }, { "epoch": 1.93, "learning_rate": 4.5171802121757156e-05, "loss": 2.6153, "step": 1014000 }, { "epoch": 1.93, "learning_rate": 4.5169420479834166e-05, "loss": 2.6318, "step": 1014500 }, { "epoch": 1.93, "learning_rate": 4.516703883791117e-05, "loss": 2.6084, "step": 1015000 }, { "epoch": 1.93, "learning_rate": 4.516465719598818e-05, "loss": 2.6389, "step": 1015500 }, { "epoch": 1.94, "learning_rate": 4.516227555406518e-05, "loss": 2.6154, "step": 1016000 }, { "epoch": 1.94, "learning_rate": 4.515989391214218e-05, "loss": 2.6239, "step": 1016500 }, { "epoch": 1.94, "learning_rate": 4.5157517033503035e-05, "loss": 2.6197, "step": 1017000 }, { "epoch": 1.94, "learning_rate": 4.515513539158004e-05, "loss": 2.6274, "step": 1017500 }, { "epoch": 1.94, "learning_rate": 4.515275374965705e-05, "loss": 2.6396, "step": 1018000 }, { "epoch": 1.94, "learning_rate": 4.515037210773405e-05, "loss": 2.6043, "step": 1018500 }, { "epoch": 1.94, "learning_rate": 4.51479952290949e-05, "loss": 2.6376, "step": 1019000 }, { "epoch": 1.94, "learning_rate": 4.514561358717191e-05, "loss": 2.6011, "step": 1019500 }, { "epoch": 1.94, "learning_rate": 4.514323670853276e-05, "loss": 2.6028, "step": 1020000 }, { "epoch": 1.94, "eval_accuracy": 0.5249437668521527, "eval_loss": 2.5246870517730713, "eval_runtime": 4198.1718, "eval_samples_per_second": 65.502, "eval_steps_per_second": 6.55, "step": 1020000 }, { "epoch": 1.94, "learning_rate": 4.514085506660976e-05, "loss": 2.6317, "step": 1020500 }, { "epoch": 1.95, "learning_rate": 4.513847342468677e-05, "loss": 2.6311, "step": 1021000 }, { "epoch": 1.95, "learning_rate": 4.513609178276377e-05, "loss": 2.6263, "step": 1021500 }, { "epoch": 1.95, "learning_rate": 4.513371014084078e-05, "loss": 2.6341, "step": 1022000 }, { "epoch": 1.95, "learning_rate": 4.5131328498917784e-05, "loss": 2.6188, "step": 1022500 }, { "epoch": 1.95, "learning_rate": 4.512894685699479e-05, "loss": 2.6322, "step": 1023000 }, { "epoch": 1.95, "learning_rate": 4.512656521507179e-05, "loss": 2.6375, "step": 1023500 }, { "epoch": 1.95, "learning_rate": 4.51241835731488e-05, "loss": 2.6082, "step": 1024000 }, { "epoch": 1.95, "learning_rate": 4.5121811457793494e-05, "loss": 2.6126, "step": 1024500 }, { "epoch": 1.95, "learning_rate": 4.51194298158705e-05, "loss": 2.6295, "step": 1025000 }, { "epoch": 1.95, "learning_rate": 4.5117048173947506e-05, "loss": 2.6116, "step": 1025500 }, { "epoch": 1.95, "learning_rate": 4.5114666532024515e-05, "loss": 2.6259, "step": 1026000 }, { "epoch": 1.96, "learning_rate": 4.511228489010152e-05, "loss": 2.6274, "step": 1026500 }, { "epoch": 1.96, "learning_rate": 4.510990324817852e-05, "loss": 2.6217, "step": 1027000 }, { "epoch": 1.96, "learning_rate": 4.510752160625552e-05, "loss": 2.6215, "step": 1027500 }, { "epoch": 1.96, "learning_rate": 4.510513996433253e-05, "loss": 2.6092, "step": 1028000 }, { "epoch": 1.96, "learning_rate": 4.5102758322409535e-05, "loss": 2.6271, "step": 1028500 }, { "epoch": 1.96, "learning_rate": 4.510038144377039e-05, "loss": 2.6387, "step": 1029000 }, { "epoch": 1.96, "learning_rate": 4.50979998018474e-05, "loss": 2.646, "step": 1029500 }, { "epoch": 1.96, "learning_rate": 4.50956181599244e-05, "loss": 2.6272, "step": 1030000 }, { "epoch": 1.96, "learning_rate": 4.50932365180014e-05, "loss": 2.6287, "step": 1030500 }, { "epoch": 1.96, "learning_rate": 4.509085487607841e-05, "loss": 2.6333, "step": 1031000 }, { "epoch": 1.97, "learning_rate": 4.508847799743926e-05, "loss": 2.616, "step": 1031500 }, { "epoch": 1.97, "learning_rate": 4.5086096355516266e-05, "loss": 2.6321, "step": 1032000 }, { "epoch": 1.97, "learning_rate": 4.5083714713593276e-05, "loss": 2.6153, "step": 1032500 }, { "epoch": 1.97, "learning_rate": 4.508133307167028e-05, "loss": 2.6294, "step": 1033000 }, { "epoch": 1.97, "learning_rate": 4.507895619303113e-05, "loss": 2.6412, "step": 1033500 }, { "epoch": 1.97, "learning_rate": 4.507657455110813e-05, "loss": 2.6209, "step": 1034000 }, { "epoch": 1.97, "learning_rate": 4.5074192909185136e-05, "loss": 2.6359, "step": 1034500 }, { "epoch": 1.97, "learning_rate": 4.5071811267262145e-05, "loss": 2.6144, "step": 1035000 }, { "epoch": 1.97, "learning_rate": 4.506942962533915e-05, "loss": 2.6167, "step": 1035500 }, { "epoch": 1.97, "learning_rate": 4.506704798341616e-05, "loss": 2.6233, "step": 1036000 }, { "epoch": 1.97, "learning_rate": 4.506466634149315e-05, "loss": 2.6345, "step": 1036500 }, { "epoch": 1.98, "learning_rate": 4.506228469957016e-05, "loss": 2.6339, "step": 1037000 }, { "epoch": 1.98, "learning_rate": 4.505991258421486e-05, "loss": 2.6257, "step": 1037500 }, { "epoch": 1.98, "learning_rate": 4.505753094229187e-05, "loss": 2.6044, "step": 1038000 }, { "epoch": 1.98, "learning_rate": 4.505514930036887e-05, "loss": 2.6254, "step": 1038500 }, { "epoch": 1.98, "learning_rate": 4.505276765844588e-05, "loss": 2.6187, "step": 1039000 }, { "epoch": 1.98, "learning_rate": 4.505038601652288e-05, "loss": 2.6373, "step": 1039500 }, { "epoch": 1.98, "learning_rate": 4.5048009137883734e-05, "loss": 2.6356, "step": 1040000 }, { "epoch": 1.98, "learning_rate": 4.504562749596074e-05, "loss": 2.6051, "step": 1040500 }, { "epoch": 1.98, "learning_rate": 4.504324585403774e-05, "loss": 2.6175, "step": 1041000 }, { "epoch": 1.98, "learning_rate": 4.504086421211475e-05, "loss": 2.612, "step": 1041500 }, { "epoch": 1.99, "learning_rate": 4.503848257019175e-05, "loss": 2.6162, "step": 1042000 }, { "epoch": 1.99, "learning_rate": 4.503610092826876e-05, "loss": 2.6032, "step": 1042500 }, { "epoch": 1.99, "learning_rate": 4.503372404962961e-05, "loss": 2.6372, "step": 1043000 }, { "epoch": 1.99, "learning_rate": 4.5031342407706616e-05, "loss": 2.6086, "step": 1043500 }, { "epoch": 1.99, "learning_rate": 4.502896076578362e-05, "loss": 2.61, "step": 1044000 }, { "epoch": 1.99, "learning_rate": 4.502657912386062e-05, "loss": 2.5972, "step": 1044500 }, { "epoch": 1.99, "learning_rate": 4.502419748193763e-05, "loss": 2.6357, "step": 1045000 }, { "epoch": 1.99, "learning_rate": 4.502182060329848e-05, "loss": 2.6193, "step": 1045500 }, { "epoch": 1.99, "learning_rate": 4.5019438961375485e-05, "loss": 2.6039, "step": 1046000 }, { "epoch": 1.99, "learning_rate": 4.5017057319452495e-05, "loss": 2.6185, "step": 1046500 }, { "epoch": 1.99, "learning_rate": 4.50146756775295e-05, "loss": 2.6033, "step": 1047000 }, { "epoch": 2.0, "learning_rate": 4.501229879889034e-05, "loss": 2.6482, "step": 1047500 }, { "epoch": 2.0, "learning_rate": 4.500991715696735e-05, "loss": 2.628, "step": 1048000 }, { "epoch": 2.0, "learning_rate": 4.5007535515044355e-05, "loss": 2.649, "step": 1048500 }, { "epoch": 2.0, "learning_rate": 4.5005153873121364e-05, "loss": 2.6317, "step": 1049000 }, { "epoch": 2.0, "learning_rate": 4.500277223119837e-05, "loss": 2.6219, "step": 1049500 }, { "epoch": 2.0, "learning_rate": 4.500039058927537e-05, "loss": 2.6248, "step": 1050000 }, { "epoch": 2.0, "eval_accuracy": 0.5254876704403886, "eval_loss": 2.5208752155303955, "eval_runtime": 4200.5399, "eval_samples_per_second": 65.465, "eval_steps_per_second": 6.547, "step": 1050000 }, { "epoch": 2.0, "learning_rate": 4.499800894735238e-05, "loss": 2.614, "step": 1050500 }, { "epoch": 2.0, "learning_rate": 4.499563206871323e-05, "loss": 2.617, "step": 1051000 }, { "epoch": 2.0, "learning_rate": 4.4993250426790234e-05, "loss": 2.6103, "step": 1051500 }, { "epoch": 2.0, "learning_rate": 4.499086878486724e-05, "loss": 2.6247, "step": 1052000 }, { "epoch": 2.01, "learning_rate": 4.4988487142944246e-05, "loss": 2.6114, "step": 1052500 }, { "epoch": 2.01, "learning_rate": 4.4986105501021255e-05, "loss": 2.6185, "step": 1053000 }, { "epoch": 2.01, "learning_rate": 4.49837286223821e-05, "loss": 2.6109, "step": 1053500 }, { "epoch": 2.01, "learning_rate": 4.4981346980459103e-05, "loss": 2.602, "step": 1054000 }, { "epoch": 2.01, "learning_rate": 4.4978970101819956e-05, "loss": 2.6137, "step": 1054500 }, { "epoch": 2.01, "learning_rate": 4.4976588459896965e-05, "loss": 2.6207, "step": 1055000 }, { "epoch": 2.01, "learning_rate": 4.497420681797397e-05, "loss": 2.6138, "step": 1055500 }, { "epoch": 2.01, "learning_rate": 4.497182517605098e-05, "loss": 2.628, "step": 1056000 }, { "epoch": 2.01, "learning_rate": 4.496944353412798e-05, "loss": 2.6151, "step": 1056500 }, { "epoch": 2.01, "learning_rate": 4.496706189220498e-05, "loss": 2.6099, "step": 1057000 }, { "epoch": 2.01, "learning_rate": 4.4964680250281985e-05, "loss": 2.6224, "step": 1057500 }, { "epoch": 2.02, "learning_rate": 4.4962298608358994e-05, "loss": 2.6171, "step": 1058000 }, { "epoch": 2.02, "learning_rate": 4.4959916966436e-05, "loss": 2.6006, "step": 1058500 }, { "epoch": 2.02, "learning_rate": 4.4957535324513006e-05, "loss": 2.613, "step": 1059000 }, { "epoch": 2.02, "learning_rate": 4.495515368259001e-05, "loss": 2.6094, "step": 1059500 }, { "epoch": 2.02, "learning_rate": 4.495277204066701e-05, "loss": 2.5872, "step": 1060000 }, { "epoch": 2.02, "learning_rate": 4.495039992531171e-05, "loss": 2.6081, "step": 1060500 }, { "epoch": 2.02, "learning_rate": 4.4948018283388716e-05, "loss": 2.597, "step": 1061000 }, { "epoch": 2.02, "learning_rate": 4.494563664146572e-05, "loss": 2.6283, "step": 1061500 }, { "epoch": 2.02, "learning_rate": 4.494325499954273e-05, "loss": 2.6257, "step": 1062000 }, { "epoch": 2.02, "learning_rate": 4.494087812090358e-05, "loss": 2.6153, "step": 1062500 }, { "epoch": 2.03, "learning_rate": 4.493849647898058e-05, "loss": 2.6132, "step": 1063000 }, { "epoch": 2.03, "learning_rate": 4.4936114837057586e-05, "loss": 2.6157, "step": 1063500 }, { "epoch": 2.03, "learning_rate": 4.493373795841844e-05, "loss": 2.6044, "step": 1064000 }, { "epoch": 2.03, "learning_rate": 4.493135631649544e-05, "loss": 2.6162, "step": 1064500 }, { "epoch": 2.03, "learning_rate": 4.492897943785629e-05, "loss": 2.6207, "step": 1065000 }, { "epoch": 2.03, "learning_rate": 4.49265977959333e-05, "loss": 2.594, "step": 1065500 }, { "epoch": 2.03, "learning_rate": 4.4924216154010305e-05, "loss": 2.6134, "step": 1066000 }, { "epoch": 2.03, "learning_rate": 4.4921834512087314e-05, "loss": 2.6276, "step": 1066500 }, { "epoch": 2.03, "learning_rate": 4.491945287016432e-05, "loss": 2.6283, "step": 1067000 }, { "epoch": 2.03, "learning_rate": 4.491707122824132e-05, "loss": 2.6027, "step": 1067500 }, { "epoch": 2.03, "learning_rate": 4.491468958631833e-05, "loss": 2.6142, "step": 1068000 }, { "epoch": 2.04, "learning_rate": 4.491230794439533e-05, "loss": 2.6184, "step": 1068500 }, { "epoch": 2.04, "learning_rate": 4.490992630247234e-05, "loss": 2.607, "step": 1069000 }, { "epoch": 2.04, "learning_rate": 4.4907544660549344e-05, "loss": 2.6116, "step": 1069500 }, { "epoch": 2.04, "learning_rate": 4.4905163018626346e-05, "loss": 2.633, "step": 1070000 }, { "epoch": 2.04, "learning_rate": 4.490278137670335e-05, "loss": 2.6096, "step": 1070500 }, { "epoch": 2.04, "learning_rate": 4.490039973478036e-05, "loss": 2.5982, "step": 1071000 }, { "epoch": 2.04, "learning_rate": 4.489801809285736e-05, "loss": 2.6097, "step": 1071500 }, { "epoch": 2.04, "learning_rate": 4.489564121421821e-05, "loss": 2.6036, "step": 1072000 }, { "epoch": 2.04, "learning_rate": 4.489325957229522e-05, "loss": 2.6269, "step": 1072500 }, { "epoch": 2.04, "learning_rate": 4.489087793037222e-05, "loss": 2.6002, "step": 1073000 }, { "epoch": 2.05, "learning_rate": 4.488849628844923e-05, "loss": 2.6166, "step": 1073500 }, { "epoch": 2.05, "learning_rate": 4.488611464652623e-05, "loss": 2.6277, "step": 1074000 }, { "epoch": 2.05, "learning_rate": 4.488373776788708e-05, "loss": 2.6133, "step": 1074500 }, { "epoch": 2.05, "learning_rate": 4.488135612596409e-05, "loss": 2.6247, "step": 1075000 }, { "epoch": 2.05, "learning_rate": 4.4878974484041095e-05, "loss": 2.5959, "step": 1075500 }, { "epoch": 2.05, "learning_rate": 4.4876592842118104e-05, "loss": 2.6096, "step": 1076000 }, { "epoch": 2.05, "learning_rate": 4.487421596347896e-05, "loss": 2.6162, "step": 1076500 }, { "epoch": 2.05, "learning_rate": 4.487183432155595e-05, "loss": 2.6118, "step": 1077000 }, { "epoch": 2.05, "learning_rate": 4.486945267963296e-05, "loss": 2.6065, "step": 1077500 }, { "epoch": 2.05, "learning_rate": 4.4867071037709965e-05, "loss": 2.5935, "step": 1078000 }, { "epoch": 2.05, "learning_rate": 4.4864689395786974e-05, "loss": 2.5925, "step": 1078500 }, { "epoch": 2.06, "learning_rate": 4.4862312517147826e-05, "loss": 2.6108, "step": 1079000 }, { "epoch": 2.06, "learning_rate": 4.485993087522483e-05, "loss": 2.6199, "step": 1079500 }, { "epoch": 2.06, "learning_rate": 4.485754923330183e-05, "loss": 2.6225, "step": 1080000 }, { "epoch": 2.06, "eval_accuracy": 0.5255543862281893, "eval_loss": 2.5183653831481934, "eval_runtime": 4202.0372, "eval_samples_per_second": 65.442, "eval_steps_per_second": 6.544, "step": 1080000 }, { "epoch": 2.06, "learning_rate": 4.485516759137884e-05, "loss": 2.6323, "step": 1080500 }, { "epoch": 2.06, "learning_rate": 4.4852790712739686e-05, "loss": 2.6089, "step": 1081000 }, { "epoch": 2.06, "learning_rate": 4.4850409070816696e-05, "loss": 2.6073, "step": 1081500 }, { "epoch": 2.06, "learning_rate": 4.4848027428893705e-05, "loss": 2.6202, "step": 1082000 }, { "epoch": 2.06, "learning_rate": 4.484564578697071e-05, "loss": 2.6098, "step": 1082500 }, { "epoch": 2.06, "learning_rate": 4.484326414504771e-05, "loss": 2.6065, "step": 1083000 }, { "epoch": 2.06, "learning_rate": 4.484088726640856e-05, "loss": 2.6189, "step": 1083500 }, { "epoch": 2.07, "learning_rate": 4.4838505624485565e-05, "loss": 2.6164, "step": 1084000 }, { "epoch": 2.07, "learning_rate": 4.4836123982562575e-05, "loss": 2.6166, "step": 1084500 }, { "epoch": 2.07, "learning_rate": 4.483374234063958e-05, "loss": 2.6193, "step": 1085000 }, { "epoch": 2.07, "learning_rate": 4.483136069871659e-05, "loss": 2.6067, "step": 1085500 }, { "epoch": 2.07, "learning_rate": 4.482898382007744e-05, "loss": 2.5976, "step": 1086000 }, { "epoch": 2.07, "learning_rate": 4.4826602178154435e-05, "loss": 2.619, "step": 1086500 }, { "epoch": 2.07, "learning_rate": 4.4824220536231444e-05, "loss": 2.6299, "step": 1087000 }, { "epoch": 2.07, "learning_rate": 4.482183889430845e-05, "loss": 2.6204, "step": 1087500 }, { "epoch": 2.07, "learning_rate": 4.4819457252385456e-05, "loss": 2.6094, "step": 1088000 }, { "epoch": 2.07, "learning_rate": 4.481707561046246e-05, "loss": 2.6219, "step": 1088500 }, { "epoch": 2.07, "learning_rate": 4.481469873182331e-05, "loss": 2.6138, "step": 1089000 }, { "epoch": 2.08, "learning_rate": 4.481231708990032e-05, "loss": 2.6201, "step": 1089500 }, { "epoch": 2.08, "learning_rate": 4.4809935447977316e-05, "loss": 2.6059, "step": 1090000 }, { "epoch": 2.08, "learning_rate": 4.4807553806054326e-05, "loss": 2.6228, "step": 1090500 }, { "epoch": 2.08, "learning_rate": 4.480517216413133e-05, "loss": 2.6292, "step": 1091000 }, { "epoch": 2.08, "learning_rate": 4.480279528549218e-05, "loss": 2.598, "step": 1091500 }, { "epoch": 2.08, "learning_rate": 4.480041840685303e-05, "loss": 2.5898, "step": 1092000 }, { "epoch": 2.08, "learning_rate": 4.479803676493004e-05, "loss": 2.6154, "step": 1092500 }, { "epoch": 2.08, "learning_rate": 4.4795655123007045e-05, "loss": 2.623, "step": 1093000 }, { "epoch": 2.08, "learning_rate": 4.479327348108405e-05, "loss": 2.598, "step": 1093500 }, { "epoch": 2.08, "learning_rate": 4.479089183916105e-05, "loss": 2.6133, "step": 1094000 }, { "epoch": 2.09, "learning_rate": 4.478851019723806e-05, "loss": 2.6149, "step": 1094500 }, { "epoch": 2.09, "learning_rate": 4.478612855531506e-05, "loss": 2.6121, "step": 1095000 }, { "epoch": 2.09, "learning_rate": 4.478374691339207e-05, "loss": 2.6161, "step": 1095500 }, { "epoch": 2.09, "learning_rate": 4.4781370034752924e-05, "loss": 2.6134, "step": 1096000 }, { "epoch": 2.09, "learning_rate": 4.477898839282993e-05, "loss": 2.6023, "step": 1096500 }, { "epoch": 2.09, "learning_rate": 4.477660675090693e-05, "loss": 2.6182, "step": 1097000 }, { "epoch": 2.09, "learning_rate": 4.477422510898394e-05, "loss": 2.6058, "step": 1097500 }, { "epoch": 2.09, "learning_rate": 4.477184346706094e-05, "loss": 2.5966, "step": 1098000 }, { "epoch": 2.09, "learning_rate": 4.476946182513795e-05, "loss": 2.6153, "step": 1098500 }, { "epoch": 2.09, "learning_rate": 4.4767084946498796e-05, "loss": 2.6176, "step": 1099000 }, { "epoch": 2.09, "learning_rate": 4.4764703304575806e-05, "loss": 2.6074, "step": 1099500 }, { "epoch": 2.1, "learning_rate": 4.476232166265281e-05, "loss": 2.6062, "step": 1100000 }, { "epoch": 2.1, "learning_rate": 4.475994002072981e-05, "loss": 2.614, "step": 1100500 }, { "epoch": 2.1, "learning_rate": 4.475755837880682e-05, "loss": 2.6062, "step": 1101000 }, { "epoch": 2.1, "learning_rate": 4.4755186263451516e-05, "loss": 2.587, "step": 1101500 }, { "epoch": 2.1, "learning_rate": 4.475280462152852e-05, "loss": 2.5996, "step": 1102000 }, { "epoch": 2.1, "learning_rate": 4.475042297960553e-05, "loss": 2.6189, "step": 1102500 }, { "epoch": 2.1, "learning_rate": 4.474804133768254e-05, "loss": 2.6239, "step": 1103000 }, { "epoch": 2.1, "learning_rate": 4.474565969575953e-05, "loss": 2.6336, "step": 1103500 }, { "epoch": 2.1, "learning_rate": 4.474327805383654e-05, "loss": 2.6314, "step": 1104000 }, { "epoch": 2.1, "learning_rate": 4.4740896411913545e-05, "loss": 2.6215, "step": 1104500 }, { "epoch": 2.11, "learning_rate": 4.47385195332744e-05, "loss": 2.6097, "step": 1105000 }, { "epoch": 2.11, "learning_rate": 4.4736137891351407e-05, "loss": 2.6066, "step": 1105500 }, { "epoch": 2.11, "learning_rate": 4.473375624942841e-05, "loss": 2.6, "step": 1106000 }, { "epoch": 2.11, "learning_rate": 4.473137460750541e-05, "loss": 2.6004, "step": 1106500 }, { "epoch": 2.11, "learning_rate": 4.4728992965582414e-05, "loss": 2.6012, "step": 1107000 }, { "epoch": 2.11, "learning_rate": 4.4726611323659424e-05, "loss": 2.6188, "step": 1107500 }, { "epoch": 2.11, "learning_rate": 4.4724229681736426e-05, "loss": 2.6033, "step": 1108000 }, { "epoch": 2.11, "learning_rate": 4.4721848039813436e-05, "loss": 2.5951, "step": 1108500 }, { "epoch": 2.11, "learning_rate": 4.471946639789044e-05, "loss": 2.6096, "step": 1109000 }, { "epoch": 2.11, "learning_rate": 4.471708475596744e-05, "loss": 2.6095, "step": 1109500 }, { "epoch": 2.11, "learning_rate": 4.471470787732829e-05, "loss": 2.6287, "step": 1110000 }, { "epoch": 2.11, "eval_accuracy": 0.5263314609260262, "eval_loss": 2.5154707431793213, "eval_runtime": 4197.5249, "eval_samples_per_second": 65.512, "eval_steps_per_second": 6.551, "step": 1110000 }, { "epoch": 2.12, "learning_rate": 4.47123262354053e-05, "loss": 2.6126, "step": 1110500 }, { "epoch": 2.12, "learning_rate": 4.4709944593482305e-05, "loss": 2.6146, "step": 1111000 }, { "epoch": 2.12, "learning_rate": 4.4707562951559315e-05, "loss": 2.6039, "step": 1111500 }, { "epoch": 2.12, "learning_rate": 4.470518130963632e-05, "loss": 2.6173, "step": 1112000 }, { "epoch": 2.12, "learning_rate": 4.470280443099717e-05, "loss": 2.6149, "step": 1112500 }, { "epoch": 2.12, "learning_rate": 4.470042278907417e-05, "loss": 2.6235, "step": 1113000 }, { "epoch": 2.12, "learning_rate": 4.4698041147151175e-05, "loss": 2.5996, "step": 1113500 }, { "epoch": 2.12, "learning_rate": 4.4695659505228184e-05, "loss": 2.6083, "step": 1114000 }, { "epoch": 2.12, "learning_rate": 4.469327786330519e-05, "loss": 2.6144, "step": 1114500 }, { "epoch": 2.12, "learning_rate": 4.469090098466604e-05, "loss": 2.6192, "step": 1115000 }, { "epoch": 2.13, "learning_rate": 4.468851934274305e-05, "loss": 2.6129, "step": 1115500 }, { "epoch": 2.13, "learning_rate": 4.4686137700820045e-05, "loss": 2.614, "step": 1116000 }, { "epoch": 2.13, "learning_rate": 4.4683756058897054e-05, "loss": 2.6199, "step": 1116500 }, { "epoch": 2.13, "learning_rate": 4.4681379180257906e-05, "loss": 2.6111, "step": 1117000 }, { "epoch": 2.13, "learning_rate": 4.467899753833491e-05, "loss": 2.6202, "step": 1117500 }, { "epoch": 2.13, "learning_rate": 4.467661589641192e-05, "loss": 2.609, "step": 1118000 }, { "epoch": 2.13, "learning_rate": 4.467423425448892e-05, "loss": 2.6149, "step": 1118500 }, { "epoch": 2.13, "learning_rate": 4.467185737584977e-05, "loss": 2.6022, "step": 1119000 }, { "epoch": 2.13, "learning_rate": 4.4669475733926776e-05, "loss": 2.6092, "step": 1119500 }, { "epoch": 2.13, "learning_rate": 4.466709409200378e-05, "loss": 2.615, "step": 1120000 }, { "epoch": 2.13, "learning_rate": 4.466471245008079e-05, "loss": 2.6259, "step": 1120500 }, { "epoch": 2.14, "learning_rate": 4.466233080815779e-05, "loss": 2.6074, "step": 1121000 }, { "epoch": 2.14, "learning_rate": 4.46599491662348e-05, "loss": 2.6198, "step": 1121500 }, { "epoch": 2.14, "learning_rate": 4.46575675243118e-05, "loss": 2.599, "step": 1122000 }, { "epoch": 2.14, "learning_rate": 4.4655185882388805e-05, "loss": 2.6004, "step": 1122500 }, { "epoch": 2.14, "learning_rate": 4.4652804240465815e-05, "loss": 2.611, "step": 1123000 }, { "epoch": 2.14, "learning_rate": 4.465042259854282e-05, "loss": 2.6215, "step": 1123500 }, { "epoch": 2.14, "learning_rate": 4.4648040956619827e-05, "loss": 2.6203, "step": 1124000 }, { "epoch": 2.14, "learning_rate": 4.464565931469683e-05, "loss": 2.5773, "step": 1124500 }, { "epoch": 2.14, "learning_rate": 4.464328243605768e-05, "loss": 2.6121, "step": 1125000 }, { "epoch": 2.14, "learning_rate": 4.4640900794134684e-05, "loss": 2.6155, "step": 1125500 }, { "epoch": 2.15, "learning_rate": 4.463851915221169e-05, "loss": 2.6266, "step": 1126000 }, { "epoch": 2.15, "learning_rate": 4.463614227357254e-05, "loss": 2.5867, "step": 1126500 }, { "epoch": 2.15, "learning_rate": 4.463376063164955e-05, "loss": 2.6069, "step": 1127000 }, { "epoch": 2.15, "learning_rate": 4.463137898972655e-05, "loss": 2.6162, "step": 1127500 }, { "epoch": 2.15, "learning_rate": 4.462899734780356e-05, "loss": 2.599, "step": 1128000 }, { "epoch": 2.15, "learning_rate": 4.462662046916441e-05, "loss": 2.5926, "step": 1128500 }, { "epoch": 2.15, "learning_rate": 4.462423882724141e-05, "loss": 2.6237, "step": 1129000 }, { "epoch": 2.15, "learning_rate": 4.462186194860226e-05, "loss": 2.6179, "step": 1129500 }, { "epoch": 2.15, "learning_rate": 4.461948030667927e-05, "loss": 2.6171, "step": 1130000 }, { "epoch": 2.15, "learning_rate": 4.461709866475627e-05, "loss": 2.6089, "step": 1130500 }, { "epoch": 2.15, "learning_rate": 4.461471702283328e-05, "loss": 2.6111, "step": 1131000 }, { "epoch": 2.16, "learning_rate": 4.4612335380910285e-05, "loss": 2.6163, "step": 1131500 }, { "epoch": 2.16, "learning_rate": 4.4609953738987294e-05, "loss": 2.5838, "step": 1132000 }, { "epoch": 2.16, "learning_rate": 4.460757209706429e-05, "loss": 2.6047, "step": 1132500 }, { "epoch": 2.16, "learning_rate": 4.46051904551413e-05, "loss": 2.5962, "step": 1133000 }, { "epoch": 2.16, "learning_rate": 4.46028088132183e-05, "loss": 2.6229, "step": 1133500 }, { "epoch": 2.16, "learning_rate": 4.4600431934579154e-05, "loss": 2.6165, "step": 1134000 }, { "epoch": 2.16, "learning_rate": 4.4598050292656164e-05, "loss": 2.6039, "step": 1134500 }, { "epoch": 2.16, "learning_rate": 4.4595668650733167e-05, "loss": 2.6211, "step": 1135000 }, { "epoch": 2.16, "learning_rate": 4.459328700881017e-05, "loss": 2.6074, "step": 1135500 }, { "epoch": 2.16, "learning_rate": 4.459091013017102e-05, "loss": 2.6015, "step": 1136000 }, { "epoch": 2.17, "learning_rate": 4.4588528488248024e-05, "loss": 2.6283, "step": 1136500 }, { "epoch": 2.17, "learning_rate": 4.4586146846325033e-05, "loss": 2.5886, "step": 1137000 }, { "epoch": 2.17, "learning_rate": 4.4583765204402036e-05, "loss": 2.6135, "step": 1137500 }, { "epoch": 2.17, "learning_rate": 4.458138832576289e-05, "loss": 2.6001, "step": 1138000 }, { "epoch": 2.17, "learning_rate": 4.45790066838399e-05, "loss": 2.6236, "step": 1138500 }, { "epoch": 2.17, "learning_rate": 4.45766250419169e-05, "loss": 2.6167, "step": 1139000 }, { "epoch": 2.17, "learning_rate": 4.45742433999939e-05, "loss": 2.6049, "step": 1139500 }, { "epoch": 2.17, "learning_rate": 4.457186175807091e-05, "loss": 2.6429, "step": 1140000 }, { "epoch": 2.17, "eval_accuracy": 0.5268307629056977, "eval_loss": 2.5120418071746826, "eval_runtime": 4200.2082, "eval_samples_per_second": 65.47, "eval_steps_per_second": 6.547, "step": 1140000 }, { "epoch": 2.17, "learning_rate": 4.4569480116147915e-05, "loss": 2.6267, "step": 1140500 }, { "epoch": 2.17, "learning_rate": 4.4567098474224924e-05, "loss": 2.6113, "step": 1141000 }, { "epoch": 2.17, "learning_rate": 4.456472159558577e-05, "loss": 2.6226, "step": 1141500 }, { "epoch": 2.18, "learning_rate": 4.456233995366278e-05, "loss": 2.603, "step": 1142000 }, { "epoch": 2.18, "learning_rate": 4.455995831173978e-05, "loss": 2.6032, "step": 1142500 }, { "epoch": 2.18, "learning_rate": 4.4557581433100634e-05, "loss": 2.6347, "step": 1143000 }, { "epoch": 2.18, "learning_rate": 4.455519979117764e-05, "loss": 2.6253, "step": 1143500 }, { "epoch": 2.18, "learning_rate": 4.4552818149254646e-05, "loss": 2.5997, "step": 1144000 }, { "epoch": 2.18, "learning_rate": 4.455043650733165e-05, "loss": 2.6045, "step": 1144500 }, { "epoch": 2.18, "learning_rate": 4.454805486540866e-05, "loss": 2.6016, "step": 1145000 }, { "epoch": 2.18, "learning_rate": 4.4545673223485654e-05, "loss": 2.6205, "step": 1145500 }, { "epoch": 2.18, "learning_rate": 4.4543291581562664e-05, "loss": 2.5943, "step": 1146000 }, { "epoch": 2.18, "learning_rate": 4.4540909939639666e-05, "loss": 2.6038, "step": 1146500 }, { "epoch": 2.19, "learning_rate": 4.453853306100052e-05, "loss": 2.6117, "step": 1147000 }, { "epoch": 2.19, "learning_rate": 4.453615141907753e-05, "loss": 2.6199, "step": 1147500 }, { "epoch": 2.19, "learning_rate": 4.453376977715453e-05, "loss": 2.6213, "step": 1148000 }, { "epoch": 2.19, "learning_rate": 4.453138813523153e-05, "loss": 2.6102, "step": 1148500 }, { "epoch": 2.19, "learning_rate": 4.452900649330854e-05, "loss": 2.5866, "step": 1149000 }, { "epoch": 2.19, "learning_rate": 4.4526624851385545e-05, "loss": 2.6152, "step": 1149500 }, { "epoch": 2.19, "learning_rate": 4.4524243209462555e-05, "loss": 2.6296, "step": 1150000 }, { "epoch": 2.19, "learning_rate": 4.452186156753956e-05, "loss": 2.6216, "step": 1150500 }, { "epoch": 2.19, "learning_rate": 4.451947992561657e-05, "loss": 2.604, "step": 1151000 }, { "epoch": 2.19, "learning_rate": 4.451709828369356e-05, "loss": 2.6302, "step": 1151500 }, { "epoch": 2.19, "learning_rate": 4.451471664177057e-05, "loss": 2.604, "step": 1152000 }, { "epoch": 2.2, "learning_rate": 4.4512334999847574e-05, "loss": 2.6156, "step": 1152500 }, { "epoch": 2.2, "learning_rate": 4.4509962884492276e-05, "loss": 2.6288, "step": 1153000 }, { "epoch": 2.2, "learning_rate": 4.450758124256928e-05, "loss": 2.6099, "step": 1153500 }, { "epoch": 2.2, "learning_rate": 4.450519960064629e-05, "loss": 2.6292, "step": 1154000 }, { "epoch": 2.2, "learning_rate": 4.450281795872329e-05, "loss": 2.6219, "step": 1154500 }, { "epoch": 2.2, "learning_rate": 4.4500436316800294e-05, "loss": 2.6169, "step": 1155000 }, { "epoch": 2.2, "learning_rate": 4.4498054674877296e-05, "loss": 2.6333, "step": 1155500 }, { "epoch": 2.2, "learning_rate": 4.449567779623815e-05, "loss": 2.5992, "step": 1156000 }, { "epoch": 2.2, "learning_rate": 4.449329615431516e-05, "loss": 2.6202, "step": 1156500 }, { "epoch": 2.2, "learning_rate": 4.449091451239216e-05, "loss": 2.586, "step": 1157000 }, { "epoch": 2.21, "learning_rate": 4.448853287046917e-05, "loss": 2.5939, "step": 1157500 }, { "epoch": 2.21, "learning_rate": 4.448615599183002e-05, "loss": 2.6162, "step": 1158000 }, { "epoch": 2.21, "learning_rate": 4.448377434990702e-05, "loss": 2.6169, "step": 1158500 }, { "epoch": 2.21, "learning_rate": 4.448139270798403e-05, "loss": 2.6173, "step": 1159000 }, { "epoch": 2.21, "learning_rate": 4.447901106606103e-05, "loss": 2.6103, "step": 1159500 }, { "epoch": 2.21, "learning_rate": 4.447662942413804e-05, "loss": 2.6191, "step": 1160000 }, { "epoch": 2.21, "learning_rate": 4.447424778221504e-05, "loss": 2.6105, "step": 1160500 }, { "epoch": 2.21, "learning_rate": 4.447186614029205e-05, "loss": 2.6158, "step": 1161000 }, { "epoch": 2.21, "learning_rate": 4.4469489261652904e-05, "loss": 2.6154, "step": 1161500 }, { "epoch": 2.21, "learning_rate": 4.44671076197299e-05, "loss": 2.6032, "step": 1162000 }, { "epoch": 2.21, "learning_rate": 4.446473074109075e-05, "loss": 2.6252, "step": 1162500 }, { "epoch": 2.22, "learning_rate": 4.446234909916776e-05, "loss": 2.6174, "step": 1163000 }, { "epoch": 2.22, "learning_rate": 4.4459967457244764e-05, "loss": 2.6171, "step": 1163500 }, { "epoch": 2.22, "learning_rate": 4.4457585815321774e-05, "loss": 2.6182, "step": 1164000 }, { "epoch": 2.22, "learning_rate": 4.4455204173398776e-05, "loss": 2.6108, "step": 1164500 }, { "epoch": 2.22, "learning_rate": 4.445282253147578e-05, "loss": 2.6228, "step": 1165000 }, { "epoch": 2.22, "learning_rate": 4.445044088955279e-05, "loss": 2.6159, "step": 1165500 }, { "epoch": 2.22, "learning_rate": 4.444805924762979e-05, "loss": 2.625, "step": 1166000 }, { "epoch": 2.22, "learning_rate": 4.44456776057068e-05, "loss": 2.6085, "step": 1166500 }, { "epoch": 2.22, "learning_rate": 4.44432959637838e-05, "loss": 2.6087, "step": 1167000 }, { "epoch": 2.22, "learning_rate": 4.44409238484285e-05, "loss": 2.6095, "step": 1167500 }, { "epoch": 2.23, "learning_rate": 4.443854220650551e-05, "loss": 2.6296, "step": 1168000 }, { "epoch": 2.23, "learning_rate": 4.443616056458251e-05, "loss": 2.5926, "step": 1168500 }, { "epoch": 2.23, "learning_rate": 4.443377892265951e-05, "loss": 2.638, "step": 1169000 }, { "epoch": 2.23, "learning_rate": 4.443139728073652e-05, "loss": 2.5841, "step": 1169500 }, { "epoch": 2.23, "learning_rate": 4.4429015638813525e-05, "loss": 2.5828, "step": 1170000 }, { "epoch": 2.23, "eval_accuracy": 0.5272407976825108, "eval_loss": 2.5103251934051514, "eval_runtime": 4179.8983, "eval_samples_per_second": 65.788, "eval_steps_per_second": 6.579, "step": 1170000 }, { "epoch": 2.23, "learning_rate": 4.4426633996890534e-05, "loss": 2.6146, "step": 1170500 }, { "epoch": 2.23, "learning_rate": 4.442425235496754e-05, "loss": 2.6003, "step": 1171000 }, { "epoch": 2.23, "learning_rate": 4.442187071304454e-05, "loss": 2.5808, "step": 1171500 }, { "epoch": 2.23, "learning_rate": 4.441948907112154e-05, "loss": 2.6108, "step": 1172000 }, { "epoch": 2.23, "learning_rate": 4.441710742919855e-05, "loss": 2.5963, "step": 1172500 }, { "epoch": 2.23, "learning_rate": 4.4414725787275554e-05, "loss": 2.5986, "step": 1173000 }, { "epoch": 2.24, "learning_rate": 4.4412348908636406e-05, "loss": 2.616, "step": 1173500 }, { "epoch": 2.24, "learning_rate": 4.4409967266713416e-05, "loss": 2.5895, "step": 1174000 }, { "epoch": 2.24, "learning_rate": 4.440758562479042e-05, "loss": 2.6243, "step": 1174500 }, { "epoch": 2.24, "learning_rate": 4.4405208746151264e-05, "loss": 2.6182, "step": 1175000 }, { "epoch": 2.24, "learning_rate": 4.440282710422827e-05, "loss": 2.6017, "step": 1175500 }, { "epoch": 2.24, "learning_rate": 4.4400445462305276e-05, "loss": 2.6125, "step": 1176000 }, { "epoch": 2.24, "learning_rate": 4.4398063820382285e-05, "loss": 2.6062, "step": 1176500 }, { "epoch": 2.24, "learning_rate": 4.4395682178459295e-05, "loss": 2.591, "step": 1177000 }, { "epoch": 2.24, "learning_rate": 4.439330053653629e-05, "loss": 2.607, "step": 1177500 }, { "epoch": 2.24, "learning_rate": 4.43909188946133e-05, "loss": 2.6192, "step": 1178000 }, { "epoch": 2.25, "learning_rate": 4.43885372526903e-05, "loss": 2.5904, "step": 1178500 }, { "epoch": 2.25, "learning_rate": 4.438615561076731e-05, "loss": 2.6364, "step": 1179000 }, { "epoch": 2.25, "learning_rate": 4.4383773968844315e-05, "loss": 2.63, "step": 1179500 }, { "epoch": 2.25, "learning_rate": 4.438139709020517e-05, "loss": 2.59, "step": 1180000 }, { "epoch": 2.25, "learning_rate": 4.4379015448282176e-05, "loss": 2.6003, "step": 1180500 }, { "epoch": 2.25, "learning_rate": 4.437663380635917e-05, "loss": 2.6084, "step": 1181000 }, { "epoch": 2.25, "learning_rate": 4.437425216443618e-05, "loss": 2.6113, "step": 1181500 }, { "epoch": 2.25, "learning_rate": 4.4371870522513184e-05, "loss": 2.6145, "step": 1182000 }, { "epoch": 2.25, "learning_rate": 4.4369493643874036e-05, "loss": 2.6153, "step": 1182500 }, { "epoch": 2.25, "learning_rate": 4.436711676523489e-05, "loss": 2.6134, "step": 1183000 }, { "epoch": 2.25, "learning_rate": 4.43647351233119e-05, "loss": 2.5962, "step": 1183500 }, { "epoch": 2.26, "learning_rate": 4.43623534813889e-05, "loss": 2.6195, "step": 1184000 }, { "epoch": 2.26, "learning_rate": 4.43599718394659e-05, "loss": 2.6016, "step": 1184500 }, { "epoch": 2.26, "learning_rate": 4.4357590197542906e-05, "loss": 2.5939, "step": 1185000 }, { "epoch": 2.26, "learning_rate": 4.4355208555619915e-05, "loss": 2.6101, "step": 1185500 }, { "epoch": 2.26, "learning_rate": 4.435282691369692e-05, "loss": 2.5946, "step": 1186000 }, { "epoch": 2.26, "learning_rate": 4.435044527177393e-05, "loss": 2.6057, "step": 1186500 }, { "epoch": 2.26, "learning_rate": 4.434806839313478e-05, "loss": 2.6004, "step": 1187000 }, { "epoch": 2.26, "learning_rate": 4.434568675121178e-05, "loss": 2.6191, "step": 1187500 }, { "epoch": 2.26, "learning_rate": 4.4343305109288785e-05, "loss": 2.6209, "step": 1188000 }, { "epoch": 2.26, "learning_rate": 4.4340923467365794e-05, "loss": 2.5949, "step": 1188500 }, { "epoch": 2.27, "learning_rate": 4.433854658872664e-05, "loss": 2.6137, "step": 1189000 }, { "epoch": 2.27, "learning_rate": 4.433616494680365e-05, "loss": 2.5972, "step": 1189500 }, { "epoch": 2.27, "learning_rate": 4.433378330488065e-05, "loss": 2.5965, "step": 1190000 }, { "epoch": 2.27, "learning_rate": 4.433140166295766e-05, "loss": 2.6235, "step": 1190500 }, { "epoch": 2.27, "learning_rate": 4.4329020021034664e-05, "loss": 2.6063, "step": 1191000 }, { "epoch": 2.27, "learning_rate": 4.4326643142395516e-05, "loss": 2.6161, "step": 1191500 }, { "epoch": 2.27, "learning_rate": 4.432426150047252e-05, "loss": 2.6084, "step": 1192000 }, { "epoch": 2.27, "learning_rate": 4.432187985854953e-05, "loss": 2.6335, "step": 1192500 }, { "epoch": 2.27, "learning_rate": 4.431949821662653e-05, "loss": 2.6061, "step": 1193000 }, { "epoch": 2.27, "learning_rate": 4.431712133798738e-05, "loss": 2.6025, "step": 1193500 }, { "epoch": 2.27, "learning_rate": 4.4314739696064386e-05, "loss": 2.6166, "step": 1194000 }, { "epoch": 2.28, "learning_rate": 4.431235805414139e-05, "loss": 2.6014, "step": 1194500 }, { "epoch": 2.28, "learning_rate": 4.43099764122184e-05, "loss": 2.6158, "step": 1195000 }, { "epoch": 2.28, "learning_rate": 4.430759953357925e-05, "loss": 2.6206, "step": 1195500 }, { "epoch": 2.28, "learning_rate": 4.430521789165625e-05, "loss": 2.6157, "step": 1196000 }, { "epoch": 2.28, "learning_rate": 4.430283624973326e-05, "loss": 2.6031, "step": 1196500 }, { "epoch": 2.28, "learning_rate": 4.4300454607810265e-05, "loss": 2.6014, "step": 1197000 }, { "epoch": 2.28, "learning_rate": 4.429807772917112e-05, "loss": 2.5989, "step": 1197500 }, { "epoch": 2.28, "learning_rate": 4.429569608724812e-05, "loss": 2.6134, "step": 1198000 }, { "epoch": 2.28, "learning_rate": 4.429331444532512e-05, "loss": 2.614, "step": 1198500 }, { "epoch": 2.28, "learning_rate": 4.429093280340213e-05, "loss": 2.6087, "step": 1199000 }, { "epoch": 2.29, "learning_rate": 4.4288551161479134e-05, "loss": 2.6143, "step": 1199500 }, { "epoch": 2.29, "learning_rate": 4.4286174282839987e-05, "loss": 2.6008, "step": 1200000 }, { "epoch": 2.29, "eval_accuracy": 0.5275863241792078, "eval_loss": 2.5063064098358154, "eval_runtime": 4175.0811, "eval_samples_per_second": 65.864, "eval_steps_per_second": 6.586, "step": 1200000 }, { "epoch": 2.29, "learning_rate": 4.4283792640916996e-05, "loss": 2.585, "step": 1200500 }, { "epoch": 2.29, "learning_rate": 4.428141099899399e-05, "loss": 2.6052, "step": 1201000 }, { "epoch": 2.29, "learning_rate": 4.4279029357071e-05, "loss": 2.6142, "step": 1201500 }, { "epoch": 2.29, "learning_rate": 4.4276652478431854e-05, "loss": 2.5956, "step": 1202000 }, { "epoch": 2.29, "learning_rate": 4.4274270836508856e-05, "loss": 2.6134, "step": 1202500 }, { "epoch": 2.29, "learning_rate": 4.4271889194585866e-05, "loss": 2.6174, "step": 1203000 }, { "epoch": 2.29, "learning_rate": 4.426950755266287e-05, "loss": 2.6123, "step": 1203500 }, { "epoch": 2.29, "learning_rate": 4.426712591073988e-05, "loss": 2.5966, "step": 1204000 }, { "epoch": 2.29, "learning_rate": 4.426474903210072e-05, "loss": 2.5911, "step": 1204500 }, { "epoch": 2.3, "learning_rate": 4.4262367390177726e-05, "loss": 2.5932, "step": 1205000 }, { "epoch": 2.3, "learning_rate": 4.4259985748254735e-05, "loss": 2.6019, "step": 1205500 }, { "epoch": 2.3, "learning_rate": 4.425760410633174e-05, "loss": 2.5999, "step": 1206000 }, { "epoch": 2.3, "learning_rate": 4.425522722769259e-05, "loss": 2.6155, "step": 1206500 }, { "epoch": 2.3, "learning_rate": 4.42528455857696e-05, "loss": 2.5983, "step": 1207000 }, { "epoch": 2.3, "learning_rate": 4.42504639438466e-05, "loss": 2.5844, "step": 1207500 }, { "epoch": 2.3, "learning_rate": 4.4248087065207454e-05, "loss": 2.6124, "step": 1208000 }, { "epoch": 2.3, "learning_rate": 4.424570542328446e-05, "loss": 2.6104, "step": 1208500 }, { "epoch": 2.3, "learning_rate": 4.424332378136146e-05, "loss": 2.6007, "step": 1209000 }, { "epoch": 2.3, "learning_rate": 4.424094213943847e-05, "loss": 2.6057, "step": 1209500 }, { "epoch": 2.31, "learning_rate": 4.423856049751547e-05, "loss": 2.6111, "step": 1210000 }, { "epoch": 2.31, "learning_rate": 4.423617885559248e-05, "loss": 2.6257, "step": 1210500 }, { "epoch": 2.31, "learning_rate": 4.4233797213669484e-05, "loss": 2.5995, "step": 1211000 }, { "epoch": 2.31, "learning_rate": 4.423142033503033e-05, "loss": 2.6094, "step": 1211500 }, { "epoch": 2.31, "learning_rate": 4.422903869310734e-05, "loss": 2.5997, "step": 1212000 }, { "epoch": 2.31, "learning_rate": 4.422665705118435e-05, "loss": 2.5871, "step": 1212500 }, { "epoch": 2.31, "learning_rate": 4.422427540926135e-05, "loss": 2.6033, "step": 1213000 }, { "epoch": 2.31, "learning_rate": 4.42218985306222e-05, "loss": 2.6026, "step": 1213500 }, { "epoch": 2.31, "learning_rate": 4.4219516888699206e-05, "loss": 2.6044, "step": 1214000 }, { "epoch": 2.31, "learning_rate": 4.421713524677621e-05, "loss": 2.638, "step": 1214500 }, { "epoch": 2.31, "learning_rate": 4.421475360485322e-05, "loss": 2.5927, "step": 1215000 }, { "epoch": 2.32, "learning_rate": 4.421237196293022e-05, "loss": 2.6088, "step": 1215500 }, { "epoch": 2.32, "learning_rate": 4.420999032100723e-05, "loss": 2.6189, "step": 1216000 }, { "epoch": 2.32, "learning_rate": 4.420760867908423e-05, "loss": 2.6096, "step": 1216500 }, { "epoch": 2.32, "learning_rate": 4.420522703716124e-05, "loss": 2.6077, "step": 1217000 }, { "epoch": 2.32, "learning_rate": 4.420284539523824e-05, "loss": 2.5886, "step": 1217500 }, { "epoch": 2.32, "learning_rate": 4.420046375331525e-05, "loss": 2.6066, "step": 1218000 }, { "epoch": 2.32, "learning_rate": 4.419808211139225e-05, "loss": 2.6204, "step": 1218500 }, { "epoch": 2.32, "learning_rate": 4.419570046946926e-05, "loss": 2.6019, "step": 1219000 }, { "epoch": 2.32, "learning_rate": 4.4193328354113954e-05, "loss": 2.6127, "step": 1219500 }, { "epoch": 2.32, "learning_rate": 4.4190946712190963e-05, "loss": 2.5961, "step": 1220000 }, { "epoch": 2.33, "learning_rate": 4.4188565070267966e-05, "loss": 2.5946, "step": 1220500 }, { "epoch": 2.33, "learning_rate": 4.418618342834497e-05, "loss": 2.5942, "step": 1221000 }, { "epoch": 2.33, "learning_rate": 4.418380178642197e-05, "loss": 2.5935, "step": 1221500 }, { "epoch": 2.33, "learning_rate": 4.4181424907782824e-05, "loss": 2.6109, "step": 1222000 }, { "epoch": 2.33, "learning_rate": 4.417904326585983e-05, "loss": 2.5896, "step": 1222500 }, { "epoch": 2.33, "learning_rate": 4.4176661623936836e-05, "loss": 2.5899, "step": 1223000 }, { "epoch": 2.33, "learning_rate": 4.4174279982013845e-05, "loss": 2.6001, "step": 1223500 }, { "epoch": 2.33, "learning_rate": 4.417189834009085e-05, "loss": 2.608, "step": 1224000 }, { "epoch": 2.33, "learning_rate": 4.416951669816785e-05, "loss": 2.6244, "step": 1224500 }, { "epoch": 2.33, "learning_rate": 4.416713505624486e-05, "loss": 2.5955, "step": 1225000 }, { "epoch": 2.33, "learning_rate": 4.416475341432186e-05, "loss": 2.6027, "step": 1225500 }, { "epoch": 2.34, "learning_rate": 4.4162376535682715e-05, "loss": 2.608, "step": 1226000 }, { "epoch": 2.34, "learning_rate": 4.4159994893759724e-05, "loss": 2.6033, "step": 1226500 }, { "epoch": 2.34, "learning_rate": 4.415761325183673e-05, "loss": 2.5988, "step": 1227000 }, { "epoch": 2.34, "learning_rate": 4.415523160991373e-05, "loss": 2.613, "step": 1227500 }, { "epoch": 2.34, "learning_rate": 4.415284996799073e-05, "loss": 2.5973, "step": 1228000 }, { "epoch": 2.34, "learning_rate": 4.4150473089351584e-05, "loss": 2.6023, "step": 1228500 }, { "epoch": 2.34, "learning_rate": 4.4148091447428594e-05, "loss": 2.5853, "step": 1229000 }, { "epoch": 2.34, "learning_rate": 4.4145709805505596e-05, "loss": 2.5953, "step": 1229500 }, { "epoch": 2.34, "learning_rate": 4.4143328163582606e-05, "loss": 2.5954, "step": 1230000 }, { "epoch": 2.34, "eval_accuracy": 0.5278048176733022, "eval_loss": 2.504873514175415, "eval_runtime": 4192.8286, "eval_samples_per_second": 65.585, "eval_steps_per_second": 6.559, "step": 1230000 }, { "epoch": 2.34, "learning_rate": 4.414095128494346e-05, "loss": 2.601, "step": 1230500 }, { "epoch": 2.35, "learning_rate": 4.4138569643020454e-05, "loss": 2.6036, "step": 1231000 }, { "epoch": 2.35, "learning_rate": 4.4136192764381306e-05, "loss": 2.6116, "step": 1231500 }, { "epoch": 2.35, "learning_rate": 4.4133811122458315e-05, "loss": 2.5993, "step": 1232000 }, { "epoch": 2.35, "learning_rate": 4.413142948053532e-05, "loss": 2.5944, "step": 1232500 }, { "epoch": 2.35, "learning_rate": 4.412904783861233e-05, "loss": 2.5917, "step": 1233000 }, { "epoch": 2.35, "learning_rate": 4.412666619668933e-05, "loss": 2.5891, "step": 1233500 }, { "epoch": 2.35, "learning_rate": 4.412428455476633e-05, "loss": 2.5947, "step": 1234000 }, { "epoch": 2.35, "learning_rate": 4.4121902912843335e-05, "loss": 2.6034, "step": 1234500 }, { "epoch": 2.35, "learning_rate": 4.4119521270920345e-05, "loss": 2.5706, "step": 1235000 }, { "epoch": 2.35, "learning_rate": 4.411713962899735e-05, "loss": 2.5978, "step": 1235500 }, { "epoch": 2.35, "learning_rate": 4.411475798707436e-05, "loss": 2.6121, "step": 1236000 }, { "epoch": 2.36, "learning_rate": 4.411237634515136e-05, "loss": 2.6221, "step": 1236500 }, { "epoch": 2.36, "learning_rate": 4.410999946651221e-05, "loss": 2.6264, "step": 1237000 }, { "epoch": 2.36, "learning_rate": 4.4107617824589214e-05, "loss": 2.5802, "step": 1237500 }, { "epoch": 2.36, "learning_rate": 4.4105236182666224e-05, "loss": 2.6148, "step": 1238000 }, { "epoch": 2.36, "learning_rate": 4.4102854540743226e-05, "loss": 2.5911, "step": 1238500 }, { "epoch": 2.36, "learning_rate": 4.410047766210408e-05, "loss": 2.6185, "step": 1239000 }, { "epoch": 2.36, "learning_rate": 4.409809602018108e-05, "loss": 2.5894, "step": 1239500 }, { "epoch": 2.36, "learning_rate": 4.409571437825809e-05, "loss": 2.6181, "step": 1240000 }, { "epoch": 2.36, "learning_rate": 4.409333273633509e-05, "loss": 2.6081, "step": 1240500 }, { "epoch": 2.36, "learning_rate": 4.4090955857695946e-05, "loss": 2.6034, "step": 1241000 }, { "epoch": 2.37, "learning_rate": 4.408857421577295e-05, "loss": 2.5891, "step": 1241500 }, { "epoch": 2.37, "learning_rate": 4.408619257384996e-05, "loss": 2.6081, "step": 1242000 }, { "epoch": 2.37, "learning_rate": 4.40838156952108e-05, "loss": 2.5925, "step": 1242500 }, { "epoch": 2.37, "learning_rate": 4.408143405328781e-05, "loss": 2.6067, "step": 1243000 }, { "epoch": 2.37, "learning_rate": 4.4079052411364815e-05, "loss": 2.5912, "step": 1243500 }, { "epoch": 2.37, "learning_rate": 4.407667076944182e-05, "loss": 2.5816, "step": 1244000 }, { "epoch": 2.37, "learning_rate": 4.407428912751883e-05, "loss": 2.6213, "step": 1244500 }, { "epoch": 2.37, "learning_rate": 4.407190748559583e-05, "loss": 2.6074, "step": 1245000 }, { "epoch": 2.37, "learning_rate": 4.406953060695668e-05, "loss": 2.5881, "step": 1245500 }, { "epoch": 2.37, "learning_rate": 4.406714896503369e-05, "loss": 2.6149, "step": 1246000 }, { "epoch": 2.37, "learning_rate": 4.4064767323110694e-05, "loss": 2.5993, "step": 1246500 }, { "epoch": 2.38, "learning_rate": 4.40623856811877e-05, "loss": 2.6037, "step": 1247000 }, { "epoch": 2.38, "learning_rate": 4.40600040392647e-05, "loss": 2.5816, "step": 1247500 }, { "epoch": 2.38, "learning_rate": 4.405762239734171e-05, "loss": 2.6152, "step": 1248000 }, { "epoch": 2.38, "learning_rate": 4.405524075541871e-05, "loss": 2.5841, "step": 1248500 }, { "epoch": 2.38, "learning_rate": 4.405285911349572e-05, "loss": 2.5859, "step": 1249000 }, { "epoch": 2.38, "learning_rate": 4.405048223485657e-05, "loss": 2.5955, "step": 1249500 }, { "epoch": 2.38, "learning_rate": 4.4048100592933576e-05, "loss": 2.607, "step": 1250000 }, { "epoch": 2.38, "learning_rate": 4.404572371429443e-05, "loss": 2.5988, "step": 1250500 }, { "epoch": 2.38, "learning_rate": 4.404334207237143e-05, "loss": 2.591, "step": 1251000 }, { "epoch": 2.38, "learning_rate": 4.404096043044843e-05, "loss": 2.5945, "step": 1251500 }, { "epoch": 2.39, "learning_rate": 4.403857878852544e-05, "loss": 2.5961, "step": 1252000 }, { "epoch": 2.39, "learning_rate": 4.4036201909886295e-05, "loss": 2.6083, "step": 1252500 }, { "epoch": 2.39, "learning_rate": 4.40338202679633e-05, "loss": 2.6279, "step": 1253000 }, { "epoch": 2.39, "learning_rate": 4.403143862604031e-05, "loss": 2.5977, "step": 1253500 }, { "epoch": 2.39, "learning_rate": 4.40290569841173e-05, "loss": 2.5945, "step": 1254000 }, { "epoch": 2.39, "learning_rate": 4.402667534219431e-05, "loss": 2.6049, "step": 1254500 }, { "epoch": 2.39, "learning_rate": 4.402429370027132e-05, "loss": 2.5935, "step": 1255000 }, { "epoch": 2.39, "learning_rate": 4.4021912058348324e-05, "loss": 2.6058, "step": 1255500 }, { "epoch": 2.39, "learning_rate": 4.4019530416425334e-05, "loss": 2.6077, "step": 1256000 }, { "epoch": 2.39, "learning_rate": 4.4017148774502336e-05, "loss": 2.6079, "step": 1256500 }, { "epoch": 2.39, "learning_rate": 4.401476713257934e-05, "loss": 2.6017, "step": 1257000 }, { "epoch": 2.4, "learning_rate": 4.401238549065634e-05, "loss": 2.5894, "step": 1257500 }, { "epoch": 2.4, "learning_rate": 4.401000384873335e-05, "loss": 2.5813, "step": 1258000 }, { "epoch": 2.4, "learning_rate": 4.40076269700942e-05, "loss": 2.6029, "step": 1258500 }, { "epoch": 2.4, "learning_rate": 4.4005245328171206e-05, "loss": 2.6059, "step": 1259000 }, { "epoch": 2.4, "learning_rate": 4.4002863686248215e-05, "loss": 2.5977, "step": 1259500 }, { "epoch": 2.4, "learning_rate": 4.400048204432521e-05, "loss": 2.5897, "step": 1260000 }, { "epoch": 2.4, "eval_accuracy": 0.527959133480219, "eval_loss": 2.5019371509552, "eval_runtime": 4181.9516, "eval_samples_per_second": 65.756, "eval_steps_per_second": 6.576, "step": 1260000 }, { "epoch": 2.4, "learning_rate": 4.3998105165686063e-05, "loss": 2.5874, "step": 1260500 }, { "epoch": 2.4, "learning_rate": 4.399572352376307e-05, "loss": 2.6076, "step": 1261000 }, { "epoch": 2.4, "learning_rate": 4.3993341881840075e-05, "loss": 2.5956, "step": 1261500 }, { "epoch": 2.4, "learning_rate": 4.3990960239917085e-05, "loss": 2.6191, "step": 1262000 }, { "epoch": 2.41, "learning_rate": 4.398857859799409e-05, "loss": 2.6047, "step": 1262500 }, { "epoch": 2.41, "learning_rate": 4.398620171935494e-05, "loss": 2.6141, "step": 1263000 }, { "epoch": 2.41, "learning_rate": 4.398382007743194e-05, "loss": 2.6201, "step": 1263500 }, { "epoch": 2.41, "learning_rate": 4.3981438435508945e-05, "loss": 2.5923, "step": 1264000 }, { "epoch": 2.41, "learning_rate": 4.3979056793585954e-05, "loss": 2.6097, "step": 1264500 }, { "epoch": 2.41, "learning_rate": 4.397667991494681e-05, "loss": 2.5981, "step": 1265000 }, { "epoch": 2.41, "learning_rate": 4.397429827302381e-05, "loss": 2.6088, "step": 1265500 }, { "epoch": 2.41, "learning_rate": 4.397191663110082e-05, "loss": 2.6084, "step": 1266000 }, { "epoch": 2.41, "learning_rate": 4.396953498917782e-05, "loss": 2.6261, "step": 1266500 }, { "epoch": 2.41, "learning_rate": 4.3967153347254824e-05, "loss": 2.6127, "step": 1267000 }, { "epoch": 2.41, "learning_rate": 4.3964776468615676e-05, "loss": 2.5972, "step": 1267500 }, { "epoch": 2.42, "learning_rate": 4.396239482669268e-05, "loss": 2.6202, "step": 1268000 }, { "epoch": 2.42, "learning_rate": 4.396001318476969e-05, "loss": 2.5938, "step": 1268500 }, { "epoch": 2.42, "learning_rate": 4.39576315428467e-05, "loss": 2.5927, "step": 1269000 }, { "epoch": 2.42, "learning_rate": 4.39552499009237e-05, "loss": 2.6189, "step": 1269500 }, { "epoch": 2.42, "learning_rate": 4.395287302228455e-05, "loss": 2.6109, "step": 1270000 }, { "epoch": 2.42, "learning_rate": 4.3950491380361555e-05, "loss": 2.5877, "step": 1270500 }, { "epoch": 2.42, "learning_rate": 4.39481145017224e-05, "loss": 2.5975, "step": 1271000 }, { "epoch": 2.42, "learning_rate": 4.394573285979941e-05, "loss": 2.6069, "step": 1271500 }, { "epoch": 2.42, "learning_rate": 4.394335121787642e-05, "loss": 2.5736, "step": 1272000 }, { "epoch": 2.42, "learning_rate": 4.3940974339237265e-05, "loss": 2.6007, "step": 1272500 }, { "epoch": 2.43, "learning_rate": 4.3938592697314274e-05, "loss": 2.5931, "step": 1273000 }, { "epoch": 2.43, "learning_rate": 4.393621105539128e-05, "loss": 2.6144, "step": 1273500 }, { "epoch": 2.43, "learning_rate": 4.393382941346828e-05, "loss": 2.6177, "step": 1274000 }, { "epoch": 2.43, "learning_rate": 4.393144777154529e-05, "loss": 2.6001, "step": 1274500 }, { "epoch": 2.43, "learning_rate": 4.392906612962229e-05, "loss": 2.6023, "step": 1275000 }, { "epoch": 2.43, "learning_rate": 4.39266844876993e-05, "loss": 2.5801, "step": 1275500 }, { "epoch": 2.43, "learning_rate": 4.3924302845776304e-05, "loss": 2.601, "step": 1276000 }, { "epoch": 2.43, "learning_rate": 4.3921921203853306e-05, "loss": 2.606, "step": 1276500 }, { "epoch": 2.43, "learning_rate": 4.391953956193031e-05, "loss": 2.5866, "step": 1277000 }, { "epoch": 2.43, "learning_rate": 4.391715792000732e-05, "loss": 2.5943, "step": 1277500 }, { "epoch": 2.43, "learning_rate": 4.391477627808432e-05, "loss": 2.5934, "step": 1278000 }, { "epoch": 2.44, "learning_rate": 4.391239939944517e-05, "loss": 2.61, "step": 1278500 }, { "epoch": 2.44, "learning_rate": 4.391001775752218e-05, "loss": 2.6021, "step": 1279000 }, { "epoch": 2.44, "learning_rate": 4.3907636115599185e-05, "loss": 2.6068, "step": 1279500 }, { "epoch": 2.44, "learning_rate": 4.390525447367619e-05, "loss": 2.608, "step": 1280000 }, { "epoch": 2.44, "learning_rate": 4.39028728317532e-05, "loss": 2.5948, "step": 1280500 }, { "epoch": 2.44, "learning_rate": 4.39004911898302e-05, "loss": 2.5896, "step": 1281000 }, { "epoch": 2.44, "learning_rate": 4.389811431119105e-05, "loss": 2.5978, "step": 1281500 }, { "epoch": 2.44, "learning_rate": 4.3895732669268055e-05, "loss": 2.5916, "step": 1282000 }, { "epoch": 2.44, "learning_rate": 4.3893351027345064e-05, "loss": 2.5711, "step": 1282500 }, { "epoch": 2.44, "learning_rate": 4.389096938542207e-05, "loss": 2.5957, "step": 1283000 }, { "epoch": 2.45, "learning_rate": 4.388858774349907e-05, "loss": 2.6053, "step": 1283500 }, { "epoch": 2.45, "learning_rate": 4.388621086485992e-05, "loss": 2.5941, "step": 1284000 }, { "epoch": 2.45, "learning_rate": 4.388382922293693e-05, "loss": 2.5934, "step": 1284500 }, { "epoch": 2.45, "learning_rate": 4.3881447581013934e-05, "loss": 2.5846, "step": 1285000 }, { "epoch": 2.45, "learning_rate": 4.387906593909094e-05, "loss": 2.5871, "step": 1285500 }, { "epoch": 2.45, "learning_rate": 4.387668906045179e-05, "loss": 2.6027, "step": 1286000 }, { "epoch": 2.45, "learning_rate": 4.387430741852879e-05, "loss": 2.6252, "step": 1286500 }, { "epoch": 2.45, "learning_rate": 4.38719257766058e-05, "loss": 2.5987, "step": 1287000 }, { "epoch": 2.45, "learning_rate": 4.386954889796665e-05, "loss": 2.588, "step": 1287500 }, { "epoch": 2.45, "learning_rate": 4.3867167256043656e-05, "loss": 2.6128, "step": 1288000 }, { "epoch": 2.45, "learning_rate": 4.3864785614120665e-05, "loss": 2.5697, "step": 1288500 }, { "epoch": 2.46, "learning_rate": 4.386240397219767e-05, "loss": 2.5981, "step": 1289000 }, { "epoch": 2.46, "learning_rate": 4.386002233027467e-05, "loss": 2.5974, "step": 1289500 }, { "epoch": 2.46, "learning_rate": 4.385764068835167e-05, "loss": 2.5939, "step": 1290000 }, { "epoch": 2.46, "eval_accuracy": 0.5285150379475904, "eval_loss": 2.499438762664795, "eval_runtime": 4202.935, "eval_samples_per_second": 65.428, "eval_steps_per_second": 6.543, "step": 1290000 }, { "epoch": 2.46, "learning_rate": 4.385525904642868e-05, "loss": 2.5951, "step": 1290500 }, { "epoch": 2.46, "learning_rate": 4.3852877404505685e-05, "loss": 2.5867, "step": 1291000 }, { "epoch": 2.46, "learning_rate": 4.385050052586654e-05, "loss": 2.6069, "step": 1291500 }, { "epoch": 2.46, "learning_rate": 4.384811888394355e-05, "loss": 2.5826, "step": 1292000 }, { "epoch": 2.46, "learning_rate": 4.384573724202055e-05, "loss": 2.6035, "step": 1292500 }, { "epoch": 2.46, "learning_rate": 4.384335560009755e-05, "loss": 2.616, "step": 1293000 }, { "epoch": 2.46, "learning_rate": 4.384097395817456e-05, "loss": 2.5941, "step": 1293500 }, { "epoch": 2.47, "learning_rate": 4.3838592316251564e-05, "loss": 2.5873, "step": 1294000 }, { "epoch": 2.47, "learning_rate": 4.3836210674328573e-05, "loss": 2.588, "step": 1294500 }, { "epoch": 2.47, "learning_rate": 4.383383379568942e-05, "loss": 2.5972, "step": 1295000 }, { "epoch": 2.47, "learning_rate": 4.383145215376643e-05, "loss": 2.5989, "step": 1295500 }, { "epoch": 2.47, "learning_rate": 4.382907051184343e-05, "loss": 2.5881, "step": 1296000 }, { "epoch": 2.47, "learning_rate": 4.3826688869920434e-05, "loss": 2.5871, "step": 1296500 }, { "epoch": 2.47, "learning_rate": 4.3824311991281286e-05, "loss": 2.5889, "step": 1297000 }, { "epoch": 2.47, "learning_rate": 4.382193511264214e-05, "loss": 2.5865, "step": 1297500 }, { "epoch": 2.47, "learning_rate": 4.381955347071914e-05, "loss": 2.5993, "step": 1298000 }, { "epoch": 2.47, "learning_rate": 4.381717182879615e-05, "loss": 2.5939, "step": 1298500 }, { "epoch": 2.48, "learning_rate": 4.381479018687315e-05, "loss": 2.5716, "step": 1299000 }, { "epoch": 2.48, "learning_rate": 4.3812408544950155e-05, "loss": 2.6038, "step": 1299500 }, { "epoch": 2.48, "learning_rate": 4.3810026903027165e-05, "loss": 2.5975, "step": 1300000 }, { "epoch": 2.48, "learning_rate": 4.380765002438802e-05, "loss": 2.6075, "step": 1300500 }, { "epoch": 2.48, "learning_rate": 4.380526838246502e-05, "loss": 2.5933, "step": 1301000 }, { "epoch": 2.48, "learning_rate": 4.380288674054203e-05, "loss": 2.5961, "step": 1301500 }, { "epoch": 2.48, "learning_rate": 4.380050509861903e-05, "loss": 2.5888, "step": 1302000 }, { "epoch": 2.48, "learning_rate": 4.379812345669604e-05, "loss": 2.6132, "step": 1302500 }, { "epoch": 2.48, "learning_rate": 4.379574181477304e-05, "loss": 2.5982, "step": 1303000 }, { "epoch": 2.48, "learning_rate": 4.3793360172850046e-05, "loss": 2.5794, "step": 1303500 }, { "epoch": 2.48, "learning_rate": 4.379097853092705e-05, "loss": 2.6052, "step": 1304000 }, { "epoch": 2.49, "learning_rate": 4.378859688900406e-05, "loss": 2.6105, "step": 1304500 }, { "epoch": 2.49, "learning_rate": 4.378622001036491e-05, "loss": 2.5992, "step": 1305000 }, { "epoch": 2.49, "learning_rate": 4.3783838368441913e-05, "loss": 2.6161, "step": 1305500 }, { "epoch": 2.49, "learning_rate": 4.3781456726518916e-05, "loss": 2.5948, "step": 1306000 }, { "epoch": 2.49, "learning_rate": 4.377907508459592e-05, "loss": 2.5915, "step": 1306500 }, { "epoch": 2.49, "learning_rate": 4.377669344267293e-05, "loss": 2.6084, "step": 1307000 }, { "epoch": 2.49, "learning_rate": 4.377431656403378e-05, "loss": 2.5941, "step": 1307500 }, { "epoch": 2.49, "learning_rate": 4.377193492211078e-05, "loss": 2.5907, "step": 1308000 }, { "epoch": 2.49, "learning_rate": 4.3769558043471635e-05, "loss": 2.5963, "step": 1308500 }, { "epoch": 2.49, "learning_rate": 4.3767176401548645e-05, "loss": 2.5853, "step": 1309000 }, { "epoch": 2.5, "learning_rate": 4.376479475962564e-05, "loss": 2.5988, "step": 1309500 }, { "epoch": 2.5, "learning_rate": 4.376241311770265e-05, "loss": 2.5922, "step": 1310000 }, { "epoch": 2.5, "learning_rate": 4.37600362390635e-05, "loss": 2.5951, "step": 1310500 }, { "epoch": 2.5, "learning_rate": 4.3757654597140505e-05, "loss": 2.5788, "step": 1311000 }, { "epoch": 2.5, "learning_rate": 4.3755272955217514e-05, "loss": 2.6015, "step": 1311500 }, { "epoch": 2.5, "learning_rate": 4.375289131329452e-05, "loss": 2.5781, "step": 1312000 }, { "epoch": 2.5, "learning_rate": 4.3750509671371526e-05, "loss": 2.6077, "step": 1312500 }, { "epoch": 2.5, "learning_rate": 4.374812802944853e-05, "loss": 2.6002, "step": 1313000 }, { "epoch": 2.5, "learning_rate": 4.374574638752553e-05, "loss": 2.5846, "step": 1313500 }, { "epoch": 2.5, "learning_rate": 4.374336474560254e-05, "loss": 2.6083, "step": 1314000 }, { "epoch": 2.5, "learning_rate": 4.374098786696339e-05, "loss": 2.587, "step": 1314500 }, { "epoch": 2.51, "learning_rate": 4.3738606225040396e-05, "loss": 2.5914, "step": 1315000 }, { "epoch": 2.51, "learning_rate": 4.3736224583117405e-05, "loss": 2.5991, "step": 1315500 }, { "epoch": 2.51, "learning_rate": 4.37338429411944e-05, "loss": 2.6034, "step": 1316000 }, { "epoch": 2.51, "learning_rate": 4.373146129927141e-05, "loss": 2.585, "step": 1316500 }, { "epoch": 2.51, "learning_rate": 4.372907965734841e-05, "loss": 2.6069, "step": 1317000 }, { "epoch": 2.51, "learning_rate": 4.372669801542542e-05, "loss": 2.5942, "step": 1317500 }, { "epoch": 2.51, "learning_rate": 4.3724316373502425e-05, "loss": 2.5981, "step": 1318000 }, { "epoch": 2.51, "learning_rate": 4.372193949486328e-05, "loss": 2.5976, "step": 1318500 }, { "epoch": 2.51, "learning_rate": 4.371955785294028e-05, "loss": 2.576, "step": 1319000 }, { "epoch": 2.51, "learning_rate": 4.371717621101728e-05, "loss": 2.5797, "step": 1319500 }, { "epoch": 2.52, "learning_rate": 4.371479456909429e-05, "loss": 2.6115, "step": 1320000 }, { "epoch": 2.52, "eval_accuracy": 0.5288668101624866, "eval_loss": 2.4972872734069824, "eval_runtime": 4197.1442, "eval_samples_per_second": 65.518, "eval_steps_per_second": 6.552, "step": 1320000 }, { "epoch": 2.52, "learning_rate": 4.3712417690455144e-05, "loss": 2.6138, "step": 1320500 }, { "epoch": 2.52, "learning_rate": 4.371003604853215e-05, "loss": 2.6158, "step": 1321000 }, { "epoch": 2.52, "learning_rate": 4.3707654406609156e-05, "loss": 2.6034, "step": 1321500 }, { "epoch": 2.52, "learning_rate": 4.370527276468616e-05, "loss": 2.5866, "step": 1322000 }, { "epoch": 2.52, "learning_rate": 4.3702895886047005e-05, "loss": 2.5865, "step": 1322500 }, { "epoch": 2.52, "learning_rate": 4.3700514244124014e-05, "loss": 2.6015, "step": 1323000 }, { "epoch": 2.52, "learning_rate": 4.3698132602201017e-05, "loss": 2.5811, "step": 1323500 }, { "epoch": 2.52, "learning_rate": 4.3695750960278026e-05, "loss": 2.5876, "step": 1324000 }, { "epoch": 2.52, "learning_rate": 4.369337408163888e-05, "loss": 2.5929, "step": 1324500 }, { "epoch": 2.52, "learning_rate": 4.369099720299973e-05, "loss": 2.5764, "step": 1325000 }, { "epoch": 2.53, "learning_rate": 4.368861556107673e-05, "loss": 2.5895, "step": 1325500 }, { "epoch": 2.53, "learning_rate": 4.368623391915374e-05, "loss": 2.5891, "step": 1326000 }, { "epoch": 2.53, "learning_rate": 4.368385227723074e-05, "loss": 2.6106, "step": 1326500 }, { "epoch": 2.53, "learning_rate": 4.368147063530775e-05, "loss": 2.5979, "step": 1327000 }, { "epoch": 2.53, "learning_rate": 4.367908899338475e-05, "loss": 2.5737, "step": 1327500 }, { "epoch": 2.53, "learning_rate": 4.367670735146176e-05, "loss": 2.5928, "step": 1328000 }, { "epoch": 2.53, "learning_rate": 4.367432570953877e-05, "loss": 2.5985, "step": 1328500 }, { "epoch": 2.53, "learning_rate": 4.3671944067615765e-05, "loss": 2.5958, "step": 1329000 }, { "epoch": 2.53, "learning_rate": 4.366956718897662e-05, "loss": 2.5856, "step": 1329500 }, { "epoch": 2.53, "learning_rate": 4.366718554705363e-05, "loss": 2.6069, "step": 1330000 }, { "epoch": 2.54, "learning_rate": 4.366480390513063e-05, "loss": 2.5895, "step": 1330500 }, { "epoch": 2.54, "learning_rate": 4.366242226320764e-05, "loss": 2.5961, "step": 1331000 }, { "epoch": 2.54, "learning_rate": 4.3660045384568484e-05, "loss": 2.5993, "step": 1331500 }, { "epoch": 2.54, "learning_rate": 4.3657663742645494e-05, "loss": 2.599, "step": 1332000 }, { "epoch": 2.54, "learning_rate": 4.3655282100722496e-05, "loss": 2.5972, "step": 1332500 }, { "epoch": 2.54, "learning_rate": 4.36529004587995e-05, "loss": 2.5868, "step": 1333000 }, { "epoch": 2.54, "learning_rate": 4.365051881687651e-05, "loss": 2.5896, "step": 1333500 }, { "epoch": 2.54, "learning_rate": 4.364813717495351e-05, "loss": 2.5909, "step": 1334000 }, { "epoch": 2.54, "learning_rate": 4.364576029631436e-05, "loss": 2.602, "step": 1334500 }, { "epoch": 2.54, "learning_rate": 4.364337865439137e-05, "loss": 2.5964, "step": 1335000 }, { "epoch": 2.54, "learning_rate": 4.3640997012468375e-05, "loss": 2.5871, "step": 1335500 }, { "epoch": 2.55, "learning_rate": 4.363861537054538e-05, "loss": 2.5948, "step": 1336000 }, { "epoch": 2.55, "learning_rate": 4.363623849190623e-05, "loss": 2.5899, "step": 1336500 }, { "epoch": 2.55, "learning_rate": 4.363385684998323e-05, "loss": 2.5944, "step": 1337000 }, { "epoch": 2.55, "learning_rate": 4.363147520806024e-05, "loss": 2.6039, "step": 1337500 }, { "epoch": 2.55, "learning_rate": 4.3629093566137245e-05, "loss": 2.599, "step": 1338000 }, { "epoch": 2.55, "learning_rate": 4.3626711924214254e-05, "loss": 2.6073, "step": 1338500 }, { "epoch": 2.55, "learning_rate": 4.3624335045575107e-05, "loss": 2.5828, "step": 1339000 }, { "epoch": 2.55, "learning_rate": 4.36219534036521e-05, "loss": 2.5957, "step": 1339500 }, { "epoch": 2.55, "learning_rate": 4.3619576525012955e-05, "loss": 2.5893, "step": 1340000 }, { "epoch": 2.55, "learning_rate": 4.3617194883089964e-05, "loss": 2.5783, "step": 1340500 }, { "epoch": 2.56, "learning_rate": 4.361481324116697e-05, "loss": 2.5926, "step": 1341000 }, { "epoch": 2.56, "learning_rate": 4.3612431599243976e-05, "loss": 2.5988, "step": 1341500 }, { "epoch": 2.56, "learning_rate": 4.361004995732098e-05, "loss": 2.6109, "step": 1342000 }, { "epoch": 2.56, "learning_rate": 4.360766831539798e-05, "loss": 2.5939, "step": 1342500 }, { "epoch": 2.56, "learning_rate": 4.360528667347499e-05, "loss": 2.5943, "step": 1343000 }, { "epoch": 2.56, "learning_rate": 4.3602905031551993e-05, "loss": 2.5954, "step": 1343500 }, { "epoch": 2.56, "learning_rate": 4.3600523389629e-05, "loss": 2.5869, "step": 1344000 }, { "epoch": 2.56, "learning_rate": 4.359814651098985e-05, "loss": 2.6093, "step": 1344500 }, { "epoch": 2.56, "learning_rate": 4.359576486906686e-05, "loss": 2.5845, "step": 1345000 }, { "epoch": 2.56, "learning_rate": 4.359338799042771e-05, "loss": 2.596, "step": 1345500 }, { "epoch": 2.56, "learning_rate": 4.359100634850471e-05, "loss": 2.5866, "step": 1346000 }, { "epoch": 2.57, "learning_rate": 4.3588624706581715e-05, "loss": 2.6111, "step": 1346500 }, { "epoch": 2.57, "learning_rate": 4.3586243064658725e-05, "loss": 2.5892, "step": 1347000 }, { "epoch": 2.57, "learning_rate": 4.358386142273573e-05, "loss": 2.6114, "step": 1347500 }, { "epoch": 2.57, "learning_rate": 4.358147978081274e-05, "loss": 2.5814, "step": 1348000 }, { "epoch": 2.57, "learning_rate": 4.357909813888974e-05, "loss": 2.5842, "step": 1348500 }, { "epoch": 2.57, "learning_rate": 4.357671649696674e-05, "loss": 2.609, "step": 1349000 }, { "epoch": 2.57, "learning_rate": 4.3574334855043745e-05, "loss": 2.5922, "step": 1349500 }, { "epoch": 2.57, "learning_rate": 4.3571962739688447e-05, "loss": 2.5972, "step": 1350000 }, { "epoch": 2.57, "eval_accuracy": 0.5288839121262072, "eval_loss": 2.495377540588379, "eval_runtime": 4194.2815, "eval_samples_per_second": 65.563, "eval_steps_per_second": 6.556, "step": 1350000 }, { "epoch": 2.57, "learning_rate": 4.356958109776545e-05, "loss": 2.6004, "step": 1350500 }, { "epoch": 2.57, "learning_rate": 4.356719945584246e-05, "loss": 2.603, "step": 1351000 }, { "epoch": 2.58, "learning_rate": 4.356481781391946e-05, "loss": 2.5794, "step": 1351500 }, { "epoch": 2.58, "learning_rate": 4.356243617199647e-05, "loss": 2.6013, "step": 1352000 }, { "epoch": 2.58, "learning_rate": 4.3560054530073466e-05, "loss": 2.6137, "step": 1352500 }, { "epoch": 2.58, "learning_rate": 4.3557672888150476e-05, "loss": 2.6018, "step": 1353000 }, { "epoch": 2.58, "learning_rate": 4.355529124622748e-05, "loss": 2.6076, "step": 1353500 }, { "epoch": 2.58, "learning_rate": 4.355290960430449e-05, "loss": 2.5947, "step": 1354000 }, { "epoch": 2.58, "learning_rate": 4.355052796238149e-05, "loss": 2.6038, "step": 1354500 }, { "epoch": 2.58, "learning_rate": 4.354815108374234e-05, "loss": 2.6021, "step": 1355000 }, { "epoch": 2.58, "learning_rate": 4.3545769441819345e-05, "loss": 2.6005, "step": 1355500 }, { "epoch": 2.58, "learning_rate": 4.354338779989635e-05, "loss": 2.5876, "step": 1356000 }, { "epoch": 2.58, "learning_rate": 4.35410109212572e-05, "loss": 2.5928, "step": 1356500 }, { "epoch": 2.59, "learning_rate": 4.353862927933421e-05, "loss": 2.5843, "step": 1357000 }, { "epoch": 2.59, "learning_rate": 4.353624763741121e-05, "loss": 2.6077, "step": 1357500 }, { "epoch": 2.59, "learning_rate": 4.353386599548822e-05, "loss": 2.5951, "step": 1358000 }, { "epoch": 2.59, "learning_rate": 4.3531484353565224e-05, "loss": 2.5996, "step": 1358500 }, { "epoch": 2.59, "learning_rate": 4.352910271164223e-05, "loss": 2.5936, "step": 1359000 }, { "epoch": 2.59, "learning_rate": 4.3526721069719236e-05, "loss": 2.5895, "step": 1359500 }, { "epoch": 2.59, "learning_rate": 4.352433942779624e-05, "loss": 2.5978, "step": 1360000 }, { "epoch": 2.59, "learning_rate": 4.352195778587325e-05, "loss": 2.6033, "step": 1360500 }, { "epoch": 2.59, "learning_rate": 4.351957614395025e-05, "loss": 2.5882, "step": 1361000 }, { "epoch": 2.59, "learning_rate": 4.3517199265311103e-05, "loss": 2.5827, "step": 1361500 }, { "epoch": 2.6, "learning_rate": 4.3514817623388106e-05, "loss": 2.6041, "step": 1362000 }, { "epoch": 2.6, "learning_rate": 4.351243598146511e-05, "loss": 2.594, "step": 1362500 }, { "epoch": 2.6, "learning_rate": 4.351005433954212e-05, "loss": 2.6126, "step": 1363000 }, { "epoch": 2.6, "learning_rate": 4.350767269761912e-05, "loss": 2.5821, "step": 1363500 }, { "epoch": 2.6, "learning_rate": 4.350529581897997e-05, "loss": 2.578, "step": 1364000 }, { "epoch": 2.6, "learning_rate": 4.350291417705698e-05, "loss": 2.5917, "step": 1364500 }, { "epoch": 2.6, "learning_rate": 4.350053253513398e-05, "loss": 2.5918, "step": 1365000 }, { "epoch": 2.6, "learning_rate": 4.349815565649483e-05, "loss": 2.5748, "step": 1365500 }, { "epoch": 2.6, "learning_rate": 4.349577401457184e-05, "loss": 2.5746, "step": 1366000 }, { "epoch": 2.6, "learning_rate": 4.349339237264884e-05, "loss": 2.5943, "step": 1366500 }, { "epoch": 2.6, "learning_rate": 4.349101073072585e-05, "loss": 2.587, "step": 1367000 }, { "epoch": 2.61, "learning_rate": 4.3488629088802855e-05, "loss": 2.6032, "step": 1367500 }, { "epoch": 2.61, "learning_rate": 4.3486247446879864e-05, "loss": 2.5978, "step": 1368000 }, { "epoch": 2.61, "learning_rate": 4.3483865804956867e-05, "loss": 2.5566, "step": 1368500 }, { "epoch": 2.61, "learning_rate": 4.348148416303387e-05, "loss": 2.5974, "step": 1369000 }, { "epoch": 2.61, "learning_rate": 4.347910252111088e-05, "loss": 2.5803, "step": 1369500 }, { "epoch": 2.61, "learning_rate": 4.347672087918788e-05, "loss": 2.5974, "step": 1370000 }, { "epoch": 2.61, "learning_rate": 4.3474344000548734e-05, "loss": 2.5847, "step": 1370500 }, { "epoch": 2.61, "learning_rate": 4.347196235862574e-05, "loss": 2.5976, "step": 1371000 }, { "epoch": 2.61, "learning_rate": 4.346958071670274e-05, "loss": 2.5968, "step": 1371500 }, { "epoch": 2.61, "learning_rate": 4.346719907477975e-05, "loss": 2.5704, "step": 1372000 }, { "epoch": 2.62, "learning_rate": 4.346481743285675e-05, "loss": 2.5723, "step": 1372500 }, { "epoch": 2.62, "learning_rate": 4.34624405542176e-05, "loss": 2.5982, "step": 1373000 }, { "epoch": 2.62, "learning_rate": 4.346005891229461e-05, "loss": 2.6013, "step": 1373500 }, { "epoch": 2.62, "learning_rate": 4.3457677270371615e-05, "loss": 2.5965, "step": 1374000 }, { "epoch": 2.62, "learning_rate": 4.3455295628448625e-05, "loss": 2.5905, "step": 1374500 }, { "epoch": 2.62, "learning_rate": 4.345291874980947e-05, "loss": 2.5997, "step": 1375000 }, { "epoch": 2.62, "learning_rate": 4.345053710788647e-05, "loss": 2.6085, "step": 1375500 }, { "epoch": 2.62, "learning_rate": 4.3448160229247325e-05, "loss": 2.6049, "step": 1376000 }, { "epoch": 2.62, "learning_rate": 4.3445778587324334e-05, "loss": 2.5974, "step": 1376500 }, { "epoch": 2.62, "learning_rate": 4.344339694540134e-05, "loss": 2.5837, "step": 1377000 }, { "epoch": 2.62, "learning_rate": 4.3441015303478346e-05, "loss": 2.6066, "step": 1377500 }, { "epoch": 2.63, "learning_rate": 4.343863366155535e-05, "loss": 2.6115, "step": 1378000 }, { "epoch": 2.63, "learning_rate": 4.343625201963235e-05, "loss": 2.6032, "step": 1378500 }, { "epoch": 2.63, "learning_rate": 4.3433870377709354e-05, "loss": 2.5853, "step": 1379000 }, { "epoch": 2.63, "learning_rate": 4.3431488735786364e-05, "loss": 2.5898, "step": 1379500 }, { "epoch": 2.63, "learning_rate": 4.3429111857147216e-05, "loss": 2.5787, "step": 1380000 }, { "epoch": 2.63, "eval_accuracy": 0.5295491291037372, "eval_loss": 2.4926772117614746, "eval_runtime": 4198.0038, "eval_samples_per_second": 65.504, "eval_steps_per_second": 6.55, "step": 1380000 }, { "epoch": 2.63, "learning_rate": 4.342673021522422e-05, "loss": 2.5934, "step": 1380500 }, { "epoch": 2.63, "learning_rate": 4.342434857330123e-05, "loss": 2.5866, "step": 1381000 }, { "epoch": 2.63, "learning_rate": 4.342196693137823e-05, "loss": 2.5857, "step": 1381500 }, { "epoch": 2.63, "learning_rate": 4.341958528945523e-05, "loss": 2.6079, "step": 1382000 }, { "epoch": 2.63, "learning_rate": 4.3417208410816086e-05, "loss": 2.5817, "step": 1382500 }, { "epoch": 2.64, "learning_rate": 4.341482676889309e-05, "loss": 2.6145, "step": 1383000 }, { "epoch": 2.64, "learning_rate": 4.34124451269701e-05, "loss": 2.5886, "step": 1383500 }, { "epoch": 2.64, "learning_rate": 4.34100634850471e-05, "loss": 2.5836, "step": 1384000 }, { "epoch": 2.64, "learning_rate": 4.34076818431241e-05, "loss": 2.5809, "step": 1384500 }, { "epoch": 2.64, "learning_rate": 4.340530020120111e-05, "loss": 2.5983, "step": 1385000 }, { "epoch": 2.64, "learning_rate": 4.3402923322561964e-05, "loss": 2.5778, "step": 1385500 }, { "epoch": 2.64, "learning_rate": 4.340054168063897e-05, "loss": 2.6203, "step": 1386000 }, { "epoch": 2.64, "learning_rate": 4.3398160038715977e-05, "loss": 2.5865, "step": 1386500 }, { "epoch": 2.64, "learning_rate": 4.339577839679298e-05, "loss": 2.615, "step": 1387000 }, { "epoch": 2.64, "learning_rate": 4.339339675486999e-05, "loss": 2.584, "step": 1387500 }, { "epoch": 2.64, "learning_rate": 4.3391015112946984e-05, "loss": 2.5967, "step": 1388000 }, { "epoch": 2.65, "learning_rate": 4.3388633471023994e-05, "loss": 2.5783, "step": 1388500 }, { "epoch": 2.65, "learning_rate": 4.3386251829100996e-05, "loss": 2.5742, "step": 1389000 }, { "epoch": 2.65, "learning_rate": 4.338387495046185e-05, "loss": 2.582, "step": 1389500 }, { "epoch": 2.65, "learning_rate": 4.338149330853886e-05, "loss": 2.5886, "step": 1390000 }, { "epoch": 2.65, "learning_rate": 4.337911642989971e-05, "loss": 2.5937, "step": 1390500 }, { "epoch": 2.65, "learning_rate": 4.337673478797671e-05, "loss": 2.5865, "step": 1391000 }, { "epoch": 2.65, "learning_rate": 4.3374353146053716e-05, "loss": 2.5891, "step": 1391500 }, { "epoch": 2.65, "learning_rate": 4.337197150413072e-05, "loss": 2.5983, "step": 1392000 }, { "epoch": 2.65, "learning_rate": 4.336958986220773e-05, "loss": 2.6024, "step": 1392500 }, { "epoch": 2.65, "learning_rate": 4.336721298356858e-05, "loss": 2.6076, "step": 1393000 }, { "epoch": 2.66, "learning_rate": 4.336483134164558e-05, "loss": 2.5904, "step": 1393500 }, { "epoch": 2.66, "learning_rate": 4.336244969972259e-05, "loss": 2.6042, "step": 1394000 }, { "epoch": 2.66, "learning_rate": 4.336006805779959e-05, "loss": 2.604, "step": 1394500 }, { "epoch": 2.66, "learning_rate": 4.335769117916044e-05, "loss": 2.5809, "step": 1395000 }, { "epoch": 2.66, "learning_rate": 4.335530953723745e-05, "loss": 2.5837, "step": 1395500 }, { "epoch": 2.66, "learning_rate": 4.335292789531445e-05, "loss": 2.6078, "step": 1396000 }, { "epoch": 2.66, "learning_rate": 4.335054625339146e-05, "loss": 2.5989, "step": 1396500 }, { "epoch": 2.66, "learning_rate": 4.3348164611468464e-05, "loss": 2.5968, "step": 1397000 }, { "epoch": 2.66, "learning_rate": 4.3345787732829316e-05, "loss": 2.6007, "step": 1397500 }, { "epoch": 2.66, "learning_rate": 4.334340609090632e-05, "loss": 2.5648, "step": 1398000 }, { "epoch": 2.66, "learning_rate": 4.334102444898332e-05, "loss": 2.5932, "step": 1398500 }, { "epoch": 2.67, "learning_rate": 4.333864280706033e-05, "loss": 2.5731, "step": 1399000 }, { "epoch": 2.67, "learning_rate": 4.3336265928421183e-05, "loss": 2.5934, "step": 1399500 }, { "epoch": 2.67, "learning_rate": 4.3333884286498186e-05, "loss": 2.6047, "step": 1400000 }, { "epoch": 2.67, "learning_rate": 4.3331502644575195e-05, "loss": 2.6091, "step": 1400500 }, { "epoch": 2.67, "learning_rate": 4.33291210026522e-05, "loss": 2.5883, "step": 1401000 }, { "epoch": 2.67, "learning_rate": 4.33267393607292e-05, "loss": 2.5688, "step": 1401500 }, { "epoch": 2.67, "learning_rate": 4.332436248209005e-05, "loss": 2.6, "step": 1402000 }, { "epoch": 2.67, "learning_rate": 4.332198084016706e-05, "loss": 2.5852, "step": 1402500 }, { "epoch": 2.67, "learning_rate": 4.3319599198244065e-05, "loss": 2.6016, "step": 1403000 }, { "epoch": 2.67, "learning_rate": 4.3317217556321074e-05, "loss": 2.5871, "step": 1403500 }, { "epoch": 2.68, "learning_rate": 4.331484067768192e-05, "loss": 2.5811, "step": 1404000 }, { "epoch": 2.68, "learning_rate": 4.331245903575893e-05, "loss": 2.5853, "step": 1404500 }, { "epoch": 2.68, "learning_rate": 4.331007739383593e-05, "loss": 2.5923, "step": 1405000 }, { "epoch": 2.68, "learning_rate": 4.3307695751912935e-05, "loss": 2.5882, "step": 1405500 }, { "epoch": 2.68, "learning_rate": 4.3305314109989944e-05, "loss": 2.5915, "step": 1406000 }, { "epoch": 2.68, "learning_rate": 4.3302937231350796e-05, "loss": 2.6105, "step": 1406500 }, { "epoch": 2.68, "learning_rate": 4.33005555894278e-05, "loss": 2.5915, "step": 1407000 }, { "epoch": 2.68, "learning_rate": 4.329817394750481e-05, "loss": 2.596, "step": 1407500 }, { "epoch": 2.68, "learning_rate": 4.3295792305581804e-05, "loss": 2.5757, "step": 1408000 }, { "epoch": 2.68, "learning_rate": 4.3293415426942656e-05, "loss": 2.5994, "step": 1408500 }, { "epoch": 2.68, "learning_rate": 4.329103854830351e-05, "loss": 2.5982, "step": 1409000 }, { "epoch": 2.69, "learning_rate": 4.328865690638052e-05, "loss": 2.5832, "step": 1409500 }, { "epoch": 2.69, "learning_rate": 4.328627526445752e-05, "loss": 2.5756, "step": 1410000 }, { "epoch": 2.69, "eval_accuracy": 0.5298230377947639, "eval_loss": 2.4899096488952637, "eval_runtime": 4197.3727, "eval_samples_per_second": 65.514, "eval_steps_per_second": 6.551, "step": 1410000 }, { "epoch": 2.69, "learning_rate": 4.328389362253453e-05, "loss": 2.5935, "step": 1410500 }, { "epoch": 2.69, "learning_rate": 4.328151198061153e-05, "loss": 2.5985, "step": 1411000 }, { "epoch": 2.69, "learning_rate": 4.327913033868854e-05, "loss": 2.5979, "step": 1411500 }, { "epoch": 2.69, "learning_rate": 4.327674869676554e-05, "loss": 2.6039, "step": 1412000 }, { "epoch": 2.69, "learning_rate": 4.327436705484255e-05, "loss": 2.5976, "step": 1412500 }, { "epoch": 2.69, "learning_rate": 4.327198541291955e-05, "loss": 2.6041, "step": 1413000 }, { "epoch": 2.69, "learning_rate": 4.326960377099656e-05, "loss": 2.5897, "step": 1413500 }, { "epoch": 2.69, "learning_rate": 4.326722689235741e-05, "loss": 2.5809, "step": 1414000 }, { "epoch": 2.7, "learning_rate": 4.3264845250434414e-05, "loss": 2.6017, "step": 1414500 }, { "epoch": 2.7, "learning_rate": 4.326246360851142e-05, "loss": 2.5969, "step": 1415000 }, { "epoch": 2.7, "learning_rate": 4.326008196658842e-05, "loss": 2.5926, "step": 1415500 }, { "epoch": 2.7, "learning_rate": 4.325770508794927e-05, "loss": 2.558, "step": 1416000 }, { "epoch": 2.7, "learning_rate": 4.325532344602628e-05, "loss": 2.5898, "step": 1416500 }, { "epoch": 2.7, "learning_rate": 4.3252941804103284e-05, "loss": 2.587, "step": 1417000 }, { "epoch": 2.7, "learning_rate": 4.325056016218029e-05, "loss": 2.5845, "step": 1417500 }, { "epoch": 2.7, "learning_rate": 4.3248183283541146e-05, "loss": 2.5818, "step": 1418000 }, { "epoch": 2.7, "learning_rate": 4.324580164161814e-05, "loss": 2.6039, "step": 1418500 }, { "epoch": 2.7, "learning_rate": 4.324341999969515e-05, "loss": 2.6115, "step": 1419000 }, { "epoch": 2.7, "learning_rate": 4.3241038357772154e-05, "loss": 2.5866, "step": 1419500 }, { "epoch": 2.71, "learning_rate": 4.323865671584916e-05, "loss": 2.5784, "step": 1420000 }, { "epoch": 2.71, "learning_rate": 4.3236279837210015e-05, "loss": 2.5717, "step": 1420500 }, { "epoch": 2.71, "learning_rate": 4.323389819528702e-05, "loss": 2.5727, "step": 1421000 }, { "epoch": 2.71, "learning_rate": 4.323151655336402e-05, "loss": 2.5999, "step": 1421500 }, { "epoch": 2.71, "learning_rate": 4.322913491144103e-05, "loss": 2.5992, "step": 1422000 }, { "epoch": 2.71, "learning_rate": 4.3226758032801875e-05, "loss": 2.5907, "step": 1422500 }, { "epoch": 2.71, "learning_rate": 4.3224376390878885e-05, "loss": 2.5883, "step": 1423000 }, { "epoch": 2.71, "learning_rate": 4.322199474895589e-05, "loss": 2.5936, "step": 1423500 }, { "epoch": 2.71, "learning_rate": 4.32196131070329e-05, "loss": 2.5778, "step": 1424000 }, { "epoch": 2.71, "learning_rate": 4.3217231465109906e-05, "loss": 2.6003, "step": 1424500 }, { "epoch": 2.72, "learning_rate": 4.32148498231869e-05, "loss": 2.5847, "step": 1425000 }, { "epoch": 2.72, "learning_rate": 4.321246818126391e-05, "loss": 2.5809, "step": 1425500 }, { "epoch": 2.72, "learning_rate": 4.3210086539340914e-05, "loss": 2.5879, "step": 1426000 }, { "epoch": 2.72, "learning_rate": 4.3207709660701766e-05, "loss": 2.5907, "step": 1426500 }, { "epoch": 2.72, "learning_rate": 4.3205328018778776e-05, "loss": 2.597, "step": 1427000 }, { "epoch": 2.72, "learning_rate": 4.320294637685578e-05, "loss": 2.5792, "step": 1427500 }, { "epoch": 2.72, "learning_rate": 4.320056473493278e-05, "loss": 2.6153, "step": 1428000 }, { "epoch": 2.72, "learning_rate": 4.3198183093009784e-05, "loss": 2.5838, "step": 1428500 }, { "epoch": 2.72, "learning_rate": 4.3195806214370636e-05, "loss": 2.6102, "step": 1429000 }, { "epoch": 2.72, "learning_rate": 4.3193424572447645e-05, "loss": 2.5979, "step": 1429500 }, { "epoch": 2.72, "learning_rate": 4.319104293052465e-05, "loss": 2.5918, "step": 1430000 }, { "epoch": 2.73, "learning_rate": 4.31886660518855e-05, "loss": 2.6125, "step": 1430500 }, { "epoch": 2.73, "learning_rate": 4.318628440996251e-05, "loss": 2.6127, "step": 1431000 }, { "epoch": 2.73, "learning_rate": 4.3183902768039506e-05, "loss": 2.5838, "step": 1431500 }, { "epoch": 2.73, "learning_rate": 4.3181521126116515e-05, "loss": 2.5819, "step": 1432000 }, { "epoch": 2.73, "learning_rate": 4.317913948419352e-05, "loss": 2.5854, "step": 1432500 }, { "epoch": 2.73, "learning_rate": 4.317675784227053e-05, "loss": 2.5911, "step": 1433000 }, { "epoch": 2.73, "learning_rate": 4.317437620034753e-05, "loss": 2.6097, "step": 1433500 }, { "epoch": 2.73, "learning_rate": 4.317199455842454e-05, "loss": 2.5892, "step": 1434000 }, { "epoch": 2.73, "learning_rate": 4.316961291650154e-05, "loss": 2.5886, "step": 1434500 }, { "epoch": 2.73, "learning_rate": 4.3167236037862394e-05, "loss": 2.5938, "step": 1435000 }, { "epoch": 2.74, "learning_rate": 4.3164854395939397e-05, "loss": 2.5743, "step": 1435500 }, { "epoch": 2.74, "learning_rate": 4.3162472754016406e-05, "loss": 2.5838, "step": 1436000 }, { "epoch": 2.74, "learning_rate": 4.316009111209341e-05, "loss": 2.6014, "step": 1436500 }, { "epoch": 2.74, "learning_rate": 4.315770947017042e-05, "loss": 2.5794, "step": 1437000 }, { "epoch": 2.74, "learning_rate": 4.3155332591531263e-05, "loss": 2.583, "step": 1437500 }, { "epoch": 2.74, "learning_rate": 4.3152950949608266e-05, "loss": 2.6076, "step": 1438000 }, { "epoch": 2.74, "learning_rate": 4.3150569307685275e-05, "loss": 2.5736, "step": 1438500 }, { "epoch": 2.74, "learning_rate": 4.314818766576228e-05, "loss": 2.5965, "step": 1439000 }, { "epoch": 2.74, "learning_rate": 4.314581078712313e-05, "loss": 2.5871, "step": 1439500 }, { "epoch": 2.74, "learning_rate": 4.314342914520014e-05, "loss": 2.5856, "step": 1440000 }, { "epoch": 2.74, "eval_accuracy": 0.530232418495226, "eval_loss": 2.488772392272949, "eval_runtime": 4200.3078, "eval_samples_per_second": 65.469, "eval_steps_per_second": 6.547, "step": 1440000 }, { "epoch": 2.74, "learning_rate": 4.314104750327714e-05, "loss": 2.5609, "step": 1440500 }, { "epoch": 2.75, "learning_rate": 4.3138665861354145e-05, "loss": 2.5844, "step": 1441000 }, { "epoch": 2.75, "learning_rate": 4.3136288982715e-05, "loss": 2.6029, "step": 1441500 }, { "epoch": 2.75, "learning_rate": 4.3133907340792e-05, "loss": 2.5919, "step": 1442000 }, { "epoch": 2.75, "learning_rate": 4.313152569886901e-05, "loss": 2.6041, "step": 1442500 }, { "epoch": 2.75, "learning_rate": 4.312914405694601e-05, "loss": 2.5945, "step": 1443000 }, { "epoch": 2.75, "learning_rate": 4.312676241502302e-05, "loss": 2.6009, "step": 1443500 }, { "epoch": 2.75, "learning_rate": 4.3124380773100024e-05, "loss": 2.5941, "step": 1444000 }, { "epoch": 2.75, "learning_rate": 4.312199913117703e-05, "loss": 2.6121, "step": 1444500 }, { "epoch": 2.75, "learning_rate": 4.311962225253788e-05, "loss": 2.5774, "step": 1445000 }, { "epoch": 2.75, "learning_rate": 4.311724061061488e-05, "loss": 2.5844, "step": 1445500 }, { "epoch": 2.76, "learning_rate": 4.311485896869189e-05, "loss": 2.5917, "step": 1446000 }, { "epoch": 2.76, "learning_rate": 4.3112477326768894e-05, "loss": 2.6085, "step": 1446500 }, { "epoch": 2.76, "learning_rate": 4.3110100448129746e-05, "loss": 2.5789, "step": 1447000 }, { "epoch": 2.76, "learning_rate": 4.3107718806206755e-05, "loss": 2.5673, "step": 1447500 }, { "epoch": 2.76, "learning_rate": 4.310533716428375e-05, "loss": 2.5991, "step": 1448000 }, { "epoch": 2.76, "learning_rate": 4.310295552236076e-05, "loss": 2.5844, "step": 1448500 }, { "epoch": 2.76, "learning_rate": 4.310057388043777e-05, "loss": 2.5746, "step": 1449000 }, { "epoch": 2.76, "learning_rate": 4.3098197001798615e-05, "loss": 2.6032, "step": 1449500 }, { "epoch": 2.76, "learning_rate": 4.3095815359875625e-05, "loss": 2.6013, "step": 1450000 }, { "epoch": 2.76, "learning_rate": 4.309343371795263e-05, "loss": 2.5859, "step": 1450500 }, { "epoch": 2.76, "learning_rate": 4.309105207602963e-05, "loss": 2.5889, "step": 1451000 }, { "epoch": 2.77, "learning_rate": 4.308867519739048e-05, "loss": 2.5876, "step": 1451500 }, { "epoch": 2.77, "learning_rate": 4.308629355546749e-05, "loss": 2.6006, "step": 1452000 }, { "epoch": 2.77, "learning_rate": 4.3083911913544494e-05, "loss": 2.5972, "step": 1452500 }, { "epoch": 2.77, "learning_rate": 4.3081530271621504e-05, "loss": 2.5901, "step": 1453000 }, { "epoch": 2.77, "learning_rate": 4.3079148629698506e-05, "loss": 2.579, "step": 1453500 }, { "epoch": 2.77, "learning_rate": 4.3076766987775516e-05, "loss": 2.5871, "step": 1454000 }, { "epoch": 2.77, "learning_rate": 4.307438534585251e-05, "loss": 2.5768, "step": 1454500 }, { "epoch": 2.77, "learning_rate": 4.307200370392952e-05, "loss": 2.6085, "step": 1455000 }, { "epoch": 2.77, "learning_rate": 4.3069622062006524e-05, "loss": 2.5711, "step": 1455500 }, { "epoch": 2.77, "learning_rate": 4.3067245183367376e-05, "loss": 2.5904, "step": 1456000 }, { "epoch": 2.78, "learning_rate": 4.3064863541444385e-05, "loss": 2.5783, "step": 1456500 }, { "epoch": 2.78, "learning_rate": 4.306248189952139e-05, "loss": 2.5802, "step": 1457000 }, { "epoch": 2.78, "learning_rate": 4.306010502088224e-05, "loss": 2.5629, "step": 1457500 }, { "epoch": 2.78, "learning_rate": 4.305772337895924e-05, "loss": 2.5862, "step": 1458000 }, { "epoch": 2.78, "learning_rate": 4.3055341737036246e-05, "loss": 2.581, "step": 1458500 }, { "epoch": 2.78, "learning_rate": 4.3052960095113255e-05, "loss": 2.585, "step": 1459000 }, { "epoch": 2.78, "learning_rate": 4.305057845319026e-05, "loss": 2.6061, "step": 1459500 }, { "epoch": 2.78, "learning_rate": 4.304819681126727e-05, "loss": 2.6007, "step": 1460000 }, { "epoch": 2.78, "learning_rate": 4.304581516934427e-05, "loss": 2.6044, "step": 1460500 }, { "epoch": 2.78, "learning_rate": 4.304343352742127e-05, "loss": 2.5664, "step": 1461000 }, { "epoch": 2.78, "learning_rate": 4.304105188549828e-05, "loss": 2.5726, "step": 1461500 }, { "epoch": 2.79, "learning_rate": 4.303867500685913e-05, "loss": 2.5954, "step": 1462000 }, { "epoch": 2.79, "learning_rate": 4.3036293364936137e-05, "loss": 2.5901, "step": 1462500 }, { "epoch": 2.79, "learning_rate": 4.3033911723013146e-05, "loss": 2.58, "step": 1463000 }, { "epoch": 2.79, "learning_rate": 4.303153008109015e-05, "loss": 2.5922, "step": 1463500 }, { "epoch": 2.79, "learning_rate": 4.3029153202450994e-05, "loss": 2.6011, "step": 1464000 }, { "epoch": 2.79, "learning_rate": 4.3026771560528004e-05, "loss": 2.5798, "step": 1464500 }, { "epoch": 2.79, "learning_rate": 4.3024389918605006e-05, "loss": 2.5929, "step": 1465000 }, { "epoch": 2.79, "learning_rate": 4.3022008276682016e-05, "loss": 2.5918, "step": 1465500 }, { "epoch": 2.79, "learning_rate": 4.301962663475902e-05, "loss": 2.5864, "step": 1466000 }, { "epoch": 2.79, "learning_rate": 4.301724499283603e-05, "loss": 2.5981, "step": 1466500 }, { "epoch": 2.8, "learning_rate": 4.3014863350913023e-05, "loss": 2.591, "step": 1467000 }, { "epoch": 2.8, "learning_rate": 4.3012486472273876e-05, "loss": 2.5865, "step": 1467500 }, { "epoch": 2.8, "learning_rate": 4.3010104830350885e-05, "loss": 2.5682, "step": 1468000 }, { "epoch": 2.8, "learning_rate": 4.300772318842789e-05, "loss": 2.5785, "step": 1468500 }, { "epoch": 2.8, "learning_rate": 4.30053415465049e-05, "loss": 2.5907, "step": 1469000 }, { "epoch": 2.8, "learning_rate": 4.30029599045819e-05, "loss": 2.5727, "step": 1469500 }, { "epoch": 2.8, "learning_rate": 4.30005782626589e-05, "loss": 2.5787, "step": 1470000 }, { "epoch": 2.8, "eval_accuracy": 0.5305517641640696, "eval_loss": 2.4862098693847656, "eval_runtime": 4193.7704, "eval_samples_per_second": 65.571, "eval_steps_per_second": 6.557, "step": 1470000 }, { "epoch": 2.8, "learning_rate": 4.299819662073591e-05, "loss": 2.5858, "step": 1470500 }, { "epoch": 2.8, "learning_rate": 4.2995814978812914e-05, "loss": 2.612, "step": 1471000 }, { "epoch": 2.8, "learning_rate": 4.299343810017377e-05, "loss": 2.603, "step": 1471500 }, { "epoch": 2.8, "learning_rate": 4.299105645825077e-05, "loss": 2.5928, "step": 1472000 }, { "epoch": 2.81, "learning_rate": 4.298867481632778e-05, "loss": 2.5759, "step": 1472500 }, { "epoch": 2.81, "learning_rate": 4.298629317440478e-05, "loss": 2.5816, "step": 1473000 }, { "epoch": 2.81, "learning_rate": 4.2983911532481784e-05, "loss": 2.5849, "step": 1473500 }, { "epoch": 2.81, "learning_rate": 4.2981534653842636e-05, "loss": 2.578, "step": 1474000 }, { "epoch": 2.81, "learning_rate": 4.2979153011919646e-05, "loss": 2.5702, "step": 1474500 }, { "epoch": 2.81, "learning_rate": 4.297677136999665e-05, "loss": 2.5799, "step": 1475000 }, { "epoch": 2.81, "learning_rate": 4.297438972807366e-05, "loss": 2.5633, "step": 1475500 }, { "epoch": 2.81, "learning_rate": 4.297200808615066e-05, "loss": 2.5961, "step": 1476000 }, { "epoch": 2.81, "learning_rate": 4.296963120751151e-05, "loss": 2.5765, "step": 1476500 }, { "epoch": 2.81, "learning_rate": 4.2967249565588515e-05, "loss": 2.6078, "step": 1477000 }, { "epoch": 2.82, "learning_rate": 4.296486792366552e-05, "loss": 2.5705, "step": 1477500 }, { "epoch": 2.82, "learning_rate": 4.296248628174253e-05, "loss": 2.5824, "step": 1478000 }, { "epoch": 2.82, "learning_rate": 4.296010463981953e-05, "loss": 2.5765, "step": 1478500 }, { "epoch": 2.82, "learning_rate": 4.295772299789654e-05, "loss": 2.5657, "step": 1479000 }, { "epoch": 2.82, "learning_rate": 4.2955341355973535e-05, "loss": 2.5785, "step": 1479500 }, { "epoch": 2.82, "learning_rate": 4.295296447733439e-05, "loss": 2.5853, "step": 1480000 }, { "epoch": 2.82, "learning_rate": 4.29505828354114e-05, "loss": 2.5855, "step": 1480500 }, { "epoch": 2.82, "learning_rate": 4.29482011934884e-05, "loss": 2.59, "step": 1481000 }, { "epoch": 2.82, "learning_rate": 4.294582431484925e-05, "loss": 2.611, "step": 1481500 }, { "epoch": 2.82, "learning_rate": 4.294344267292626e-05, "loss": 2.5883, "step": 1482000 }, { "epoch": 2.82, "learning_rate": 4.2941061031003264e-05, "loss": 2.6061, "step": 1482500 }, { "epoch": 2.83, "learning_rate": 4.293867938908027e-05, "loss": 2.5785, "step": 1483000 }, { "epoch": 2.83, "learning_rate": 4.2936297747157276e-05, "loss": 2.5746, "step": 1483500 }, { "epoch": 2.83, "learning_rate": 4.293391610523428e-05, "loss": 2.5881, "step": 1484000 }, { "epoch": 2.83, "learning_rate": 4.293153446331129e-05, "loss": 2.5747, "step": 1484500 }, { "epoch": 2.83, "learning_rate": 4.292915282138829e-05, "loss": 2.5903, "step": 1485000 }, { "epoch": 2.83, "learning_rate": 4.29267711794653e-05, "loss": 2.5967, "step": 1485500 }, { "epoch": 2.83, "learning_rate": 4.2924389537542296e-05, "loss": 2.5776, "step": 1486000 }, { "epoch": 2.83, "learning_rate": 4.292201265890315e-05, "loss": 2.5825, "step": 1486500 }, { "epoch": 2.83, "learning_rate": 4.291963101698016e-05, "loss": 2.5898, "step": 1487000 }, { "epoch": 2.83, "learning_rate": 4.291724937505716e-05, "loss": 2.6056, "step": 1487500 }, { "epoch": 2.84, "learning_rate": 4.291486773313417e-05, "loss": 2.5751, "step": 1488000 }, { "epoch": 2.84, "learning_rate": 4.291249085449502e-05, "loss": 2.5801, "step": 1488500 }, { "epoch": 2.84, "learning_rate": 4.2910109212572024e-05, "loss": 2.5893, "step": 1489000 }, { "epoch": 2.84, "learning_rate": 4.290772757064903e-05, "loss": 2.5758, "step": 1489500 }, { "epoch": 2.84, "learning_rate": 4.290534592872603e-05, "loss": 2.5908, "step": 1490000 }, { "epoch": 2.84, "learning_rate": 4.290296428680304e-05, "loss": 2.5863, "step": 1490500 }, { "epoch": 2.84, "learning_rate": 4.290058264488004e-05, "loss": 2.5726, "step": 1491000 }, { "epoch": 2.84, "learning_rate": 4.289820100295705e-05, "loss": 2.5799, "step": 1491500 }, { "epoch": 2.84, "learning_rate": 4.2895819361034054e-05, "loss": 2.5978, "step": 1492000 }, { "epoch": 2.84, "learning_rate": 4.2893437719111056e-05, "loss": 2.5788, "step": 1492500 }, { "epoch": 2.84, "learning_rate": 4.2891056077188066e-05, "loss": 2.5794, "step": 1493000 }, { "epoch": 2.85, "learning_rate": 4.288867919854891e-05, "loss": 2.5767, "step": 1493500 }, { "epoch": 2.85, "learning_rate": 4.288629755662592e-05, "loss": 2.5807, "step": 1494000 }, { "epoch": 2.85, "learning_rate": 4.288391591470293e-05, "loss": 2.6031, "step": 1494500 }, { "epoch": 2.85, "learning_rate": 4.288153427277993e-05, "loss": 2.5781, "step": 1495000 }, { "epoch": 2.85, "learning_rate": 4.2879152630856935e-05, "loss": 2.5609, "step": 1495500 }, { "epoch": 2.85, "learning_rate": 4.287677098893394e-05, "loss": 2.5851, "step": 1496000 }, { "epoch": 2.85, "learning_rate": 4.287438934701095e-05, "loss": 2.5839, "step": 1496500 }, { "epoch": 2.85, "learning_rate": 4.28720124683718e-05, "loss": 2.5803, "step": 1497000 }, { "epoch": 2.85, "learning_rate": 4.28696308264488e-05, "loss": 2.5819, "step": 1497500 }, { "epoch": 2.85, "learning_rate": 4.286724918452581e-05, "loss": 2.5758, "step": 1498000 }, { "epoch": 2.86, "learning_rate": 4.286486754260281e-05, "loss": 2.5804, "step": 1498500 }, { "epoch": 2.86, "learning_rate": 4.286248590067982e-05, "loss": 2.5808, "step": 1499000 }, { "epoch": 2.86, "learning_rate": 4.286010902204067e-05, "loss": 2.5999, "step": 1499500 }, { "epoch": 2.86, "learning_rate": 4.285772738011767e-05, "loss": 2.5622, "step": 1500000 }, { "epoch": 2.86, "eval_accuracy": 0.5307001115242835, "eval_loss": 2.4858310222625732, "eval_runtime": 4196.4728, "eval_samples_per_second": 65.528, "eval_steps_per_second": 6.553, "step": 1500000 }, { "epoch": 2.86, "learning_rate": 4.285534573819468e-05, "loss": 2.5842, "step": 1500500 }, { "epoch": 2.86, "learning_rate": 4.2852964096271684e-05, "loss": 2.581, "step": 1501000 }, { "epoch": 2.86, "learning_rate": 4.285058245434869e-05, "loss": 2.5911, "step": 1501500 }, { "epoch": 2.86, "learning_rate": 4.284821033899339e-05, "loss": 2.5903, "step": 1502000 }, { "epoch": 2.86, "learning_rate": 4.284582869707039e-05, "loss": 2.5587, "step": 1502500 }, { "epoch": 2.86, "learning_rate": 4.2843447055147394e-05, "loss": 2.6171, "step": 1503000 }, { "epoch": 2.86, "learning_rate": 4.28410654132244e-05, "loss": 2.5996, "step": 1503500 }, { "epoch": 2.87, "learning_rate": 4.2838683771301406e-05, "loss": 2.5774, "step": 1504000 }, { "epoch": 2.87, "learning_rate": 4.2836302129378415e-05, "loss": 2.569, "step": 1504500 }, { "epoch": 2.87, "learning_rate": 4.283392048745542e-05, "loss": 2.5709, "step": 1505000 }, { "epoch": 2.87, "learning_rate": 4.283153884553242e-05, "loss": 2.578, "step": 1505500 }, { "epoch": 2.87, "learning_rate": 4.282915720360943e-05, "loss": 2.5648, "step": 1506000 }, { "epoch": 2.87, "learning_rate": 4.2826780324970275e-05, "loss": 2.5998, "step": 1506500 }, { "epoch": 2.87, "learning_rate": 4.2824398683047285e-05, "loss": 2.5792, "step": 1507000 }, { "epoch": 2.87, "learning_rate": 4.282201704112429e-05, "loss": 2.5764, "step": 1507500 }, { "epoch": 2.87, "learning_rate": 4.28196353992013e-05, "loss": 2.5963, "step": 1508000 }, { "epoch": 2.87, "learning_rate": 4.281725852056215e-05, "loss": 2.5847, "step": 1508500 }, { "epoch": 2.88, "learning_rate": 4.281487687863915e-05, "loss": 2.5895, "step": 1509000 }, { "epoch": 2.88, "learning_rate": 4.2812495236716154e-05, "loss": 2.5654, "step": 1509500 }, { "epoch": 2.88, "learning_rate": 4.2810113594793164e-05, "loss": 2.5774, "step": 1510000 }, { "epoch": 2.88, "learning_rate": 4.2807731952870166e-05, "loss": 2.5847, "step": 1510500 }, { "epoch": 2.88, "learning_rate": 4.280535507423102e-05, "loss": 2.5873, "step": 1511000 }, { "epoch": 2.88, "learning_rate": 4.280297343230802e-05, "loss": 2.5827, "step": 1511500 }, { "epoch": 2.88, "learning_rate": 4.2800591790385024e-05, "loss": 2.598, "step": 1512000 }, { "epoch": 2.88, "learning_rate": 4.279821014846203e-05, "loss": 2.6004, "step": 1512500 }, { "epoch": 2.88, "learning_rate": 4.2795828506539036e-05, "loss": 2.5833, "step": 1513000 }, { "epoch": 2.88, "learning_rate": 4.279345162789989e-05, "loss": 2.5756, "step": 1513500 }, { "epoch": 2.88, "learning_rate": 4.27910699859769e-05, "loss": 2.5552, "step": 1514000 }, { "epoch": 2.89, "learning_rate": 4.27886883440539e-05, "loss": 2.5891, "step": 1514500 }, { "epoch": 2.89, "learning_rate": 4.278630670213091e-05, "loss": 2.5883, "step": 1515000 }, { "epoch": 2.89, "learning_rate": 4.278392982349176e-05, "loss": 2.591, "step": 1515500 }, { "epoch": 2.89, "learning_rate": 4.278154818156876e-05, "loss": 2.6019, "step": 1516000 }, { "epoch": 2.89, "learning_rate": 4.277916653964577e-05, "loss": 2.5903, "step": 1516500 }, { "epoch": 2.89, "learning_rate": 4.277678489772277e-05, "loss": 2.5775, "step": 1517000 }, { "epoch": 2.89, "learning_rate": 4.277440801908362e-05, "loss": 2.5947, "step": 1517500 }, { "epoch": 2.89, "learning_rate": 4.277202637716063e-05, "loss": 2.5739, "step": 1518000 }, { "epoch": 2.89, "learning_rate": 4.2769644735237634e-05, "loss": 2.5955, "step": 1518500 }, { "epoch": 2.89, "learning_rate": 4.276726309331464e-05, "loss": 2.5723, "step": 1519000 }, { "epoch": 2.9, "learning_rate": 4.276488621467549e-05, "loss": 2.5783, "step": 1519500 }, { "epoch": 2.9, "learning_rate": 4.276250457275249e-05, "loss": 2.6112, "step": 1520000 }, { "epoch": 2.9, "learning_rate": 4.27601229308295e-05, "loss": 2.5954, "step": 1520500 }, { "epoch": 2.9, "learning_rate": 4.2757741288906504e-05, "loss": 2.5772, "step": 1521000 }, { "epoch": 2.9, "learning_rate": 4.275535964698351e-05, "loss": 2.5683, "step": 1521500 }, { "epoch": 2.9, "learning_rate": 4.275297800506051e-05, "loss": 2.568, "step": 1522000 }, { "epoch": 2.9, "learning_rate": 4.275059636313752e-05, "loss": 2.5941, "step": 1522500 }, { "epoch": 2.9, "learning_rate": 4.274821472121453e-05, "loss": 2.6026, "step": 1523000 }, { "epoch": 2.9, "learning_rate": 4.274583784257537e-05, "loss": 2.5847, "step": 1523500 }, { "epoch": 2.9, "learning_rate": 4.274345620065238e-05, "loss": 2.5876, "step": 1524000 }, { "epoch": 2.9, "learning_rate": 4.2741074558729385e-05, "loss": 2.5932, "step": 1524500 }, { "epoch": 2.91, "learning_rate": 4.2738692916806395e-05, "loss": 2.5781, "step": 1525000 }, { "epoch": 2.91, "learning_rate": 4.27363112748834e-05, "loss": 2.5825, "step": 1525500 }, { "epoch": 2.91, "learning_rate": 4.273393439624425e-05, "loss": 2.5904, "step": 1526000 }, { "epoch": 2.91, "learning_rate": 4.2731557517605095e-05, "loss": 2.5754, "step": 1526500 }, { "epoch": 2.91, "learning_rate": 4.2729175875682104e-05, "loss": 2.5918, "step": 1527000 }, { "epoch": 2.91, "learning_rate": 4.272679423375911e-05, "loss": 2.5777, "step": 1527500 }, { "epoch": 2.91, "learning_rate": 4.2724412591836116e-05, "loss": 2.5681, "step": 1528000 }, { "epoch": 2.91, "learning_rate": 4.272203094991312e-05, "loss": 2.5715, "step": 1528500 }, { "epoch": 2.91, "learning_rate": 4.271964930799012e-05, "loss": 2.5639, "step": 1529000 }, { "epoch": 2.91, "learning_rate": 4.271726766606713e-05, "loss": 2.5517, "step": 1529500 }, { "epoch": 2.92, "learning_rate": 4.2714886024144134e-05, "loss": 2.5852, "step": 1530000 }, { "epoch": 2.92, "eval_accuracy": 0.5310495304862215, "eval_loss": 2.4834811687469482, "eval_runtime": 4201.9414, "eval_samples_per_second": 65.443, "eval_steps_per_second": 6.544, "step": 1530000 }, { "epoch": 2.92, "learning_rate": 4.271250438222114e-05, "loss": 2.5873, "step": 1530500 }, { "epoch": 2.92, "learning_rate": 4.2710127503581995e-05, "loss": 2.5908, "step": 1531000 }, { "epoch": 2.92, "learning_rate": 4.2707745861659e-05, "loss": 2.5784, "step": 1531500 }, { "epoch": 2.92, "learning_rate": 4.2705364219736e-05, "loss": 2.582, "step": 1532000 }, { "epoch": 2.92, "learning_rate": 4.2702982577813e-05, "loss": 2.5787, "step": 1532500 }, { "epoch": 2.92, "learning_rate": 4.270060093589001e-05, "loss": 2.5816, "step": 1533000 }, { "epoch": 2.92, "learning_rate": 4.2698219293967015e-05, "loss": 2.5487, "step": 1533500 }, { "epoch": 2.92, "learning_rate": 4.269584241532787e-05, "loss": 2.5813, "step": 1534000 }, { "epoch": 2.92, "learning_rate": 4.269346077340488e-05, "loss": 2.5685, "step": 1534500 }, { "epoch": 2.92, "learning_rate": 4.269108389476573e-05, "loss": 2.5808, "step": 1535000 }, { "epoch": 2.93, "learning_rate": 4.2688702252842725e-05, "loss": 2.5889, "step": 1535500 }, { "epoch": 2.93, "learning_rate": 4.2686320610919735e-05, "loss": 2.5949, "step": 1536000 }, { "epoch": 2.93, "learning_rate": 4.268393896899674e-05, "loss": 2.5816, "step": 1536500 }, { "epoch": 2.93, "learning_rate": 4.2681557327073747e-05, "loss": 2.5919, "step": 1537000 }, { "epoch": 2.93, "learning_rate": 4.267917568515075e-05, "loss": 2.5788, "step": 1537500 }, { "epoch": 2.93, "learning_rate": 4.26767988065116e-05, "loss": 2.5732, "step": 1538000 }, { "epoch": 2.93, "learning_rate": 4.267441716458861e-05, "loss": 2.5802, "step": 1538500 }, { "epoch": 2.93, "learning_rate": 4.267203552266561e-05, "loss": 2.5841, "step": 1539000 }, { "epoch": 2.93, "learning_rate": 4.2669653880742616e-05, "loss": 2.6093, "step": 1539500 }, { "epoch": 2.93, "learning_rate": 4.2667272238819626e-05, "loss": 2.571, "step": 1540000 }, { "epoch": 2.94, "learning_rate": 4.266489536018047e-05, "loss": 2.5935, "step": 1540500 }, { "epoch": 2.94, "learning_rate": 4.266251371825748e-05, "loss": 2.5858, "step": 1541000 }, { "epoch": 2.94, "learning_rate": 4.266013207633448e-05, "loss": 2.5809, "step": 1541500 }, { "epoch": 2.94, "learning_rate": 4.2657750434411486e-05, "loss": 2.6055, "step": 1542000 }, { "epoch": 2.94, "learning_rate": 4.2655368792488495e-05, "loss": 2.5893, "step": 1542500 }, { "epoch": 2.94, "learning_rate": 4.26529871505655e-05, "loss": 2.6022, "step": 1543000 }, { "epoch": 2.94, "learning_rate": 4.265060550864251e-05, "loss": 2.572, "step": 1543500 }, { "epoch": 2.94, "learning_rate": 4.264822386671951e-05, "loss": 2.5903, "step": 1544000 }, { "epoch": 2.94, "learning_rate": 4.264584222479652e-05, "loss": 2.5707, "step": 1544500 }, { "epoch": 2.94, "learning_rate": 4.2643465346157365e-05, "loss": 2.5722, "step": 1545000 }, { "epoch": 2.94, "learning_rate": 4.264108370423437e-05, "loss": 2.5945, "step": 1545500 }, { "epoch": 2.95, "learning_rate": 4.263870206231138e-05, "loss": 2.5886, "step": 1546000 }, { "epoch": 2.95, "learning_rate": 4.263632042038838e-05, "loss": 2.5925, "step": 1546500 }, { "epoch": 2.95, "learning_rate": 4.263394354174923e-05, "loss": 2.5746, "step": 1547000 }, { "epoch": 2.95, "learning_rate": 4.263156189982624e-05, "loss": 2.563, "step": 1547500 }, { "epoch": 2.95, "learning_rate": 4.262918502118709e-05, "loss": 2.5796, "step": 1548000 }, { "epoch": 2.95, "learning_rate": 4.2626803379264096e-05, "loss": 2.5797, "step": 1548500 }, { "epoch": 2.95, "learning_rate": 4.26244217373411e-05, "loss": 2.5885, "step": 1549000 }, { "epoch": 2.95, "learning_rate": 4.26220400954181e-05, "loss": 2.5726, "step": 1549500 }, { "epoch": 2.95, "learning_rate": 4.261965845349511e-05, "loss": 2.5838, "step": 1550000 }, { "epoch": 2.95, "learning_rate": 4.261727681157211e-05, "loss": 2.5953, "step": 1550500 }, { "epoch": 2.96, "learning_rate": 4.261489516964912e-05, "loss": 2.5822, "step": 1551000 }, { "epoch": 2.96, "learning_rate": 4.2612513527726125e-05, "loss": 2.5676, "step": 1551500 }, { "epoch": 2.96, "learning_rate": 4.261013664908697e-05, "loss": 2.5859, "step": 1552000 }, { "epoch": 2.96, "learning_rate": 4.260775500716398e-05, "loss": 2.5698, "step": 1552500 }, { "epoch": 2.96, "learning_rate": 4.260537336524098e-05, "loss": 2.5803, "step": 1553000 }, { "epoch": 2.96, "learning_rate": 4.260299172331799e-05, "loss": 2.5842, "step": 1553500 }, { "epoch": 2.96, "learning_rate": 4.2600610081394995e-05, "loss": 2.5917, "step": 1554000 }, { "epoch": 2.96, "learning_rate": 4.2598228439472e-05, "loss": 2.5859, "step": 1554500 }, { "epoch": 2.96, "learning_rate": 4.259584679754901e-05, "loss": 2.5831, "step": 1555000 }, { "epoch": 2.96, "learning_rate": 4.259346515562601e-05, "loss": 2.5685, "step": 1555500 }, { "epoch": 2.96, "learning_rate": 4.259108827698686e-05, "loss": 2.598, "step": 1556000 }, { "epoch": 2.97, "learning_rate": 4.258870663506387e-05, "loss": 2.6127, "step": 1556500 }, { "epoch": 2.97, "learning_rate": 4.2586324993140874e-05, "loss": 2.5848, "step": 1557000 }, { "epoch": 2.97, "learning_rate": 4.2583948114501726e-05, "loss": 2.5716, "step": 1557500 }, { "epoch": 2.97, "learning_rate": 4.2581566472578735e-05, "loss": 2.5617, "step": 1558000 }, { "epoch": 2.97, "learning_rate": 4.257918483065573e-05, "loss": 2.5796, "step": 1558500 }, { "epoch": 2.97, "learning_rate": 4.257680318873274e-05, "loss": 2.5845, "step": 1559000 }, { "epoch": 2.97, "learning_rate": 4.257442154680974e-05, "loss": 2.5832, "step": 1559500 }, { "epoch": 2.97, "learning_rate": 4.257203990488675e-05, "loss": 2.5861, "step": 1560000 }, { "epoch": 2.97, "eval_accuracy": 0.5309852781490254, "eval_loss": 2.4809675216674805, "eval_runtime": 4200.6778, "eval_samples_per_second": 65.463, "eval_steps_per_second": 6.546, "step": 1560000 }, { "epoch": 2.97, "learning_rate": 4.2569658262963755e-05, "loss": 2.56, "step": 1560500 }, { "epoch": 2.97, "learning_rate": 4.256727662104076e-05, "loss": 2.5882, "step": 1561000 }, { "epoch": 2.98, "learning_rate": 4.256489974240161e-05, "loss": 2.5813, "step": 1561500 }, { "epoch": 2.98, "learning_rate": 4.256251810047861e-05, "loss": 2.5885, "step": 1562000 }, { "epoch": 2.98, "learning_rate": 4.256013645855562e-05, "loss": 2.5675, "step": 1562500 }, { "epoch": 2.98, "learning_rate": 4.2557754816632625e-05, "loss": 2.5763, "step": 1563000 }, { "epoch": 2.98, "learning_rate": 4.255537793799348e-05, "loss": 2.5944, "step": 1563500 }, { "epoch": 2.98, "learning_rate": 4.255299629607049e-05, "loss": 2.5837, "step": 1564000 }, { "epoch": 2.98, "learning_rate": 4.255061465414749e-05, "loss": 2.5842, "step": 1564500 }, { "epoch": 2.98, "learning_rate": 4.254823301222449e-05, "loss": 2.5908, "step": 1565000 }, { "epoch": 2.98, "learning_rate": 4.2545856133585344e-05, "loss": 2.5746, "step": 1565500 }, { "epoch": 2.98, "learning_rate": 4.2543479254946196e-05, "loss": 2.5789, "step": 1566000 }, { "epoch": 2.98, "learning_rate": 4.25410976130232e-05, "loss": 2.5535, "step": 1566500 }, { "epoch": 2.99, "learning_rate": 4.253871597110021e-05, "loss": 2.6014, "step": 1567000 }, { "epoch": 2.99, "learning_rate": 4.253633432917721e-05, "loss": 2.5832, "step": 1567500 }, { "epoch": 2.99, "learning_rate": 4.253395268725422e-05, "loss": 2.5727, "step": 1568000 }, { "epoch": 2.99, "learning_rate": 4.253157104533122e-05, "loss": 2.5859, "step": 1568500 }, { "epoch": 2.99, "learning_rate": 4.2529189403408226e-05, "loss": 2.5605, "step": 1569000 }, { "epoch": 2.99, "learning_rate": 4.2526807761485235e-05, "loss": 2.5801, "step": 1569500 }, { "epoch": 2.99, "learning_rate": 4.252443088284608e-05, "loss": 2.5789, "step": 1570000 }, { "epoch": 2.99, "learning_rate": 4.252204924092309e-05, "loss": 2.5918, "step": 1570500 }, { "epoch": 2.99, "learning_rate": 4.251966759900009e-05, "loss": 2.5786, "step": 1571000 }, { "epoch": 2.99, "learning_rate": 4.2517285957077095e-05, "loss": 2.597, "step": 1571500 }, { "epoch": 3.0, "learning_rate": 4.2514904315154105e-05, "loss": 2.5879, "step": 1572000 }, { "epoch": 3.0, "learning_rate": 4.251252743651496e-05, "loss": 2.5569, "step": 1572500 }, { "epoch": 3.0, "learning_rate": 4.251014579459196e-05, "loss": 2.5657, "step": 1573000 }, { "epoch": 3.0, "learning_rate": 4.250776415266897e-05, "loss": 2.5896, "step": 1573500 }, { "epoch": 3.0, "learning_rate": 4.250538251074597e-05, "loss": 2.6047, "step": 1574000 }, { "epoch": 3.0, "learning_rate": 4.2503005632106824e-05, "loss": 2.5853, "step": 1574500 }, { "epoch": 3.0, "learning_rate": 4.2500623990183827e-05, "loss": 2.5717, "step": 1575000 }, { "epoch": 3.0, "learning_rate": 4.249824234826083e-05, "loss": 2.5652, "step": 1575500 }, { "epoch": 3.0, "learning_rate": 4.249586070633784e-05, "loss": 2.5602, "step": 1576000 }, { "epoch": 3.0, "learning_rate": 4.249348382769869e-05, "loss": 2.5845, "step": 1576500 }, { "epoch": 3.0, "learning_rate": 4.2491102185775694e-05, "loss": 2.5637, "step": 1577000 }, { "epoch": 3.01, "learning_rate": 4.24887205438527e-05, "loss": 2.5616, "step": 1577500 }, { "epoch": 3.01, "learning_rate": 4.24863389019297e-05, "loss": 2.5797, "step": 1578000 }, { "epoch": 3.01, "learning_rate": 4.248396202329055e-05, "loss": 2.5784, "step": 1578500 }, { "epoch": 3.01, "learning_rate": 4.248158038136756e-05, "loss": 2.5826, "step": 1579000 }, { "epoch": 3.01, "learning_rate": 4.247919873944456e-05, "loss": 2.5742, "step": 1579500 }, { "epoch": 3.01, "learning_rate": 4.247681709752157e-05, "loss": 2.5782, "step": 1580000 }, { "epoch": 3.01, "learning_rate": 4.2474435455598575e-05, "loss": 2.5635, "step": 1580500 }, { "epoch": 3.01, "learning_rate": 4.247205857695943e-05, "loss": 2.5638, "step": 1581000 }, { "epoch": 3.01, "learning_rate": 4.246967693503644e-05, "loss": 2.579, "step": 1581500 }, { "epoch": 3.01, "learning_rate": 4.246729529311343e-05, "loss": 2.5738, "step": 1582000 }, { "epoch": 3.02, "learning_rate": 4.246491365119044e-05, "loss": 2.5743, "step": 1582500 }, { "epoch": 3.02, "learning_rate": 4.2462532009267445e-05, "loss": 2.5615, "step": 1583000 }, { "epoch": 3.02, "learning_rate": 4.24601551306283e-05, "loss": 2.5795, "step": 1583500 }, { "epoch": 3.02, "learning_rate": 4.2457773488705306e-05, "loss": 2.588, "step": 1584000 }, { "epoch": 3.02, "learning_rate": 4.245539184678231e-05, "loss": 2.5716, "step": 1584500 }, { "epoch": 3.02, "learning_rate": 4.245301020485931e-05, "loss": 2.5877, "step": 1585000 }, { "epoch": 3.02, "learning_rate": 4.2450633326220164e-05, "loss": 2.5897, "step": 1585500 }, { "epoch": 3.02, "learning_rate": 4.2448251684297167e-05, "loss": 2.5775, "step": 1586000 }, { "epoch": 3.02, "learning_rate": 4.2445870042374176e-05, "loss": 2.5723, "step": 1586500 }, { "epoch": 3.02, "learning_rate": 4.244348840045118e-05, "loss": 2.563, "step": 1587000 }, { "epoch": 3.02, "learning_rate": 4.244111152181203e-05, "loss": 2.5815, "step": 1587500 }, { "epoch": 3.03, "learning_rate": 4.243872987988904e-05, "loss": 2.5925, "step": 1588000 }, { "epoch": 3.03, "learning_rate": 4.2436348237966036e-05, "loss": 2.5556, "step": 1588500 }, { "epoch": 3.03, "learning_rate": 4.2433966596043046e-05, "loss": 2.5885, "step": 1589000 }, { "epoch": 3.03, "learning_rate": 4.2431584954120055e-05, "loss": 2.5866, "step": 1589500 }, { "epoch": 3.03, "learning_rate": 4.24292080754809e-05, "loss": 2.5726, "step": 1590000 }, { "epoch": 3.03, "eval_accuracy": 0.531423896773329, "eval_loss": 2.479975938796997, "eval_runtime": 4198.711, "eval_samples_per_second": 65.493, "eval_steps_per_second": 6.549, "step": 1590000 }, { "epoch": 3.03, "learning_rate": 4.242682643355791e-05, "loss": 2.5583, "step": 1590500 }, { "epoch": 3.03, "learning_rate": 4.242444479163491e-05, "loss": 2.5686, "step": 1591000 }, { "epoch": 3.03, "learning_rate": 4.2422063149711915e-05, "loss": 2.5983, "step": 1591500 }, { "epoch": 3.03, "learning_rate": 4.2419681507788925e-05, "loss": 2.5548, "step": 1592000 }, { "epoch": 3.03, "learning_rate": 4.241730462914977e-05, "loss": 2.5857, "step": 1592500 }, { "epoch": 3.04, "learning_rate": 4.241492298722678e-05, "loss": 2.5697, "step": 1593000 }, { "epoch": 3.04, "learning_rate": 4.241254134530379e-05, "loss": 2.5651, "step": 1593500 }, { "epoch": 3.04, "learning_rate": 4.241015970338079e-05, "loss": 2.5564, "step": 1594000 }, { "epoch": 3.04, "learning_rate": 4.2407782824741644e-05, "loss": 2.5968, "step": 1594500 }, { "epoch": 3.04, "learning_rate": 4.2405401182818646e-05, "loss": 2.5761, "step": 1595000 }, { "epoch": 3.04, "learning_rate": 4.240301954089565e-05, "loss": 2.5706, "step": 1595500 }, { "epoch": 3.04, "learning_rate": 4.240063789897266e-05, "loss": 2.5596, "step": 1596000 }, { "epoch": 3.04, "learning_rate": 4.239825625704966e-05, "loss": 2.5713, "step": 1596500 }, { "epoch": 3.04, "learning_rate": 4.239587461512667e-05, "loss": 2.5603, "step": 1597000 }, { "epoch": 3.04, "learning_rate": 4.239349297320367e-05, "loss": 2.5768, "step": 1597500 }, { "epoch": 3.04, "learning_rate": 4.2391111331280676e-05, "loss": 2.565, "step": 1598000 }, { "epoch": 3.05, "learning_rate": 4.238873445264153e-05, "loss": 2.5531, "step": 1598500 }, { "epoch": 3.05, "learning_rate": 4.238635281071853e-05, "loss": 2.586, "step": 1599000 }, { "epoch": 3.05, "learning_rate": 4.238397116879554e-05, "loss": 2.5862, "step": 1599500 }, { "epoch": 3.05, "learning_rate": 4.238158952687254e-05, "loss": 2.5732, "step": 1600000 }, { "epoch": 3.05, "learning_rate": 4.237920788494955e-05, "loss": 2.5736, "step": 1600500 }, { "epoch": 3.05, "learning_rate": 4.2376826243026555e-05, "loss": 2.5984, "step": 1601000 }, { "epoch": 3.05, "learning_rate": 4.23744493643874e-05, "loss": 2.5567, "step": 1601500 }, { "epoch": 3.05, "learning_rate": 4.237206772246441e-05, "loss": 2.5734, "step": 1602000 }, { "epoch": 3.05, "learning_rate": 4.236968608054141e-05, "loss": 2.5836, "step": 1602500 }, { "epoch": 3.05, "learning_rate": 4.236730443861842e-05, "loss": 2.5854, "step": 1603000 }, { "epoch": 3.06, "learning_rate": 4.2364927559979274e-05, "loss": 2.5812, "step": 1603500 }, { "epoch": 3.06, "learning_rate": 4.2362545918056276e-05, "loss": 2.5687, "step": 1604000 }, { "epoch": 3.06, "learning_rate": 4.2360164276133286e-05, "loss": 2.5676, "step": 1604500 }, { "epoch": 3.06, "learning_rate": 4.235778263421029e-05, "loss": 2.5577, "step": 1605000 }, { "epoch": 3.06, "learning_rate": 4.235540099228729e-05, "loss": 2.5743, "step": 1605500 }, { "epoch": 3.06, "learning_rate": 4.2353024113648143e-05, "loss": 2.5803, "step": 1606000 }, { "epoch": 3.06, "learning_rate": 4.2350642471725146e-05, "loss": 2.5852, "step": 1606500 }, { "epoch": 3.06, "learning_rate": 4.2348260829802155e-05, "loss": 2.5903, "step": 1607000 }, { "epoch": 3.06, "learning_rate": 4.2345879187879165e-05, "loss": 2.5737, "step": 1607500 }, { "epoch": 3.06, "learning_rate": 4.234350230924001e-05, "loss": 2.5691, "step": 1608000 }, { "epoch": 3.06, "learning_rate": 4.234112066731701e-05, "loss": 2.589, "step": 1608500 }, { "epoch": 3.07, "learning_rate": 4.233873902539402e-05, "loss": 2.5729, "step": 1609000 }, { "epoch": 3.07, "learning_rate": 4.2336357383471025e-05, "loss": 2.5666, "step": 1609500 }, { "epoch": 3.07, "learning_rate": 4.233398050483188e-05, "loss": 2.5737, "step": 1610000 }, { "epoch": 3.07, "learning_rate": 4.233160362619273e-05, "loss": 2.588, "step": 1610500 }, { "epoch": 3.07, "learning_rate": 4.232922198426973e-05, "loss": 2.5858, "step": 1611000 }, { "epoch": 3.07, "learning_rate": 4.232684034234674e-05, "loss": 2.5752, "step": 1611500 }, { "epoch": 3.07, "learning_rate": 4.2324458700423744e-05, "loss": 2.5889, "step": 1612000 }, { "epoch": 3.07, "learning_rate": 4.232207705850075e-05, "loss": 2.5638, "step": 1612500 }, { "epoch": 3.07, "learning_rate": 4.2319695416577756e-05, "loss": 2.5784, "step": 1613000 }, { "epoch": 3.07, "learning_rate": 4.23173185379386e-05, "loss": 2.5727, "step": 1613500 }, { "epoch": 3.08, "learning_rate": 4.231493689601561e-05, "loss": 2.5704, "step": 1614000 }, { "epoch": 3.08, "learning_rate": 4.231255525409262e-05, "loss": 2.5751, "step": 1614500 }, { "epoch": 3.08, "learning_rate": 4.2310173612169616e-05, "loss": 2.5762, "step": 1615000 }, { "epoch": 3.08, "learning_rate": 4.2307791970246626e-05, "loss": 2.5674, "step": 1615500 }, { "epoch": 3.08, "learning_rate": 4.230541032832363e-05, "loss": 2.5735, "step": 1616000 }, { "epoch": 3.08, "learning_rate": 4.230302868640064e-05, "loss": 2.5691, "step": 1616500 }, { "epoch": 3.08, "learning_rate": 4.230064704447764e-05, "loss": 2.5574, "step": 1617000 }, { "epoch": 3.08, "learning_rate": 4.229827016583849e-05, "loss": 2.5578, "step": 1617500 }, { "epoch": 3.08, "learning_rate": 4.2295893287199345e-05, "loss": 2.576, "step": 1618000 }, { "epoch": 3.08, "learning_rate": 4.2293511645276354e-05, "loss": 2.556, "step": 1618500 }, { "epoch": 3.08, "learning_rate": 4.229113000335335e-05, "loss": 2.5606, "step": 1619000 }, { "epoch": 3.09, "learning_rate": 4.228874836143036e-05, "loss": 2.5616, "step": 1619500 }, { "epoch": 3.09, "learning_rate": 4.228636671950736e-05, "loss": 2.5874, "step": 1620000 }, { "epoch": 3.09, "eval_accuracy": 0.5317677027648419, "eval_loss": 2.4780540466308594, "eval_runtime": 4185.501, "eval_samples_per_second": 65.7, "eval_steps_per_second": 6.57, "step": 1620000 }, { "epoch": 3.09, "learning_rate": 4.228398507758437e-05, "loss": 2.5762, "step": 1620500 }, { "epoch": 3.09, "learning_rate": 4.2281603435661374e-05, "loss": 2.5901, "step": 1621000 }, { "epoch": 3.09, "learning_rate": 4.227922179373838e-05, "loss": 2.5713, "step": 1621500 }, { "epoch": 3.09, "learning_rate": 4.2276840151815386e-05, "loss": 2.5794, "step": 1622000 }, { "epoch": 3.09, "learning_rate": 4.227446327317623e-05, "loss": 2.5685, "step": 1622500 }, { "epoch": 3.09, "learning_rate": 4.227208163125324e-05, "loss": 2.5632, "step": 1623000 }, { "epoch": 3.09, "learning_rate": 4.2269699989330244e-05, "loss": 2.578, "step": 1623500 }, { "epoch": 3.09, "learning_rate": 4.226731834740725e-05, "loss": 2.5846, "step": 1624000 }, { "epoch": 3.1, "learning_rate": 4.2264941468768106e-05, "loss": 2.5771, "step": 1624500 }, { "epoch": 3.1, "learning_rate": 4.226255982684511e-05, "loss": 2.5788, "step": 1625000 }, { "epoch": 3.1, "learning_rate": 4.226017818492211e-05, "loss": 2.5732, "step": 1625500 }, { "epoch": 3.1, "learning_rate": 4.225780130628296e-05, "loss": 2.5856, "step": 1626000 }, { "epoch": 3.1, "learning_rate": 4.2255419664359966e-05, "loss": 2.5583, "step": 1626500 }, { "epoch": 3.1, "learning_rate": 4.2253038022436975e-05, "loss": 2.5851, "step": 1627000 }, { "epoch": 3.1, "learning_rate": 4.225065638051398e-05, "loss": 2.5595, "step": 1627500 }, { "epoch": 3.1, "learning_rate": 4.224827473859099e-05, "loss": 2.5724, "step": 1628000 }, { "epoch": 3.1, "learning_rate": 4.224589785995184e-05, "loss": 2.57, "step": 1628500 }, { "epoch": 3.1, "learning_rate": 4.224351621802884e-05, "loss": 2.5653, "step": 1629000 }, { "epoch": 3.1, "learning_rate": 4.2241134576105845e-05, "loss": 2.5727, "step": 1629500 }, { "epoch": 3.11, "learning_rate": 4.2238752934182854e-05, "loss": 2.5823, "step": 1630000 }, { "epoch": 3.11, "learning_rate": 4.223637129225986e-05, "loss": 2.5675, "step": 1630500 }, { "epoch": 3.11, "learning_rate": 4.2233989650336866e-05, "loss": 2.5573, "step": 1631000 }, { "epoch": 3.11, "learning_rate": 4.223160800841386e-05, "loss": 2.5556, "step": 1631500 }, { "epoch": 3.11, "learning_rate": 4.2229231129774714e-05, "loss": 2.5691, "step": 1632000 }, { "epoch": 3.11, "learning_rate": 4.2226849487851724e-05, "loss": 2.5629, "step": 1632500 }, { "epoch": 3.11, "learning_rate": 4.2224467845928726e-05, "loss": 2.558, "step": 1633000 }, { "epoch": 3.11, "learning_rate": 4.2222086204005736e-05, "loss": 2.5789, "step": 1633500 }, { "epoch": 3.11, "learning_rate": 4.221970456208274e-05, "loss": 2.552, "step": 1634000 }, { "epoch": 3.11, "learning_rate": 4.221732292015974e-05, "loss": 2.5816, "step": 1634500 }, { "epoch": 3.12, "learning_rate": 4.2214941278236744e-05, "loss": 2.5767, "step": 1635000 }, { "epoch": 3.12, "learning_rate": 4.221255963631375e-05, "loss": 2.5531, "step": 1635500 }, { "epoch": 3.12, "learning_rate": 4.2210182757674605e-05, "loss": 2.5895, "step": 1636000 }, { "epoch": 3.12, "learning_rate": 4.220780111575161e-05, "loss": 2.5979, "step": 1636500 }, { "epoch": 3.12, "learning_rate": 4.220541947382862e-05, "loss": 2.5733, "step": 1637000 }, { "epoch": 3.12, "learning_rate": 4.220303783190562e-05, "loss": 2.6027, "step": 1637500 }, { "epoch": 3.12, "learning_rate": 4.220066095326647e-05, "loss": 2.5751, "step": 1638000 }, { "epoch": 3.12, "learning_rate": 4.2198279311343475e-05, "loss": 2.5782, "step": 1638500 }, { "epoch": 3.12, "learning_rate": 4.2195897669420484e-05, "loss": 2.5746, "step": 1639000 }, { "epoch": 3.12, "learning_rate": 4.219351602749749e-05, "loss": 2.5748, "step": 1639500 }, { "epoch": 3.12, "learning_rate": 4.2191134385574496e-05, "loss": 2.5773, "step": 1640000 }, { "epoch": 3.13, "learning_rate": 4.218875750693534e-05, "loss": 2.5822, "step": 1640500 }, { "epoch": 3.13, "learning_rate": 4.218637586501235e-05, "loss": 2.5654, "step": 1641000 }, { "epoch": 3.13, "learning_rate": 4.2183994223089354e-05, "loss": 2.5531, "step": 1641500 }, { "epoch": 3.13, "learning_rate": 4.2181612581166357e-05, "loss": 2.5546, "step": 1642000 }, { "epoch": 3.13, "learning_rate": 4.2179230939243366e-05, "loss": 2.5495, "step": 1642500 }, { "epoch": 3.13, "learning_rate": 4.217685406060422e-05, "loss": 2.5607, "step": 1643000 }, { "epoch": 3.13, "learning_rate": 4.217447241868122e-05, "loss": 2.5772, "step": 1643500 }, { "epoch": 3.13, "learning_rate": 4.217209077675823e-05, "loss": 2.5707, "step": 1644000 }, { "epoch": 3.13, "learning_rate": 4.2169709134835226e-05, "loss": 2.5887, "step": 1644500 }, { "epoch": 3.13, "learning_rate": 4.2167327492912235e-05, "loss": 2.5893, "step": 1645000 }, { "epoch": 3.14, "learning_rate": 4.216495061427309e-05, "loss": 2.5857, "step": 1645500 }, { "epoch": 3.14, "learning_rate": 4.216256897235009e-05, "loss": 2.5968, "step": 1646000 }, { "epoch": 3.14, "learning_rate": 4.21601873304271e-05, "loss": 2.5712, "step": 1646500 }, { "epoch": 3.14, "learning_rate": 4.21578056885041e-05, "loss": 2.5959, "step": 1647000 }, { "epoch": 3.14, "learning_rate": 4.215542404658111e-05, "loss": 2.5598, "step": 1647500 }, { "epoch": 3.14, "learning_rate": 4.215304240465811e-05, "loss": 2.5755, "step": 1648000 }, { "epoch": 3.14, "learning_rate": 4.215066552601896e-05, "loss": 2.58, "step": 1648500 }, { "epoch": 3.14, "learning_rate": 4.214828388409597e-05, "loss": 2.5651, "step": 1649000 }, { "epoch": 3.14, "learning_rate": 4.214590224217297e-05, "loss": 2.5937, "step": 1649500 }, { "epoch": 3.14, "learning_rate": 4.214352060024998e-05, "loss": 2.5625, "step": 1650000 }, { "epoch": 3.14, "eval_accuracy": 0.5316894268884901, "eval_loss": 2.477104902267456, "eval_runtime": 4182.3259, "eval_samples_per_second": 65.75, "eval_steps_per_second": 6.575, "step": 1650000 }, { "epoch": 3.14, "learning_rate": 4.2141143721610834e-05, "loss": 2.5839, "step": 1650500 }, { "epoch": 3.15, "learning_rate": 4.2138762079687836e-05, "loss": 2.5813, "step": 1651000 }, { "epoch": 3.15, "learning_rate": 4.213638043776484e-05, "loss": 2.5729, "step": 1651500 }, { "epoch": 3.15, "learning_rate": 4.213399879584184e-05, "loss": 2.5603, "step": 1652000 }, { "epoch": 3.15, "learning_rate": 4.2131621917202694e-05, "loss": 2.544, "step": 1652500 }, { "epoch": 3.15, "learning_rate": 4.21292402752797e-05, "loss": 2.5924, "step": 1653000 }, { "epoch": 3.15, "learning_rate": 4.2126858633356706e-05, "loss": 2.5575, "step": 1653500 }, { "epoch": 3.15, "learning_rate": 4.2124476991433715e-05, "loss": 2.5612, "step": 1654000 }, { "epoch": 3.15, "learning_rate": 4.212210487607841e-05, "loss": 2.5652, "step": 1654500 }, { "epoch": 3.15, "learning_rate": 4.211972323415542e-05, "loss": 2.5664, "step": 1655000 }, { "epoch": 3.15, "learning_rate": 4.2117341592232416e-05, "loss": 2.561, "step": 1655500 }, { "epoch": 3.16, "learning_rate": 4.2114959950309425e-05, "loss": 2.5774, "step": 1656000 }, { "epoch": 3.16, "learning_rate": 4.211257830838643e-05, "loss": 2.5599, "step": 1656500 }, { "epoch": 3.16, "learning_rate": 4.211019666646344e-05, "loss": 2.5674, "step": 1657000 }, { "epoch": 3.16, "learning_rate": 4.210781502454044e-05, "loss": 2.5592, "step": 1657500 }, { "epoch": 3.16, "learning_rate": 4.210543338261744e-05, "loss": 2.5669, "step": 1658000 }, { "epoch": 3.16, "learning_rate": 4.210305174069445e-05, "loss": 2.5684, "step": 1658500 }, { "epoch": 3.16, "learning_rate": 4.2100670098771454e-05, "loss": 2.5712, "step": 1659000 }, { "epoch": 3.16, "learning_rate": 4.2098288456848464e-05, "loss": 2.5542, "step": 1659500 }, { "epoch": 3.16, "learning_rate": 4.2095906814925466e-05, "loss": 2.5905, "step": 1660000 }, { "epoch": 3.16, "learning_rate": 4.209352993628632e-05, "loss": 2.5429, "step": 1660500 }, { "epoch": 3.16, "learning_rate": 4.209114829436333e-05, "loss": 2.5791, "step": 1661000 }, { "epoch": 3.17, "learning_rate": 4.2088766652440324e-05, "loss": 2.5807, "step": 1661500 }, { "epoch": 3.17, "learning_rate": 4.2086385010517333e-05, "loss": 2.5692, "step": 1662000 }, { "epoch": 3.17, "learning_rate": 4.2084003368594336e-05, "loss": 2.5751, "step": 1662500 }, { "epoch": 3.17, "learning_rate": 4.208162648995519e-05, "loss": 2.5797, "step": 1663000 }, { "epoch": 3.17, "learning_rate": 4.20792448480322e-05, "loss": 2.5684, "step": 1663500 }, { "epoch": 3.17, "learning_rate": 4.20768632061092e-05, "loss": 2.5806, "step": 1664000 }, { "epoch": 3.17, "learning_rate": 4.20744815641862e-05, "loss": 2.5673, "step": 1664500 }, { "epoch": 3.17, "learning_rate": 4.2072104685547055e-05, "loss": 2.5669, "step": 1665000 }, { "epoch": 3.17, "learning_rate": 4.206972304362406e-05, "loss": 2.5815, "step": 1665500 }, { "epoch": 3.17, "learning_rate": 4.206734140170107e-05, "loss": 2.5511, "step": 1666000 }, { "epoch": 3.18, "learning_rate": 4.206495975977807e-05, "loss": 2.5732, "step": 1666500 }, { "epoch": 3.18, "learning_rate": 4.206257811785508e-05, "loss": 2.5602, "step": 1667000 }, { "epoch": 3.18, "learning_rate": 4.206019647593208e-05, "loss": 2.5645, "step": 1667500 }, { "epoch": 3.18, "learning_rate": 4.205781959729293e-05, "loss": 2.5572, "step": 1668000 }, { "epoch": 3.18, "learning_rate": 4.205543795536994e-05, "loss": 2.5766, "step": 1668500 }, { "epoch": 3.18, "learning_rate": 4.205306107673079e-05, "loss": 2.5758, "step": 1669000 }, { "epoch": 3.18, "learning_rate": 4.205067943480779e-05, "loss": 2.5695, "step": 1669500 }, { "epoch": 3.18, "learning_rate": 4.20482977928848e-05, "loss": 2.5856, "step": 1670000 }, { "epoch": 3.18, "learning_rate": 4.2045916150961804e-05, "loss": 2.579, "step": 1670500 }, { "epoch": 3.18, "learning_rate": 4.204353450903881e-05, "loss": 2.5806, "step": 1671000 }, { "epoch": 3.18, "learning_rate": 4.2041152867115816e-05, "loss": 2.5671, "step": 1671500 }, { "epoch": 3.19, "learning_rate": 4.203877122519282e-05, "loss": 2.5835, "step": 1672000 }, { "epoch": 3.19, "learning_rate": 4.203638958326983e-05, "loss": 2.5728, "step": 1672500 }, { "epoch": 3.19, "learning_rate": 4.203400794134683e-05, "loss": 2.5576, "step": 1673000 }, { "epoch": 3.19, "learning_rate": 4.203162629942384e-05, "loss": 2.5564, "step": 1673500 }, { "epoch": 3.19, "learning_rate": 4.2029244657500836e-05, "loss": 2.5781, "step": 1674000 }, { "epoch": 3.19, "learning_rate": 4.2026863015577845e-05, "loss": 2.58, "step": 1674500 }, { "epoch": 3.19, "learning_rate": 4.20244861369387e-05, "loss": 2.5807, "step": 1675000 }, { "epoch": 3.19, "learning_rate": 4.20221044950157e-05, "loss": 2.5699, "step": 1675500 }, { "epoch": 3.19, "learning_rate": 4.201972285309271e-05, "loss": 2.5771, "step": 1676000 }, { "epoch": 3.19, "learning_rate": 4.201734121116971e-05, "loss": 2.5884, "step": 1676500 }, { "epoch": 3.2, "learning_rate": 4.2014964332530564e-05, "loss": 2.5761, "step": 1677000 }, { "epoch": 3.2, "learning_rate": 4.201258269060757e-05, "loss": 2.5756, "step": 1677500 }, { "epoch": 3.2, "learning_rate": 4.201020581196842e-05, "loss": 2.5801, "step": 1678000 }, { "epoch": 3.2, "learning_rate": 4.200782417004542e-05, "loss": 2.5705, "step": 1678500 }, { "epoch": 3.2, "learning_rate": 4.200544252812243e-05, "loss": 2.5743, "step": 1679000 }, { "epoch": 3.2, "learning_rate": 4.2003060886199434e-05, "loss": 2.5787, "step": 1679500 }, { "epoch": 3.2, "learning_rate": 4.200067924427644e-05, "loss": 2.5399, "step": 1680000 }, { "epoch": 3.2, "eval_accuracy": 0.5322091296517395, "eval_loss": 2.475282669067383, "eval_runtime": 4173.1051, "eval_samples_per_second": 65.895, "eval_steps_per_second": 6.59, "step": 1680000 }, { "epoch": 3.2, "learning_rate": 4.1998297602353446e-05, "loss": 2.5603, "step": 1680500 }, { "epoch": 3.2, "learning_rate": 4.199591596043045e-05, "loss": 2.5805, "step": 1681000 }, { "epoch": 3.2, "learning_rate": 4.199353431850746e-05, "loss": 2.576, "step": 1681500 }, { "epoch": 3.2, "learning_rate": 4.1991157439868303e-05, "loss": 2.5778, "step": 1682000 }, { "epoch": 3.21, "learning_rate": 4.198877579794531e-05, "loss": 2.5823, "step": 1682500 }, { "epoch": 3.21, "learning_rate": 4.1986394156022316e-05, "loss": 2.5677, "step": 1683000 }, { "epoch": 3.21, "learning_rate": 4.1984012514099325e-05, "loss": 2.5624, "step": 1683500 }, { "epoch": 3.21, "learning_rate": 4.198163087217633e-05, "loss": 2.5739, "step": 1684000 }, { "epoch": 3.21, "learning_rate": 4.197925399353718e-05, "loss": 2.5809, "step": 1684500 }, { "epoch": 3.21, "learning_rate": 4.197687235161418e-05, "loss": 2.6049, "step": 1685000 }, { "epoch": 3.21, "learning_rate": 4.1974495472975035e-05, "loss": 2.5684, "step": 1685500 }, { "epoch": 3.21, "learning_rate": 4.197211383105204e-05, "loss": 2.5622, "step": 1686000 }, { "epoch": 3.21, "learning_rate": 4.196973218912905e-05, "loss": 2.5871, "step": 1686500 }, { "epoch": 3.21, "learning_rate": 4.196735054720605e-05, "loss": 2.5848, "step": 1687000 }, { "epoch": 3.22, "learning_rate": 4.196496890528305e-05, "loss": 2.5786, "step": 1687500 }, { "epoch": 3.22, "learning_rate": 4.196258726336006e-05, "loss": 2.571, "step": 1688000 }, { "epoch": 3.22, "learning_rate": 4.1960205621437064e-05, "loss": 2.574, "step": 1688500 }, { "epoch": 3.22, "learning_rate": 4.1957823979514073e-05, "loss": 2.5656, "step": 1689000 }, { "epoch": 3.22, "learning_rate": 4.1955442337591076e-05, "loss": 2.5671, "step": 1689500 }, { "epoch": 3.22, "learning_rate": 4.195306545895193e-05, "loss": 2.595, "step": 1690000 }, { "epoch": 3.22, "learning_rate": 4.195068381702893e-05, "loss": 2.5623, "step": 1690500 }, { "epoch": 3.22, "learning_rate": 4.1948302175105934e-05, "loss": 2.576, "step": 1691000 }, { "epoch": 3.22, "learning_rate": 4.194592053318294e-05, "loss": 2.5814, "step": 1691500 }, { "epoch": 3.22, "learning_rate": 4.1943543654543795e-05, "loss": 2.5838, "step": 1692000 }, { "epoch": 3.22, "learning_rate": 4.19411620126208e-05, "loss": 2.565, "step": 1692500 }, { "epoch": 3.23, "learning_rate": 4.193878037069781e-05, "loss": 2.5625, "step": 1693000 }, { "epoch": 3.23, "learning_rate": 4.193639872877481e-05, "loss": 2.5724, "step": 1693500 }, { "epoch": 3.23, "learning_rate": 4.193401708685181e-05, "loss": 2.5729, "step": 1694000 }, { "epoch": 3.23, "learning_rate": 4.1931635444928815e-05, "loss": 2.5679, "step": 1694500 }, { "epoch": 3.23, "learning_rate": 4.1929253803005825e-05, "loss": 2.5886, "step": 1695000 }, { "epoch": 3.23, "learning_rate": 4.1926872161082834e-05, "loss": 2.5595, "step": 1695500 }, { "epoch": 3.23, "learning_rate": 4.192450004572753e-05, "loss": 2.5611, "step": 1696000 }, { "epoch": 3.23, "learning_rate": 4.192211840380453e-05, "loss": 2.5656, "step": 1696500 }, { "epoch": 3.23, "learning_rate": 4.191973676188154e-05, "loss": 2.5875, "step": 1697000 }, { "epoch": 3.23, "learning_rate": 4.191735511995854e-05, "loss": 2.5704, "step": 1697500 }, { "epoch": 3.24, "learning_rate": 4.1914973478035546e-05, "loss": 2.5614, "step": 1698000 }, { "epoch": 3.24, "learning_rate": 4.19125965993964e-05, "loss": 2.5793, "step": 1698500 }, { "epoch": 3.24, "learning_rate": 4.19102149574734e-05, "loss": 2.5742, "step": 1699000 }, { "epoch": 3.24, "learning_rate": 4.1907838078834254e-05, "loss": 2.5615, "step": 1699500 }, { "epoch": 3.24, "learning_rate": 4.190545643691126e-05, "loss": 2.5667, "step": 1700000 }, { "epoch": 3.24, "learning_rate": 4.1903074794988266e-05, "loss": 2.5558, "step": 1700500 }, { "epoch": 3.24, "learning_rate": 4.190069315306527e-05, "loss": 2.5646, "step": 1701000 }, { "epoch": 3.24, "learning_rate": 4.189831151114227e-05, "loss": 2.5557, "step": 1701500 }, { "epoch": 3.24, "learning_rate": 4.189592986921928e-05, "loss": 2.5488, "step": 1702000 }, { "epoch": 3.24, "learning_rate": 4.189354822729629e-05, "loss": 2.5705, "step": 1702500 }, { "epoch": 3.24, "learning_rate": 4.189116658537329e-05, "loss": 2.5806, "step": 1703000 }, { "epoch": 3.25, "learning_rate": 4.1888789706734145e-05, "loss": 2.5715, "step": 1703500 }, { "epoch": 3.25, "learning_rate": 4.188640806481115e-05, "loss": 2.5607, "step": 1704000 }, { "epoch": 3.25, "learning_rate": 4.188402642288815e-05, "loss": 2.5845, "step": 1704500 }, { "epoch": 3.25, "learning_rate": 4.188164478096516e-05, "loss": 2.5581, "step": 1705000 }, { "epoch": 3.25, "learning_rate": 4.187926313904216e-05, "loss": 2.5786, "step": 1705500 }, { "epoch": 3.25, "learning_rate": 4.1876886260403014e-05, "loss": 2.5782, "step": 1706000 }, { "epoch": 3.25, "learning_rate": 4.1874504618480024e-05, "loss": 2.5679, "step": 1706500 }, { "epoch": 3.25, "learning_rate": 4.1872122976557026e-05, "loss": 2.5679, "step": 1707000 }, { "epoch": 3.25, "learning_rate": 4.186974133463403e-05, "loss": 2.5778, "step": 1707500 }, { "epoch": 3.25, "learning_rate": 4.186735969271103e-05, "loss": 2.572, "step": 1708000 }, { "epoch": 3.26, "learning_rate": 4.1864982814071884e-05, "loss": 2.5619, "step": 1708500 }, { "epoch": 3.26, "learning_rate": 4.186260117214889e-05, "loss": 2.5575, "step": 1709000 }, { "epoch": 3.26, "learning_rate": 4.1860219530225896e-05, "loss": 2.5953, "step": 1709500 }, { "epoch": 3.26, "learning_rate": 4.1857837888302905e-05, "loss": 2.5791, "step": 1710000 }, { "epoch": 3.26, "eval_accuracy": 0.5321771083486433, "eval_loss": 2.4739527702331543, "eval_runtime": 4177.3845, "eval_samples_per_second": 65.828, "eval_steps_per_second": 6.583, "step": 1710000 }, { "epoch": 3.26, "learning_rate": 4.18554562463799e-05, "loss": 2.5642, "step": 1710500 }, { "epoch": 3.26, "learning_rate": 4.18530841310246e-05, "loss": 2.5698, "step": 1711000 }, { "epoch": 3.26, "learning_rate": 4.1850702489101606e-05, "loss": 2.5671, "step": 1711500 }, { "epoch": 3.26, "learning_rate": 4.1848320847178615e-05, "loss": 2.5772, "step": 1712000 }, { "epoch": 3.26, "learning_rate": 4.184593920525562e-05, "loss": 2.5688, "step": 1712500 }, { "epoch": 3.26, "learning_rate": 4.184355756333263e-05, "loss": 2.5635, "step": 1713000 }, { "epoch": 3.26, "learning_rate": 4.184117592140963e-05, "loss": 2.5662, "step": 1713500 }, { "epoch": 3.27, "learning_rate": 4.183879427948663e-05, "loss": 2.5734, "step": 1714000 }, { "epoch": 3.27, "learning_rate": 4.1836412637563635e-05, "loss": 2.5849, "step": 1714500 }, { "epoch": 3.27, "learning_rate": 4.1834030995640644e-05, "loss": 2.5795, "step": 1715000 }, { "epoch": 3.27, "learning_rate": 4.18316541170015e-05, "loss": 2.5906, "step": 1715500 }, { "epoch": 3.27, "learning_rate": 4.18292724750785e-05, "loss": 2.5632, "step": 1716000 }, { "epoch": 3.27, "learning_rate": 4.182689083315551e-05, "loss": 2.5684, "step": 1716500 }, { "epoch": 3.27, "learning_rate": 4.182450919123251e-05, "loss": 2.5701, "step": 1717000 }, { "epoch": 3.27, "learning_rate": 4.1822132312593364e-05, "loss": 2.5631, "step": 1717500 }, { "epoch": 3.27, "learning_rate": 4.1819750670670366e-05, "loss": 2.5895, "step": 1718000 }, { "epoch": 3.27, "learning_rate": 4.181736902874737e-05, "loss": 2.5875, "step": 1718500 }, { "epoch": 3.28, "learning_rate": 4.181499215010822e-05, "loss": 2.5654, "step": 1719000 }, { "epoch": 3.28, "learning_rate": 4.181261050818523e-05, "loss": 2.5653, "step": 1719500 }, { "epoch": 3.28, "learning_rate": 4.181022886626223e-05, "loss": 2.5629, "step": 1720000 }, { "epoch": 3.28, "learning_rate": 4.180784722433924e-05, "loss": 2.5763, "step": 1720500 }, { "epoch": 3.28, "learning_rate": 4.1805465582416245e-05, "loss": 2.5786, "step": 1721000 }, { "epoch": 3.28, "learning_rate": 4.180308394049325e-05, "loss": 2.5706, "step": 1721500 }, { "epoch": 3.28, "learning_rate": 4.180070229857026e-05, "loss": 2.5918, "step": 1722000 }, { "epoch": 3.28, "learning_rate": 4.179832065664726e-05, "loss": 2.5585, "step": 1722500 }, { "epoch": 3.28, "learning_rate": 4.179594377800811e-05, "loss": 2.5684, "step": 1723000 }, { "epoch": 3.28, "learning_rate": 4.179356213608512e-05, "loss": 2.5625, "step": 1723500 }, { "epoch": 3.28, "learning_rate": 4.179118049416212e-05, "loss": 2.5675, "step": 1724000 }, { "epoch": 3.29, "learning_rate": 4.178879885223913e-05, "loss": 2.5833, "step": 1724500 }, { "epoch": 3.29, "learning_rate": 4.178642197359998e-05, "loss": 2.5461, "step": 1725000 }, { "epoch": 3.29, "learning_rate": 4.178404033167698e-05, "loss": 2.5823, "step": 1725500 }, { "epoch": 3.29, "learning_rate": 4.178165868975399e-05, "loss": 2.5657, "step": 1726000 }, { "epoch": 3.29, "learning_rate": 4.1779277047830994e-05, "loss": 2.5756, "step": 1726500 }, { "epoch": 3.29, "learning_rate": 4.1776895405908e-05, "loss": 2.5819, "step": 1727000 }, { "epoch": 3.29, "learning_rate": 4.177451852726885e-05, "loss": 2.5696, "step": 1727500 }, { "epoch": 3.29, "learning_rate": 4.177213688534585e-05, "loss": 2.5716, "step": 1728000 }, { "epoch": 3.29, "learning_rate": 4.176975524342286e-05, "loss": 2.5767, "step": 1728500 }, { "epoch": 3.29, "learning_rate": 4.176737360149986e-05, "loss": 2.5765, "step": 1729000 }, { "epoch": 3.3, "learning_rate": 4.176499195957687e-05, "loss": 2.5878, "step": 1729500 }, { "epoch": 3.3, "learning_rate": 4.1762615080937725e-05, "loss": 2.5901, "step": 1730000 }, { "epoch": 3.3, "learning_rate": 4.176023343901473e-05, "loss": 2.5792, "step": 1730500 }, { "epoch": 3.3, "learning_rate": 4.175785179709173e-05, "loss": 2.5814, "step": 1731000 }, { "epoch": 3.3, "learning_rate": 4.175547015516873e-05, "loss": 2.5606, "step": 1731500 }, { "epoch": 3.3, "learning_rate": 4.175308851324574e-05, "loss": 2.5739, "step": 1732000 }, { "epoch": 3.3, "learning_rate": 4.175071639789044e-05, "loss": 2.5714, "step": 1732500 }, { "epoch": 3.3, "learning_rate": 4.174833475596745e-05, "loss": 2.5641, "step": 1733000 }, { "epoch": 3.3, "learning_rate": 4.174595311404445e-05, "loss": 2.5921, "step": 1733500 }, { "epoch": 3.3, "learning_rate": 4.174357147212146e-05, "loss": 2.5514, "step": 1734000 }, { "epoch": 3.3, "learning_rate": 4.174119459348231e-05, "loss": 2.5769, "step": 1734500 }, { "epoch": 3.31, "learning_rate": 4.173881295155931e-05, "loss": 2.5849, "step": 1735000 }, { "epoch": 3.31, "learning_rate": 4.1736431309636316e-05, "loss": 2.5613, "step": 1735500 }, { "epoch": 3.31, "learning_rate": 4.173404966771332e-05, "loss": 2.5741, "step": 1736000 }, { "epoch": 3.31, "learning_rate": 4.173166802579033e-05, "loss": 2.5871, "step": 1736500 }, { "epoch": 3.31, "learning_rate": 4.172928638386733e-05, "loss": 2.5574, "step": 1737000 }, { "epoch": 3.31, "learning_rate": 4.1726904741944334e-05, "loss": 2.5621, "step": 1737500 }, { "epoch": 3.31, "learning_rate": 4.172452310002134e-05, "loss": 2.5683, "step": 1738000 }, { "epoch": 3.31, "learning_rate": 4.1722141458098346e-05, "loss": 2.5555, "step": 1738500 }, { "epoch": 3.31, "learning_rate": 4.17197645794592e-05, "loss": 2.5713, "step": 1739000 }, { "epoch": 3.31, "learning_rate": 4.17173829375362e-05, "loss": 2.5788, "step": 1739500 }, { "epoch": 3.32, "learning_rate": 4.171500605889705e-05, "loss": 2.5802, "step": 1740000 }, { "epoch": 3.32, "eval_accuracy": 0.5321772683129683, "eval_loss": 2.472776174545288, "eval_runtime": 4199.8221, "eval_samples_per_second": 65.476, "eval_steps_per_second": 6.548, "step": 1740000 }, { "epoch": 3.32, "learning_rate": 4.171262441697406e-05, "loss": 2.5624, "step": 1740500 }, { "epoch": 3.32, "learning_rate": 4.1710242775051065e-05, "loss": 2.5824, "step": 1741000 }, { "epoch": 3.32, "learning_rate": 4.170786113312807e-05, "loss": 2.5795, "step": 1741500 }, { "epoch": 3.32, "learning_rate": 4.170547949120508e-05, "loss": 2.5737, "step": 1742000 }, { "epoch": 3.32, "learning_rate": 4.170309784928208e-05, "loss": 2.5484, "step": 1742500 }, { "epoch": 3.32, "learning_rate": 4.170071620735909e-05, "loss": 2.5688, "step": 1743000 }, { "epoch": 3.32, "learning_rate": 4.169833456543609e-05, "loss": 2.5591, "step": 1743500 }, { "epoch": 3.32, "learning_rate": 4.1695952923513094e-05, "loss": 2.5651, "step": 1744000 }, { "epoch": 3.32, "learning_rate": 4.16935712815901e-05, "loss": 2.5775, "step": 1744500 }, { "epoch": 3.32, "learning_rate": 4.169119440295095e-05, "loss": 2.5791, "step": 1745000 }, { "epoch": 3.33, "learning_rate": 4.168881276102796e-05, "loss": 2.5645, "step": 1745500 }, { "epoch": 3.33, "learning_rate": 4.168643111910496e-05, "loss": 2.5493, "step": 1746000 }, { "epoch": 3.33, "learning_rate": 4.168404947718197e-05, "loss": 2.5557, "step": 1746500 }, { "epoch": 3.33, "learning_rate": 4.168167259854282e-05, "loss": 2.5561, "step": 1747000 }, { "epoch": 3.33, "learning_rate": 4.167929095661982e-05, "loss": 2.5906, "step": 1747500 }, { "epoch": 3.33, "learning_rate": 4.167690931469683e-05, "loss": 2.5731, "step": 1748000 }, { "epoch": 3.33, "learning_rate": 4.167452767277383e-05, "loss": 2.5711, "step": 1748500 }, { "epoch": 3.33, "learning_rate": 4.167214603085084e-05, "loss": 2.5845, "step": 1749000 }, { "epoch": 3.33, "learning_rate": 4.166976915221169e-05, "loss": 2.5858, "step": 1749500 }, { "epoch": 3.33, "learning_rate": 4.1667387510288695e-05, "loss": 2.5674, "step": 1750000 }, { "epoch": 3.34, "learning_rate": 4.1665005868365705e-05, "loss": 2.56, "step": 1750500 }, { "epoch": 3.34, "learning_rate": 4.16626242264427e-05, "loss": 2.5771, "step": 1751000 }, { "epoch": 3.34, "learning_rate": 4.166024734780355e-05, "loss": 2.5518, "step": 1751500 }, { "epoch": 3.34, "learning_rate": 4.165786570588056e-05, "loss": 2.5797, "step": 1752000 }, { "epoch": 3.34, "learning_rate": 4.1655484063957565e-05, "loss": 2.5639, "step": 1752500 }, { "epoch": 3.34, "learning_rate": 4.1653102422034574e-05, "loss": 2.5623, "step": 1753000 }, { "epoch": 3.34, "learning_rate": 4.1650725543395426e-05, "loss": 2.5549, "step": 1753500 }, { "epoch": 3.34, "learning_rate": 4.164834390147243e-05, "loss": 2.5746, "step": 1754000 }, { "epoch": 3.34, "learning_rate": 4.164596225954943e-05, "loss": 2.5891, "step": 1754500 }, { "epoch": 3.34, "learning_rate": 4.1643585380910284e-05, "loss": 2.5682, "step": 1755000 }, { "epoch": 3.34, "learning_rate": 4.1641203738987287e-05, "loss": 2.567, "step": 1755500 }, { "epoch": 3.35, "learning_rate": 4.1638822097064296e-05, "loss": 2.5705, "step": 1756000 }, { "epoch": 3.35, "learning_rate": 4.16364404551413e-05, "loss": 2.5636, "step": 1756500 }, { "epoch": 3.35, "learning_rate": 4.163405881321831e-05, "loss": 2.5534, "step": 1757000 }, { "epoch": 3.35, "learning_rate": 4.163167717129531e-05, "loss": 2.5772, "step": 1757500 }, { "epoch": 3.35, "learning_rate": 4.162929552937231e-05, "loss": 2.5712, "step": 1758000 }, { "epoch": 3.35, "learning_rate": 4.1626918650733165e-05, "loss": 2.5564, "step": 1758500 }, { "epoch": 3.35, "learning_rate": 4.1624537008810175e-05, "loss": 2.5579, "step": 1759000 }, { "epoch": 3.35, "learning_rate": 4.162215536688718e-05, "loss": 2.5701, "step": 1759500 }, { "epoch": 3.35, "learning_rate": 4.161977372496419e-05, "loss": 2.5471, "step": 1760000 }, { "epoch": 3.35, "learning_rate": 4.161739208304118e-05, "loss": 2.5695, "step": 1760500 }, { "epoch": 3.36, "learning_rate": 4.161501044111819e-05, "loss": 2.5455, "step": 1761000 }, { "epoch": 3.36, "learning_rate": 4.1612628799195195e-05, "loss": 2.5675, "step": 1761500 }, { "epoch": 3.36, "learning_rate": 4.1610247157272204e-05, "loss": 2.5809, "step": 1762000 }, { "epoch": 3.36, "learning_rate": 4.1607870278633056e-05, "loss": 2.5878, "step": 1762500 }, { "epoch": 3.36, "learning_rate": 4.160548863671006e-05, "loss": 2.5613, "step": 1763000 }, { "epoch": 3.36, "learning_rate": 4.160310699478707e-05, "loss": 2.5619, "step": 1763500 }, { "epoch": 3.36, "learning_rate": 4.1600725352864064e-05, "loss": 2.5779, "step": 1764000 }, { "epoch": 3.36, "learning_rate": 4.1598343710941074e-05, "loss": 2.5784, "step": 1764500 }, { "epoch": 3.36, "learning_rate": 4.1595962069018076e-05, "loss": 2.5791, "step": 1765000 }, { "epoch": 3.36, "learning_rate": 4.159358519037893e-05, "loss": 2.5802, "step": 1765500 }, { "epoch": 3.36, "learning_rate": 4.159120831173978e-05, "loss": 2.5804, "step": 1766000 }, { "epoch": 3.37, "learning_rate": 4.158882666981679e-05, "loss": 2.56, "step": 1766500 }, { "epoch": 3.37, "learning_rate": 4.158644502789379e-05, "loss": 2.5659, "step": 1767000 }, { "epoch": 3.37, "learning_rate": 4.1584063385970796e-05, "loss": 2.57, "step": 1767500 }, { "epoch": 3.37, "learning_rate": 4.15816817440478e-05, "loss": 2.576, "step": 1768000 }, { "epoch": 3.37, "learning_rate": 4.157930010212481e-05, "loss": 2.5697, "step": 1768500 }, { "epoch": 3.37, "learning_rate": 4.157691846020181e-05, "loss": 2.5474, "step": 1769000 }, { "epoch": 3.37, "learning_rate": 4.157453681827882e-05, "loss": 2.581, "step": 1769500 }, { "epoch": 3.37, "learning_rate": 4.157215517635582e-05, "loss": 2.5714, "step": 1770000 }, { "epoch": 3.37, "eval_accuracy": 0.5327915171018002, "eval_loss": 2.4706990718841553, "eval_runtime": 4199.4131, "eval_samples_per_second": 65.482, "eval_steps_per_second": 6.548, "step": 1770000 }, { "epoch": 3.37, "learning_rate": 4.1569778297716675e-05, "loss": 2.5705, "step": 1770500 }, { "epoch": 3.37, "learning_rate": 4.156739665579368e-05, "loss": 2.5697, "step": 1771000 }, { "epoch": 3.38, "learning_rate": 4.156501501387069e-05, "loss": 2.5529, "step": 1771500 }, { "epoch": 3.38, "learning_rate": 4.156263337194769e-05, "loss": 2.5702, "step": 1772000 }, { "epoch": 3.38, "learning_rate": 4.15602517300247e-05, "loss": 2.5635, "step": 1772500 }, { "epoch": 3.38, "learning_rate": 4.155787485138555e-05, "loss": 2.586, "step": 1773000 }, { "epoch": 3.38, "learning_rate": 4.1555493209462554e-05, "loss": 2.5596, "step": 1773500 }, { "epoch": 3.38, "learning_rate": 4.1553111567539556e-05, "loss": 2.5716, "step": 1774000 }, { "epoch": 3.38, "learning_rate": 4.155072992561656e-05, "loss": 2.5615, "step": 1774500 }, { "epoch": 3.38, "learning_rate": 4.154835304697741e-05, "loss": 2.5767, "step": 1775000 }, { "epoch": 3.38, "learning_rate": 4.154597140505442e-05, "loss": 2.5778, "step": 1775500 }, { "epoch": 3.38, "learning_rate": 4.154358976313142e-05, "loss": 2.5832, "step": 1776000 }, { "epoch": 3.38, "learning_rate": 4.154120812120843e-05, "loss": 2.5539, "step": 1776500 }, { "epoch": 3.39, "learning_rate": 4.1538831242569285e-05, "loss": 2.5591, "step": 1777000 }, { "epoch": 3.39, "learning_rate": 4.153644960064628e-05, "loss": 2.5457, "step": 1777500 }, { "epoch": 3.39, "learning_rate": 4.153406795872329e-05, "loss": 2.5665, "step": 1778000 }, { "epoch": 3.39, "learning_rate": 4.153168631680029e-05, "loss": 2.5609, "step": 1778500 }, { "epoch": 3.39, "learning_rate": 4.15293046748773e-05, "loss": 2.557, "step": 1779000 }, { "epoch": 3.39, "learning_rate": 4.1526923032954305e-05, "loss": 2.5522, "step": 1779500 }, { "epoch": 3.39, "learning_rate": 4.152454615431516e-05, "loss": 2.5651, "step": 1780000 }, { "epoch": 3.39, "learning_rate": 4.152216451239216e-05, "loss": 2.586, "step": 1780500 }, { "epoch": 3.39, "learning_rate": 4.151978287046916e-05, "loss": 2.5808, "step": 1781000 }, { "epoch": 3.39, "learning_rate": 4.151740122854617e-05, "loss": 2.5459, "step": 1781500 }, { "epoch": 3.4, "learning_rate": 4.1515019586623174e-05, "loss": 2.5558, "step": 1782000 }, { "epoch": 3.4, "learning_rate": 4.1512637944700184e-05, "loss": 2.5515, "step": 1782500 }, { "epoch": 3.4, "learning_rate": 4.1510261066061036e-05, "loss": 2.5479, "step": 1783000 }, { "epoch": 3.4, "learning_rate": 4.150787942413804e-05, "loss": 2.5866, "step": 1783500 }, { "epoch": 3.4, "learning_rate": 4.150549778221504e-05, "loss": 2.567, "step": 1784000 }, { "epoch": 3.4, "learning_rate": 4.150311614029205e-05, "loss": 2.5536, "step": 1784500 }, { "epoch": 3.4, "learning_rate": 4.1500739261652896e-05, "loss": 2.5872, "step": 1785000 }, { "epoch": 3.4, "learning_rate": 4.1498357619729906e-05, "loss": 2.5589, "step": 1785500 }, { "epoch": 3.4, "learning_rate": 4.149597597780691e-05, "loss": 2.5788, "step": 1786000 }, { "epoch": 3.4, "learning_rate": 4.149359433588392e-05, "loss": 2.5574, "step": 1786500 }, { "epoch": 3.4, "learning_rate": 4.149121269396092e-05, "loss": 2.5887, "step": 1787000 }, { "epoch": 3.41, "learning_rate": 4.148883581532177e-05, "loss": 2.5446, "step": 1787500 }, { "epoch": 3.41, "learning_rate": 4.1486454173398775e-05, "loss": 2.542, "step": 1788000 }, { "epoch": 3.41, "learning_rate": 4.1484072531475785e-05, "loss": 2.565, "step": 1788500 }, { "epoch": 3.41, "learning_rate": 4.148169088955279e-05, "loss": 2.5814, "step": 1789000 }, { "epoch": 3.41, "learning_rate": 4.1479309247629797e-05, "loss": 2.5535, "step": 1789500 }, { "epoch": 3.41, "learning_rate": 4.147693236899064e-05, "loss": 2.5838, "step": 1790000 }, { "epoch": 3.41, "learning_rate": 4.1474550727067645e-05, "loss": 2.5662, "step": 1790500 }, { "epoch": 3.41, "learning_rate": 4.1472169085144654e-05, "loss": 2.5617, "step": 1791000 }, { "epoch": 3.41, "learning_rate": 4.146978744322166e-05, "loss": 2.561, "step": 1791500 }, { "epoch": 3.41, "learning_rate": 4.146741056458251e-05, "loss": 2.5712, "step": 1792000 }, { "epoch": 3.42, "learning_rate": 4.146502892265952e-05, "loss": 2.5762, "step": 1792500 }, { "epoch": 3.42, "learning_rate": 4.146264728073652e-05, "loss": 2.5769, "step": 1793000 }, { "epoch": 3.42, "learning_rate": 4.1460265638813524e-05, "loss": 2.5741, "step": 1793500 }, { "epoch": 3.42, "learning_rate": 4.1457883996890526e-05, "loss": 2.5713, "step": 1794000 }, { "epoch": 3.42, "learning_rate": 4.145551188153523e-05, "loss": 2.5677, "step": 1794500 }, { "epoch": 3.42, "learning_rate": 4.145313023961223e-05, "loss": 2.5784, "step": 1795000 }, { "epoch": 3.42, "learning_rate": 4.145074859768924e-05, "loss": 2.5608, "step": 1795500 }, { "epoch": 3.42, "learning_rate": 4.144836695576624e-05, "loss": 2.5504, "step": 1796000 }, { "epoch": 3.42, "learning_rate": 4.144598531384325e-05, "loss": 2.5428, "step": 1796500 }, { "epoch": 3.42, "learning_rate": 4.1443603671920255e-05, "loss": 2.5555, "step": 1797000 }, { "epoch": 3.42, "learning_rate": 4.144122202999726e-05, "loss": 2.5885, "step": 1797500 }, { "epoch": 3.43, "learning_rate": 4.143884038807426e-05, "loss": 2.559, "step": 1798000 }, { "epoch": 3.43, "learning_rate": 4.143645874615127e-05, "loss": 2.5622, "step": 1798500 }, { "epoch": 3.43, "learning_rate": 4.143407710422827e-05, "loss": 2.5742, "step": 1799000 }, { "epoch": 3.43, "learning_rate": 4.143169546230528e-05, "loss": 2.566, "step": 1799500 }, { "epoch": 3.43, "learning_rate": 4.1429318583666134e-05, "loss": 2.5711, "step": 1800000 }, { "epoch": 3.43, "eval_accuracy": 0.5331321913473064, "eval_loss": 2.4682931900024414, "eval_runtime": 4190.8515, "eval_samples_per_second": 65.616, "eval_steps_per_second": 6.562, "step": 1800000 }, { "epoch": 3.43, "learning_rate": 4.142693694174313e-05, "loss": 2.5734, "step": 1800500 }, { "epoch": 3.43, "learning_rate": 4.142456006310398e-05, "loss": 2.5757, "step": 1801000 }, { "epoch": 3.43, "learning_rate": 4.142217842118099e-05, "loss": 2.5657, "step": 1801500 }, { "epoch": 3.43, "learning_rate": 4.1419796779257994e-05, "loss": 2.5604, "step": 1802000 }, { "epoch": 3.43, "learning_rate": 4.1417415137335003e-05, "loss": 2.5568, "step": 1802500 }, { "epoch": 3.44, "learning_rate": 4.1415033495412006e-05, "loss": 2.5587, "step": 1803000 }, { "epoch": 3.44, "learning_rate": 4.141265185348901e-05, "loss": 2.565, "step": 1803500 }, { "epoch": 3.44, "learning_rate": 4.141027021156602e-05, "loss": 2.5901, "step": 1804000 }, { "epoch": 3.44, "learning_rate": 4.140788856964302e-05, "loss": 2.5744, "step": 1804500 }, { "epoch": 3.44, "learning_rate": 4.140550692772003e-05, "loss": 2.5632, "step": 1805000 }, { "epoch": 3.44, "learning_rate": 4.140313004908088e-05, "loss": 2.5576, "step": 1805500 }, { "epoch": 3.44, "learning_rate": 4.1400748407157885e-05, "loss": 2.5673, "step": 1806000 }, { "epoch": 3.44, "learning_rate": 4.1398366765234894e-05, "loss": 2.5578, "step": 1806500 }, { "epoch": 3.44, "learning_rate": 4.139598512331189e-05, "loss": 2.5792, "step": 1807000 }, { "epoch": 3.44, "learning_rate": 4.139360824467274e-05, "loss": 2.5713, "step": 1807500 }, { "epoch": 3.44, "learning_rate": 4.139122660274975e-05, "loss": 2.568, "step": 1808000 }, { "epoch": 3.45, "learning_rate": 4.1388844960826755e-05, "loss": 2.5682, "step": 1808500 }, { "epoch": 3.45, "learning_rate": 4.1386463318903764e-05, "loss": 2.5758, "step": 1809000 }, { "epoch": 3.45, "learning_rate": 4.1384086440264616e-05, "loss": 2.5589, "step": 1809500 }, { "epoch": 3.45, "learning_rate": 4.138170479834162e-05, "loss": 2.5701, "step": 1810000 }, { "epoch": 3.45, "learning_rate": 4.137932315641862e-05, "loss": 2.5716, "step": 1810500 }, { "epoch": 3.45, "learning_rate": 4.1376941514495624e-05, "loss": 2.5616, "step": 1811000 }, { "epoch": 3.45, "learning_rate": 4.1374564635856476e-05, "loss": 2.5667, "step": 1811500 }, { "epoch": 3.45, "learning_rate": 4.1372182993933486e-05, "loss": 2.563, "step": 1812000 }, { "epoch": 3.45, "learning_rate": 4.136980135201049e-05, "loss": 2.557, "step": 1812500 }, { "epoch": 3.45, "learning_rate": 4.13674197100875e-05, "loss": 2.5595, "step": 1813000 }, { "epoch": 3.46, "learning_rate": 4.1365038068164494e-05, "loss": 2.5514, "step": 1813500 }, { "epoch": 3.46, "learning_rate": 4.1362661189525346e-05, "loss": 2.5563, "step": 1814000 }, { "epoch": 3.46, "learning_rate": 4.1360279547602355e-05, "loss": 2.5629, "step": 1814500 }, { "epoch": 3.46, "learning_rate": 4.135789790567936e-05, "loss": 2.5545, "step": 1815000 }, { "epoch": 3.46, "learning_rate": 4.135551626375637e-05, "loss": 2.5746, "step": 1815500 }, { "epoch": 3.46, "learning_rate": 4.135313462183337e-05, "loss": 2.554, "step": 1816000 }, { "epoch": 3.46, "learning_rate": 4.135075297991038e-05, "loss": 2.573, "step": 1816500 }, { "epoch": 3.46, "learning_rate": 4.134837133798738e-05, "loss": 2.5662, "step": 1817000 }, { "epoch": 3.46, "learning_rate": 4.134599445934823e-05, "loss": 2.5726, "step": 1817500 }, { "epoch": 3.46, "learning_rate": 4.134361281742524e-05, "loss": 2.5676, "step": 1818000 }, { "epoch": 3.46, "learning_rate": 4.134123117550224e-05, "loss": 2.571, "step": 1818500 }, { "epoch": 3.47, "learning_rate": 4.133884953357925e-05, "loss": 2.5885, "step": 1819000 }, { "epoch": 3.47, "learning_rate": 4.13364726549401e-05, "loss": 2.5675, "step": 1819500 }, { "epoch": 3.47, "learning_rate": 4.1334091013017104e-05, "loss": 2.5598, "step": 1820000 }, { "epoch": 3.47, "learning_rate": 4.133170937109411e-05, "loss": 2.5548, "step": 1820500 }, { "epoch": 3.47, "learning_rate": 4.1329327729171116e-05, "loss": 2.5564, "step": 1821000 }, { "epoch": 3.47, "learning_rate": 4.132694608724812e-05, "loss": 2.5468, "step": 1821500 }, { "epoch": 3.47, "learning_rate": 4.132456444532513e-05, "loss": 2.5706, "step": 1822000 }, { "epoch": 3.47, "learning_rate": 4.132218280340213e-05, "loss": 2.5686, "step": 1822500 }, { "epoch": 3.47, "learning_rate": 4.131980116147913e-05, "loss": 2.5675, "step": 1823000 }, { "epoch": 3.47, "learning_rate": 4.1317419519556136e-05, "loss": 2.5649, "step": 1823500 }, { "epoch": 3.48, "learning_rate": 4.131504264091699e-05, "loss": 2.5621, "step": 1824000 }, { "epoch": 3.48, "learning_rate": 4.131266576227784e-05, "loss": 2.5718, "step": 1824500 }, { "epoch": 3.48, "learning_rate": 4.131028412035485e-05, "loss": 2.5751, "step": 1825000 }, { "epoch": 3.48, "learning_rate": 4.130790247843185e-05, "loss": 2.5604, "step": 1825500 }, { "epoch": 3.48, "learning_rate": 4.130552083650886e-05, "loss": 2.5775, "step": 1826000 }, { "epoch": 3.48, "learning_rate": 4.130313919458586e-05, "loss": 2.5561, "step": 1826500 }, { "epoch": 3.48, "learning_rate": 4.130075755266287e-05, "loss": 2.5745, "step": 1827000 }, { "epoch": 3.48, "learning_rate": 4.129837591073987e-05, "loss": 2.5474, "step": 1827500 }, { "epoch": 3.48, "learning_rate": 4.129599426881688e-05, "loss": 2.5848, "step": 1828000 }, { "epoch": 3.48, "learning_rate": 4.129361262689388e-05, "loss": 2.5804, "step": 1828500 }, { "epoch": 3.48, "learning_rate": 4.1291235748254734e-05, "loss": 2.5745, "step": 1829000 }, { "epoch": 3.49, "learning_rate": 4.1288854106331744e-05, "loss": 2.572, "step": 1829500 }, { "epoch": 3.49, "learning_rate": 4.1286472464408746e-05, "loss": 2.5549, "step": 1830000 }, { "epoch": 3.49, "eval_accuracy": 0.5332925644699953, "eval_loss": 2.468489408493042, "eval_runtime": 4196.711, "eval_samples_per_second": 65.525, "eval_steps_per_second": 6.553, "step": 1830000 }, { "epoch": 3.49, "learning_rate": 4.128409082248575e-05, "loss": 2.5747, "step": 1830500 }, { "epoch": 3.49, "learning_rate": 4.12817139438466e-05, "loss": 2.5479, "step": 1831000 }, { "epoch": 3.49, "learning_rate": 4.1279332301923604e-05, "loss": 2.57, "step": 1831500 }, { "epoch": 3.49, "learning_rate": 4.127695066000061e-05, "loss": 2.5773, "step": 1832000 }, { "epoch": 3.49, "learning_rate": 4.1274569018077616e-05, "loss": 2.5637, "step": 1832500 }, { "epoch": 3.49, "learning_rate": 4.127219213943847e-05, "loss": 2.5469, "step": 1833000 }, { "epoch": 3.49, "learning_rate": 4.126981049751547e-05, "loss": 2.5833, "step": 1833500 }, { "epoch": 3.49, "learning_rate": 4.126742885559248e-05, "loss": 2.5758, "step": 1834000 }, { "epoch": 3.5, "learning_rate": 4.126504721366948e-05, "loss": 2.563, "step": 1834500 }, { "epoch": 3.5, "learning_rate": 4.126266557174649e-05, "loss": 2.5892, "step": 1835000 }, { "epoch": 3.5, "learning_rate": 4.1260283929823495e-05, "loss": 2.5554, "step": 1835500 }, { "epoch": 3.5, "learning_rate": 4.125791181446819e-05, "loss": 2.5525, "step": 1836000 }, { "epoch": 3.5, "learning_rate": 4.12555301725452e-05, "loss": 2.5526, "step": 1836500 }, { "epoch": 3.5, "learning_rate": 4.12531485306222e-05, "loss": 2.5649, "step": 1837000 }, { "epoch": 3.5, "learning_rate": 4.1250766888699205e-05, "loss": 2.5705, "step": 1837500 }, { "epoch": 3.5, "learning_rate": 4.1248385246776214e-05, "loss": 2.5603, "step": 1838000 }, { "epoch": 3.5, "learning_rate": 4.1246003604853217e-05, "loss": 2.5695, "step": 1838500 }, { "epoch": 3.5, "learning_rate": 4.1243621962930226e-05, "loss": 2.5725, "step": 1839000 }, { "epoch": 3.5, "learning_rate": 4.124124032100723e-05, "loss": 2.5845, "step": 1839500 }, { "epoch": 3.51, "learning_rate": 4.1238863442368074e-05, "loss": 2.5603, "step": 1840000 }, { "epoch": 3.51, "learning_rate": 4.1236481800445083e-05, "loss": 2.5867, "step": 1840500 }, { "epoch": 3.51, "learning_rate": 4.1234100158522086e-05, "loss": 2.5582, "step": 1841000 }, { "epoch": 3.51, "learning_rate": 4.1231718516599096e-05, "loss": 2.5751, "step": 1841500 }, { "epoch": 3.51, "learning_rate": 4.12293368746761e-05, "loss": 2.5741, "step": 1842000 }, { "epoch": 3.51, "learning_rate": 4.122695999603695e-05, "loss": 2.5745, "step": 1842500 }, { "epoch": 3.51, "learning_rate": 4.122457835411396e-05, "loss": 2.5593, "step": 1843000 }, { "epoch": 3.51, "learning_rate": 4.1222196712190956e-05, "loss": 2.5559, "step": 1843500 }, { "epoch": 3.51, "learning_rate": 4.1219815070267965e-05, "loss": 2.5873, "step": 1844000 }, { "epoch": 3.51, "learning_rate": 4.121743342834497e-05, "loss": 2.5636, "step": 1844500 }, { "epoch": 3.52, "learning_rate": 4.121505654970582e-05, "loss": 2.5655, "step": 1845000 }, { "epoch": 3.52, "learning_rate": 4.121267490778283e-05, "loss": 2.5679, "step": 1845500 }, { "epoch": 3.52, "learning_rate": 4.121029326585983e-05, "loss": 2.58, "step": 1846000 }, { "epoch": 3.52, "learning_rate": 4.1207911623936835e-05, "loss": 2.5774, "step": 1846500 }, { "epoch": 3.52, "learning_rate": 4.120553474529769e-05, "loss": 2.5707, "step": 1847000 }, { "epoch": 3.52, "learning_rate": 4.120315310337469e-05, "loss": 2.5809, "step": 1847500 }, { "epoch": 3.52, "learning_rate": 4.12007714614517e-05, "loss": 2.5822, "step": 1848000 }, { "epoch": 3.52, "learning_rate": 4.11983898195287e-05, "loss": 2.5563, "step": 1848500 }, { "epoch": 3.52, "learning_rate": 4.1196012940889554e-05, "loss": 2.5644, "step": 1849000 }, { "epoch": 3.52, "learning_rate": 4.119363129896656e-05, "loss": 2.5685, "step": 1849500 }, { "epoch": 3.52, "learning_rate": 4.119124965704356e-05, "loss": 2.5782, "step": 1850000 }, { "epoch": 3.53, "learning_rate": 4.118886801512057e-05, "loss": 2.5577, "step": 1850500 }, { "epoch": 3.53, "learning_rate": 4.118648637319758e-05, "loss": 2.5694, "step": 1851000 }, { "epoch": 3.53, "learning_rate": 4.118410473127458e-05, "loss": 2.5628, "step": 1851500 }, { "epoch": 3.53, "learning_rate": 4.118172308935159e-05, "loss": 2.5676, "step": 1852000 }, { "epoch": 3.53, "learning_rate": 4.117934144742859e-05, "loss": 2.5671, "step": 1852500 }, { "epoch": 3.53, "learning_rate": 4.1176964568789445e-05, "loss": 2.5631, "step": 1853000 }, { "epoch": 3.53, "learning_rate": 4.117458292686645e-05, "loss": 2.5754, "step": 1853500 }, { "epoch": 3.53, "learning_rate": 4.117220128494345e-05, "loss": 2.5639, "step": 1854000 }, { "epoch": 3.53, "learning_rate": 4.116981964302046e-05, "loss": 2.5439, "step": 1854500 }, { "epoch": 3.53, "learning_rate": 4.116744276438131e-05, "loss": 2.5704, "step": 1855000 }, { "epoch": 3.54, "learning_rate": 4.1165061122458314e-05, "loss": 2.578, "step": 1855500 }, { "epoch": 3.54, "learning_rate": 4.1162679480535324e-05, "loss": 2.564, "step": 1856000 }, { "epoch": 3.54, "learning_rate": 4.116029783861232e-05, "loss": 2.5687, "step": 1856500 }, { "epoch": 3.54, "learning_rate": 4.115791619668933e-05, "loss": 2.5667, "step": 1857000 }, { "epoch": 3.54, "learning_rate": 4.115553455476633e-05, "loss": 2.5611, "step": 1857500 }, { "epoch": 3.54, "learning_rate": 4.115315291284334e-05, "loss": 2.5744, "step": 1858000 }, { "epoch": 3.54, "learning_rate": 4.1150771270920344e-05, "loss": 2.5583, "step": 1858500 }, { "epoch": 3.54, "learning_rate": 4.1148394392281196e-05, "loss": 2.5679, "step": 1859000 }, { "epoch": 3.54, "learning_rate": 4.11460127503582e-05, "loss": 2.5632, "step": 1859500 }, { "epoch": 3.54, "learning_rate": 4.11436311084352e-05, "loss": 2.5623, "step": 1860000 }, { "epoch": 3.54, "eval_accuracy": 0.5330924455446957, "eval_loss": 2.465792179107666, "eval_runtime": 4197.6709, "eval_samples_per_second": 65.51, "eval_steps_per_second": 6.551, "step": 1860000 }, { "epoch": 3.54, "learning_rate": 4.114124946651221e-05, "loss": 2.5785, "step": 1860500 }, { "epoch": 3.55, "learning_rate": 4.113886782458922e-05, "loss": 2.556, "step": 1861000 }, { "epoch": 3.55, "learning_rate": 4.1136490945950066e-05, "loss": 2.5521, "step": 1861500 }, { "epoch": 3.55, "learning_rate": 4.1134109304027075e-05, "loss": 2.575, "step": 1862000 }, { "epoch": 3.55, "learning_rate": 4.113172766210408e-05, "loss": 2.5524, "step": 1862500 }, { "epoch": 3.55, "learning_rate": 4.112934602018108e-05, "loss": 2.5637, "step": 1863000 }, { "epoch": 3.55, "learning_rate": 4.112696437825809e-05, "loss": 2.5654, "step": 1863500 }, { "epoch": 3.55, "learning_rate": 4.1124587499618935e-05, "loss": 2.5632, "step": 1864000 }, { "epoch": 3.55, "learning_rate": 4.1122205857695945e-05, "loss": 2.573, "step": 1864500 }, { "epoch": 3.55, "learning_rate": 4.1119824215772954e-05, "loss": 2.55, "step": 1865000 }, { "epoch": 3.55, "learning_rate": 4.111744257384996e-05, "loss": 2.554, "step": 1865500 }, { "epoch": 3.56, "learning_rate": 4.111506569521081e-05, "loss": 2.5809, "step": 1866000 }, { "epoch": 3.56, "learning_rate": 4.111268405328781e-05, "loss": 2.5686, "step": 1866500 }, { "epoch": 3.56, "learning_rate": 4.1110302411364814e-05, "loss": 2.5719, "step": 1867000 }, { "epoch": 3.56, "learning_rate": 4.1107920769441824e-05, "loss": 2.5775, "step": 1867500 }, { "epoch": 3.56, "learning_rate": 4.110554389080267e-05, "loss": 2.5688, "step": 1868000 }, { "epoch": 3.56, "learning_rate": 4.110316224887968e-05, "loss": 2.5748, "step": 1868500 }, { "epoch": 3.56, "learning_rate": 4.110078060695669e-05, "loss": 2.5549, "step": 1869000 }, { "epoch": 3.56, "learning_rate": 4.1098398965033684e-05, "loss": 2.5636, "step": 1869500 }, { "epoch": 3.56, "learning_rate": 4.109601732311069e-05, "loss": 2.5449, "step": 1870000 }, { "epoch": 3.56, "learning_rate": 4.1093635681187696e-05, "loss": 2.5589, "step": 1870500 }, { "epoch": 3.56, "learning_rate": 4.109125880254855e-05, "loss": 2.5582, "step": 1871000 }, { "epoch": 3.57, "learning_rate": 4.10888819239094e-05, "loss": 2.5652, "step": 1871500 }, { "epoch": 3.57, "learning_rate": 4.108650028198641e-05, "loss": 2.5579, "step": 1872000 }, { "epoch": 3.57, "learning_rate": 4.108411864006341e-05, "loss": 2.5741, "step": 1872500 }, { "epoch": 3.57, "learning_rate": 4.1081736998140415e-05, "loss": 2.5702, "step": 1873000 }, { "epoch": 3.57, "learning_rate": 4.107935535621742e-05, "loss": 2.5705, "step": 1873500 }, { "epoch": 3.57, "learning_rate": 4.107697371429443e-05, "loss": 2.5724, "step": 1874000 }, { "epoch": 3.57, "learning_rate": 4.107459207237143e-05, "loss": 2.5609, "step": 1874500 }, { "epoch": 3.57, "learning_rate": 4.107221043044844e-05, "loss": 2.5868, "step": 1875000 }, { "epoch": 3.57, "learning_rate": 4.106983355180929e-05, "loss": 2.5828, "step": 1875500 }, { "epoch": 3.57, "learning_rate": 4.1067451909886294e-05, "loss": 2.5503, "step": 1876000 }, { "epoch": 3.58, "learning_rate": 4.1065070267963297e-05, "loss": 2.5634, "step": 1876500 }, { "epoch": 3.58, "learning_rate": 4.10626886260403e-05, "loss": 2.5726, "step": 1877000 }, { "epoch": 3.58, "learning_rate": 4.106031174740115e-05, "loss": 2.559, "step": 1877500 }, { "epoch": 3.58, "learning_rate": 4.105793010547816e-05, "loss": 2.5598, "step": 1878000 }, { "epoch": 3.58, "learning_rate": 4.105555322683901e-05, "loss": 2.5639, "step": 1878500 }, { "epoch": 3.58, "learning_rate": 4.1053171584916016e-05, "loss": 2.5712, "step": 1879000 }, { "epoch": 3.58, "learning_rate": 4.1050789942993025e-05, "loss": 2.5741, "step": 1879500 }, { "epoch": 3.58, "learning_rate": 4.104840830107002e-05, "loss": 2.5635, "step": 1880000 }, { "epoch": 3.58, "learning_rate": 4.104602665914703e-05, "loss": 2.5629, "step": 1880500 }, { "epoch": 3.58, "learning_rate": 4.104364501722403e-05, "loss": 2.5471, "step": 1881000 }, { "epoch": 3.58, "learning_rate": 4.104126337530104e-05, "loss": 2.5545, "step": 1881500 }, { "epoch": 3.59, "learning_rate": 4.1038881733378045e-05, "loss": 2.5672, "step": 1882000 }, { "epoch": 3.59, "learning_rate": 4.103650009145505e-05, "loss": 2.5577, "step": 1882500 }, { "epoch": 3.59, "learning_rate": 4.10341232128159e-05, "loss": 2.5598, "step": 1883000 }, { "epoch": 3.59, "learning_rate": 4.103174157089291e-05, "loss": 2.55, "step": 1883500 }, { "epoch": 3.59, "learning_rate": 4.102935992896991e-05, "loss": 2.5687, "step": 1884000 }, { "epoch": 3.59, "learning_rate": 4.102697828704692e-05, "loss": 2.5745, "step": 1884500 }, { "epoch": 3.59, "learning_rate": 4.1024596645123924e-05, "loss": 2.5677, "step": 1885000 }, { "epoch": 3.59, "learning_rate": 4.1022219766484776e-05, "loss": 2.5625, "step": 1885500 }, { "epoch": 3.59, "learning_rate": 4.1019838124561786e-05, "loss": 2.5461, "step": 1886000 }, { "epoch": 3.59, "learning_rate": 4.101745648263878e-05, "loss": 2.5612, "step": 1886500 }, { "epoch": 3.6, "learning_rate": 4.101507484071579e-05, "loss": 2.5532, "step": 1887000 }, { "epoch": 3.6, "learning_rate": 4.101269796207664e-05, "loss": 2.5712, "step": 1887500 }, { "epoch": 3.6, "learning_rate": 4.1010316320153646e-05, "loss": 2.5609, "step": 1888000 }, { "epoch": 3.6, "learning_rate": 4.1007934678230655e-05, "loss": 2.5537, "step": 1888500 }, { "epoch": 3.6, "learning_rate": 4.100555303630766e-05, "loss": 2.5623, "step": 1889000 }, { "epoch": 3.6, "learning_rate": 4.100317139438466e-05, "loss": 2.5752, "step": 1889500 }, { "epoch": 3.6, "learning_rate": 4.100078975246166e-05, "loss": 2.5634, "step": 1890000 }, { "epoch": 3.6, "eval_accuracy": 0.5335756728870892, "eval_loss": 2.4654393196105957, "eval_runtime": 4198.9332, "eval_samples_per_second": 65.49, "eval_steps_per_second": 6.549, "step": 1890000 }, { "epoch": 3.6, "learning_rate": 4.099840811053867e-05, "loss": 2.5651, "step": 1890500 }, { "epoch": 3.6, "learning_rate": 4.0996026468615675e-05, "loss": 2.5706, "step": 1891000 }, { "epoch": 3.6, "learning_rate": 4.0993644826692685e-05, "loss": 2.5525, "step": 1891500 }, { "epoch": 3.6, "learning_rate": 4.099126794805354e-05, "loss": 2.5505, "step": 1892000 }, { "epoch": 3.61, "learning_rate": 4.098888630613053e-05, "loss": 2.5452, "step": 1892500 }, { "epoch": 3.61, "learning_rate": 4.098650466420754e-05, "loss": 2.5541, "step": 1893000 }, { "epoch": 3.61, "learning_rate": 4.098412302228455e-05, "loss": 2.5727, "step": 1893500 }, { "epoch": 3.61, "learning_rate": 4.0981741380361554e-05, "loss": 2.5761, "step": 1894000 }, { "epoch": 3.61, "learning_rate": 4.0979359738438564e-05, "loss": 2.5675, "step": 1894500 }, { "epoch": 3.61, "learning_rate": 4.0976978096515566e-05, "loss": 2.5556, "step": 1895000 }, { "epoch": 3.61, "learning_rate": 4.097460121787642e-05, "loss": 2.5638, "step": 1895500 }, { "epoch": 3.61, "learning_rate": 4.097221957595342e-05, "loss": 2.5791, "step": 1896000 }, { "epoch": 3.61, "learning_rate": 4.0969837934030424e-05, "loss": 2.5904, "step": 1896500 }, { "epoch": 3.61, "learning_rate": 4.096745629210743e-05, "loss": 2.5757, "step": 1897000 }, { "epoch": 3.62, "learning_rate": 4.0965074650184436e-05, "loss": 2.5529, "step": 1897500 }, { "epoch": 3.62, "learning_rate": 4.096269777154529e-05, "loss": 2.5565, "step": 1898000 }, { "epoch": 3.62, "learning_rate": 4.09603161296223e-05, "loss": 2.5507, "step": 1898500 }, { "epoch": 3.62, "learning_rate": 4.095793448769929e-05, "loss": 2.5743, "step": 1899000 }, { "epoch": 3.62, "learning_rate": 4.09555528457763e-05, "loss": 2.5637, "step": 1899500 }, { "epoch": 3.62, "learning_rate": 4.0953175967137155e-05, "loss": 2.5598, "step": 1900000 }, { "epoch": 3.62, "learning_rate": 4.095079432521416e-05, "loss": 2.5384, "step": 1900500 }, { "epoch": 3.62, "learning_rate": 4.094841268329117e-05, "loss": 2.5389, "step": 1901000 }, { "epoch": 3.62, "learning_rate": 4.094603104136817e-05, "loss": 2.5706, "step": 1901500 }, { "epoch": 3.62, "learning_rate": 4.094365416272902e-05, "loss": 2.5651, "step": 1902000 }, { "epoch": 3.62, "learning_rate": 4.0941272520806025e-05, "loss": 2.5733, "step": 1902500 }, { "epoch": 3.63, "learning_rate": 4.093889087888303e-05, "loss": 2.5574, "step": 1903000 }, { "epoch": 3.63, "learning_rate": 4.093650923696004e-05, "loss": 2.5705, "step": 1903500 }, { "epoch": 3.63, "learning_rate": 4.093412759503704e-05, "loss": 2.5712, "step": 1904000 }, { "epoch": 3.63, "learning_rate": 4.093175071639789e-05, "loss": 2.5471, "step": 1904500 }, { "epoch": 3.63, "learning_rate": 4.09293690744749e-05, "loss": 2.5573, "step": 1905000 }, { "epoch": 3.63, "learning_rate": 4.092699219583575e-05, "loss": 2.5747, "step": 1905500 }, { "epoch": 3.63, "learning_rate": 4.092461055391275e-05, "loss": 2.562, "step": 1906000 }, { "epoch": 3.63, "learning_rate": 4.092222891198976e-05, "loss": 2.5469, "step": 1906500 }, { "epoch": 3.63, "learning_rate": 4.091984727006676e-05, "loss": 2.5526, "step": 1907000 }, { "epoch": 3.63, "learning_rate": 4.091746562814377e-05, "loss": 2.5825, "step": 1907500 }, { "epoch": 3.64, "learning_rate": 4.091508398622077e-05, "loss": 2.5677, "step": 1908000 }, { "epoch": 3.64, "learning_rate": 4.0912707107581625e-05, "loss": 2.5711, "step": 1908500 }, { "epoch": 3.64, "learning_rate": 4.0910325465658635e-05, "loss": 2.557, "step": 1909000 }, { "epoch": 3.64, "learning_rate": 4.090794858701949e-05, "loss": 2.5591, "step": 1909500 }, { "epoch": 3.64, "learning_rate": 4.090556694509648e-05, "loss": 2.5453, "step": 1910000 }, { "epoch": 3.64, "learning_rate": 4.090318530317349e-05, "loss": 2.5789, "step": 1910500 }, { "epoch": 3.64, "learning_rate": 4.0900803661250495e-05, "loss": 2.5664, "step": 1911000 }, { "epoch": 3.64, "learning_rate": 4.0898422019327504e-05, "loss": 2.5598, "step": 1911500 }, { "epoch": 3.64, "learning_rate": 4.089604037740451e-05, "loss": 2.5486, "step": 1912000 }, { "epoch": 3.64, "learning_rate": 4.089365873548151e-05, "loss": 2.5611, "step": 1912500 }, { "epoch": 3.64, "learning_rate": 4.089127709355852e-05, "loss": 2.5691, "step": 1913000 }, { "epoch": 3.65, "learning_rate": 4.088889545163552e-05, "loss": 2.5676, "step": 1913500 }, { "epoch": 3.65, "learning_rate": 4.088651380971253e-05, "loss": 2.5701, "step": 1914000 }, { "epoch": 3.65, "learning_rate": 4.0884132167789534e-05, "loss": 2.5545, "step": 1914500 }, { "epoch": 3.65, "learning_rate": 4.088175052586654e-05, "loss": 2.5745, "step": 1915000 }, { "epoch": 3.65, "learning_rate": 4.087936888394354e-05, "loss": 2.5722, "step": 1915500 }, { "epoch": 3.65, "learning_rate": 4.087699200530439e-05, "loss": 2.5519, "step": 1916000 }, { "epoch": 3.65, "learning_rate": 4.0874615126665244e-05, "loss": 2.5537, "step": 1916500 }, { "epoch": 3.65, "learning_rate": 4.087223348474225e-05, "loss": 2.5495, "step": 1917000 }, { "epoch": 3.65, "learning_rate": 4.0869851842819256e-05, "loss": 2.5736, "step": 1917500 }, { "epoch": 3.65, "learning_rate": 4.0867470200896265e-05, "loss": 2.5681, "step": 1918000 }, { "epoch": 3.66, "learning_rate": 4.086508855897327e-05, "loss": 2.5566, "step": 1918500 }, { "epoch": 3.66, "learning_rate": 4.086270691705027e-05, "loss": 2.5748, "step": 1919000 }, { "epoch": 3.66, "learning_rate": 4.086032527512727e-05, "loss": 2.5731, "step": 1919500 }, { "epoch": 3.66, "learning_rate": 4.0857948396488125e-05, "loss": 2.5705, "step": 1920000 }, { "epoch": 3.66, "eval_accuracy": 0.5337034452803493, "eval_loss": 2.464621067047119, "eval_runtime": 4199.6664, "eval_samples_per_second": 65.479, "eval_steps_per_second": 6.548, "step": 1920000 }, { "epoch": 3.66, "learning_rate": 4.0855566754565135e-05, "loss": 2.5725, "step": 1920500 }, { "epoch": 3.66, "learning_rate": 4.085318511264214e-05, "loss": 2.5592, "step": 1921000 }, { "epoch": 3.66, "learning_rate": 4.0850803470719147e-05, "loss": 2.5507, "step": 1921500 }, { "epoch": 3.66, "learning_rate": 4.084842182879615e-05, "loss": 2.5816, "step": 1922000 }, { "epoch": 3.66, "learning_rate": 4.084604018687315e-05, "loss": 2.5561, "step": 1922500 }, { "epoch": 3.66, "learning_rate": 4.084365854495016e-05, "loss": 2.5685, "step": 1923000 }, { "epoch": 3.66, "learning_rate": 4.0841276903027164e-05, "loss": 2.5821, "step": 1923500 }, { "epoch": 3.67, "learning_rate": 4.0838900024388016e-05, "loss": 2.567, "step": 1924000 }, { "epoch": 3.67, "learning_rate": 4.083651838246502e-05, "loss": 2.5567, "step": 1924500 }, { "epoch": 3.67, "learning_rate": 4.083413674054203e-05, "loss": 2.5589, "step": 1925000 }, { "epoch": 3.67, "learning_rate": 4.083175509861903e-05, "loss": 2.5579, "step": 1925500 }, { "epoch": 3.67, "learning_rate": 4.0829373456696033e-05, "loss": 2.5568, "step": 1926000 }, { "epoch": 3.67, "learning_rate": 4.082699181477304e-05, "loss": 2.5693, "step": 1926500 }, { "epoch": 3.67, "learning_rate": 4.0824610172850045e-05, "loss": 2.5594, "step": 1927000 }, { "epoch": 3.67, "learning_rate": 4.0822228530927055e-05, "loss": 2.5592, "step": 1927500 }, { "epoch": 3.67, "learning_rate": 4.081985641557175e-05, "loss": 2.5536, "step": 1928000 }, { "epoch": 3.67, "learning_rate": 4.081747477364876e-05, "loss": 2.5684, "step": 1928500 }, { "epoch": 3.68, "learning_rate": 4.0815093131725755e-05, "loss": 2.5619, "step": 1929000 }, { "epoch": 3.68, "learning_rate": 4.0812711489802765e-05, "loss": 2.58, "step": 1929500 }, { "epoch": 3.68, "learning_rate": 4.081032984787977e-05, "loss": 2.5532, "step": 1930000 }, { "epoch": 3.68, "learning_rate": 4.080795296924062e-05, "loss": 2.5488, "step": 1930500 }, { "epoch": 3.68, "learning_rate": 4.080557132731763e-05, "loss": 2.5439, "step": 1931000 }, { "epoch": 3.68, "learning_rate": 4.080318968539463e-05, "loss": 2.5636, "step": 1931500 }, { "epoch": 3.68, "learning_rate": 4.0800808043471634e-05, "loss": 2.562, "step": 1932000 }, { "epoch": 3.68, "learning_rate": 4.079842640154864e-05, "loss": 2.5558, "step": 1932500 }, { "epoch": 3.68, "learning_rate": 4.0796044759625646e-05, "loss": 2.5502, "step": 1933000 }, { "epoch": 3.68, "learning_rate": 4.079366311770265e-05, "loss": 2.5685, "step": 1933500 }, { "epoch": 3.68, "learning_rate": 4.079128147577966e-05, "loss": 2.5891, "step": 1934000 }, { "epoch": 3.69, "learning_rate": 4.078890459714051e-05, "loss": 2.549, "step": 1934500 }, { "epoch": 3.69, "learning_rate": 4.078652295521751e-05, "loss": 2.5469, "step": 1935000 }, { "epoch": 3.69, "learning_rate": 4.0784141313294516e-05, "loss": 2.5644, "step": 1935500 }, { "epoch": 3.69, "learning_rate": 4.0781759671371525e-05, "loss": 2.5709, "step": 1936000 }, { "epoch": 3.69, "learning_rate": 4.077938279273237e-05, "loss": 2.5531, "step": 1936500 }, { "epoch": 3.69, "learning_rate": 4.077700591409322e-05, "loss": 2.5644, "step": 1937000 }, { "epoch": 3.69, "learning_rate": 4.077462427217023e-05, "loss": 2.5482, "step": 1937500 }, { "epoch": 3.69, "learning_rate": 4.0772242630247235e-05, "loss": 2.5613, "step": 1938000 }, { "epoch": 3.69, "learning_rate": 4.0769860988324244e-05, "loss": 2.5871, "step": 1938500 }, { "epoch": 3.69, "learning_rate": 4.076747934640125e-05, "loss": 2.5812, "step": 1939000 }, { "epoch": 3.7, "learning_rate": 4.076509770447825e-05, "loss": 2.5371, "step": 1939500 }, { "epoch": 3.7, "learning_rate": 4.076271606255526e-05, "loss": 2.5603, "step": 1940000 }, { "epoch": 3.7, "learning_rate": 4.076033442063226e-05, "loss": 2.543, "step": 1940500 }, { "epoch": 3.7, "learning_rate": 4.0757957541993114e-05, "loss": 2.5726, "step": 1941000 }, { "epoch": 3.7, "learning_rate": 4.075557590007012e-05, "loss": 2.5521, "step": 1941500 }, { "epoch": 3.7, "learning_rate": 4.075319425814712e-05, "loss": 2.5835, "step": 1942000 }, { "epoch": 3.7, "learning_rate": 4.075081261622413e-05, "loss": 2.5735, "step": 1942500 }, { "epoch": 3.7, "learning_rate": 4.074843573758498e-05, "loss": 2.5574, "step": 1943000 }, { "epoch": 3.7, "learning_rate": 4.0746054095661984e-05, "loss": 2.5484, "step": 1943500 }, { "epoch": 3.7, "learning_rate": 4.074367245373899e-05, "loss": 2.5528, "step": 1944000 }, { "epoch": 3.7, "learning_rate": 4.074129557509984e-05, "loss": 2.5471, "step": 1944500 }, { "epoch": 3.71, "learning_rate": 4.073891393317685e-05, "loss": 2.5563, "step": 1945000 }, { "epoch": 3.71, "learning_rate": 4.073653229125385e-05, "loss": 2.5597, "step": 1945500 }, { "epoch": 3.71, "learning_rate": 4.073415064933085e-05, "loss": 2.5654, "step": 1946000 }, { "epoch": 3.71, "learning_rate": 4.073176900740786e-05, "loss": 2.5692, "step": 1946500 }, { "epoch": 3.71, "learning_rate": 4.0729387365484865e-05, "loss": 2.565, "step": 1947000 }, { "epoch": 3.71, "learning_rate": 4.0727005723561875e-05, "loss": 2.5699, "step": 1947500 }, { "epoch": 3.71, "learning_rate": 4.072462408163888e-05, "loss": 2.5625, "step": 1948000 }, { "epoch": 3.71, "learning_rate": 4.072224720299972e-05, "loss": 2.5693, "step": 1948500 }, { "epoch": 3.71, "learning_rate": 4.071986556107673e-05, "loss": 2.542, "step": 1949000 }, { "epoch": 3.71, "learning_rate": 4.0717483919153735e-05, "loss": 2.5623, "step": 1949500 }, { "epoch": 3.72, "learning_rate": 4.071510704051459e-05, "loss": 2.5544, "step": 1950000 }, { "epoch": 3.72, "eval_accuracy": 0.5339716876795093, "eval_loss": 2.4630115032196045, "eval_runtime": 4192.7603, "eval_samples_per_second": 65.586, "eval_steps_per_second": 6.559, "step": 1950000 }, { "epoch": 3.72, "learning_rate": 4.0712725398591596e-05, "loss": 2.5668, "step": 1950500 }, { "epoch": 3.72, "learning_rate": 4.07103437566686e-05, "loss": 2.5676, "step": 1951000 }, { "epoch": 3.72, "learning_rate": 4.070796211474561e-05, "loss": 2.5628, "step": 1951500 }, { "epoch": 3.72, "learning_rate": 4.0705580472822604e-05, "loss": 2.5705, "step": 1952000 }, { "epoch": 3.72, "learning_rate": 4.0703198830899614e-05, "loss": 2.5467, "step": 1952500 }, { "epoch": 3.72, "learning_rate": 4.070081718897662e-05, "loss": 2.5402, "step": 1953000 }, { "epoch": 3.72, "learning_rate": 4.0698435547053626e-05, "loss": 2.5515, "step": 1953500 }, { "epoch": 3.72, "learning_rate": 4.069605866841448e-05, "loss": 2.564, "step": 1954000 }, { "epoch": 3.72, "learning_rate": 4.069367702649148e-05, "loss": 2.5575, "step": 1954500 }, { "epoch": 3.72, "learning_rate": 4.069129538456848e-05, "loss": 2.5409, "step": 1955000 }, { "epoch": 3.73, "learning_rate": 4.068891374264549e-05, "loss": 2.5466, "step": 1955500 }, { "epoch": 3.73, "learning_rate": 4.0686532100722495e-05, "loss": 2.5572, "step": 1956000 }, { "epoch": 3.73, "learning_rate": 4.068415522208335e-05, "loss": 2.5628, "step": 1956500 }, { "epoch": 3.73, "learning_rate": 4.068177358016036e-05, "loss": 2.5622, "step": 1957000 }, { "epoch": 3.73, "learning_rate": 4.067939193823736e-05, "loss": 2.5619, "step": 1957500 }, { "epoch": 3.73, "learning_rate": 4.067701505959821e-05, "loss": 2.5506, "step": 1958000 }, { "epoch": 3.73, "learning_rate": 4.0674633417675215e-05, "loss": 2.5431, "step": 1958500 }, { "epoch": 3.73, "learning_rate": 4.067225177575222e-05, "loss": 2.5687, "step": 1959000 }, { "epoch": 3.73, "learning_rate": 4.066987013382923e-05, "loss": 2.5723, "step": 1959500 }, { "epoch": 3.73, "learning_rate": 4.066748849190623e-05, "loss": 2.5734, "step": 1960000 }, { "epoch": 3.74, "learning_rate": 4.066510684998324e-05, "loss": 2.5636, "step": 1960500 }, { "epoch": 3.74, "learning_rate": 4.066272997134409e-05, "loss": 2.5496, "step": 1961000 }, { "epoch": 3.74, "learning_rate": 4.0660348329421094e-05, "loss": 2.5356, "step": 1961500 }, { "epoch": 3.74, "learning_rate": 4.0657966687498096e-05, "loss": 2.5874, "step": 1962000 }, { "epoch": 3.74, "learning_rate": 4.06555850455751e-05, "loss": 2.5651, "step": 1962500 }, { "epoch": 3.74, "learning_rate": 4.065320816693595e-05, "loss": 2.5647, "step": 1963000 }, { "epoch": 3.74, "learning_rate": 4.065082652501296e-05, "loss": 2.5739, "step": 1963500 }, { "epoch": 3.74, "learning_rate": 4.064844488308996e-05, "loss": 2.5657, "step": 1964000 }, { "epoch": 3.74, "learning_rate": 4.064606324116697e-05, "loss": 2.57, "step": 1964500 }, { "epoch": 3.74, "learning_rate": 4.064368159924397e-05, "loss": 2.5455, "step": 1965000 }, { "epoch": 3.74, "learning_rate": 4.064129995732098e-05, "loss": 2.5781, "step": 1965500 }, { "epoch": 3.75, "learning_rate": 4.063891831539798e-05, "loss": 2.5642, "step": 1966000 }, { "epoch": 3.75, "learning_rate": 4.063653667347499e-05, "loss": 2.5678, "step": 1966500 }, { "epoch": 3.75, "learning_rate": 4.063415979483584e-05, "loss": 2.5579, "step": 1967000 }, { "epoch": 3.75, "learning_rate": 4.0631778152912845e-05, "loss": 2.5559, "step": 1967500 }, { "epoch": 3.75, "learning_rate": 4.062939651098985e-05, "loss": 2.5752, "step": 1968000 }, { "epoch": 3.75, "learning_rate": 4.062701486906686e-05, "loss": 2.5655, "step": 1968500 }, { "epoch": 3.75, "learning_rate": 4.06246379904277e-05, "loss": 2.5465, "step": 1969000 }, { "epoch": 3.75, "learning_rate": 4.062225634850471e-05, "loss": 2.5717, "step": 1969500 }, { "epoch": 3.75, "learning_rate": 4.0619874706581714e-05, "loss": 2.5574, "step": 1970000 }, { "epoch": 3.75, "learning_rate": 4.0617493064658724e-05, "loss": 2.5543, "step": 1970500 }, { "epoch": 3.76, "learning_rate": 4.061511142273573e-05, "loss": 2.5642, "step": 1971000 }, { "epoch": 3.76, "learning_rate": 4.061273454409658e-05, "loss": 2.5659, "step": 1971500 }, { "epoch": 3.76, "learning_rate": 4.061035290217358e-05, "loss": 2.5572, "step": 1972000 }, { "epoch": 3.76, "learning_rate": 4.060797126025059e-05, "loss": 2.568, "step": 1972500 }, { "epoch": 3.76, "learning_rate": 4.060558961832759e-05, "loss": 2.5594, "step": 1973000 }, { "epoch": 3.76, "learning_rate": 4.06032079764046e-05, "loss": 2.5507, "step": 1973500 }, { "epoch": 3.76, "learning_rate": 4.0600826334481605e-05, "loss": 2.5859, "step": 1974000 }, { "epoch": 3.76, "learning_rate": 4.059844469255861e-05, "loss": 2.5486, "step": 1974500 }, { "epoch": 3.76, "learning_rate": 4.059606305063561e-05, "loss": 2.5383, "step": 1975000 }, { "epoch": 3.76, "learning_rate": 4.059368617199646e-05, "loss": 2.5514, "step": 1975500 }, { "epoch": 3.76, "learning_rate": 4.059130453007347e-05, "loss": 2.558, "step": 1976000 }, { "epoch": 3.77, "learning_rate": 4.0588922888150475e-05, "loss": 2.5685, "step": 1976500 }, { "epoch": 3.77, "learning_rate": 4.0586541246227484e-05, "loss": 2.5779, "step": 1977000 }, { "epoch": 3.77, "learning_rate": 4.0584164367588337e-05, "loss": 2.5718, "step": 1977500 }, { "epoch": 3.77, "learning_rate": 4.058178272566533e-05, "loss": 2.5759, "step": 1978000 }, { "epoch": 3.77, "learning_rate": 4.0579405847026185e-05, "loss": 2.5666, "step": 1978500 }, { "epoch": 3.77, "learning_rate": 4.0577024205103194e-05, "loss": 2.5696, "step": 1979000 }, { "epoch": 3.77, "learning_rate": 4.05746425631802e-05, "loss": 2.5726, "step": 1979500 }, { "epoch": 3.77, "learning_rate": 4.0572260921257206e-05, "loss": 2.5634, "step": 1980000 }, { "epoch": 3.77, "eval_accuracy": 0.534169780390026, "eval_loss": 2.4602863788604736, "eval_runtime": 4201.9967, "eval_samples_per_second": 65.442, "eval_steps_per_second": 6.544, "step": 1980000 }, { "epoch": 3.77, "learning_rate": 4.056987927933421e-05, "loss": 2.5507, "step": 1980500 }, { "epoch": 3.77, "learning_rate": 4.056749763741122e-05, "loss": 2.5702, "step": 1981000 }, { "epoch": 3.78, "learning_rate": 4.056511599548822e-05, "loss": 2.5477, "step": 1981500 }, { "epoch": 3.78, "learning_rate": 4.0562734353565223e-05, "loss": 2.5377, "step": 1982000 }, { "epoch": 3.78, "learning_rate": 4.0560357474926076e-05, "loss": 2.5737, "step": 1982500 }, { "epoch": 3.78, "learning_rate": 4.055797583300308e-05, "loss": 2.5561, "step": 1983000 }, { "epoch": 3.78, "learning_rate": 4.055559419108009e-05, "loss": 2.5341, "step": 1983500 }, { "epoch": 3.78, "learning_rate": 4.055321254915709e-05, "loss": 2.5541, "step": 1984000 }, { "epoch": 3.78, "learning_rate": 4.055083090723409e-05, "loss": 2.564, "step": 1984500 }, { "epoch": 3.78, "learning_rate": 4.05484492653111e-05, "loss": 2.562, "step": 1985000 }, { "epoch": 3.78, "learning_rate": 4.0546067623388105e-05, "loss": 2.5546, "step": 1985500 }, { "epoch": 3.78, "learning_rate": 4.0543685981465114e-05, "loss": 2.5433, "step": 1986000 }, { "epoch": 3.78, "learning_rate": 4.054130910282597e-05, "loss": 2.5577, "step": 1986500 }, { "epoch": 3.79, "learning_rate": 4.053892746090297e-05, "loss": 2.5523, "step": 1987000 }, { "epoch": 3.79, "learning_rate": 4.053654581897997e-05, "loss": 2.5455, "step": 1987500 }, { "epoch": 3.79, "learning_rate": 4.0534164177056975e-05, "loss": 2.5332, "step": 1988000 }, { "epoch": 3.79, "learning_rate": 4.0531782535133984e-05, "loss": 2.58, "step": 1988500 }, { "epoch": 3.79, "learning_rate": 4.0529405656494836e-05, "loss": 2.5591, "step": 1989000 }, { "epoch": 3.79, "learning_rate": 4.052702401457184e-05, "loss": 2.5665, "step": 1989500 }, { "epoch": 3.79, "learning_rate": 4.052464237264885e-05, "loss": 2.562, "step": 1990000 }, { "epoch": 3.79, "learning_rate": 4.052226073072585e-05, "loss": 2.5511, "step": 1990500 }, { "epoch": 3.79, "learning_rate": 4.0519883852086696e-05, "loss": 2.5611, "step": 1991000 }, { "epoch": 3.79, "learning_rate": 4.0517502210163706e-05, "loss": 2.5573, "step": 1991500 }, { "epoch": 3.8, "learning_rate": 4.051512056824071e-05, "loss": 2.5532, "step": 1992000 }, { "epoch": 3.8, "learning_rate": 4.051273892631772e-05, "loss": 2.5669, "step": 1992500 }, { "epoch": 3.8, "learning_rate": 4.051036204767857e-05, "loss": 2.5589, "step": 1993000 }, { "epoch": 3.8, "learning_rate": 4.050798040575557e-05, "loss": 2.5575, "step": 1993500 }, { "epoch": 3.8, "learning_rate": 4.0505603527116425e-05, "loss": 2.5648, "step": 1994000 }, { "epoch": 3.8, "learning_rate": 4.0503221885193434e-05, "loss": 2.5505, "step": 1994500 }, { "epoch": 3.8, "learning_rate": 4.050084024327043e-05, "loss": 2.561, "step": 1995000 }, { "epoch": 3.8, "learning_rate": 4.049845860134744e-05, "loss": 2.5615, "step": 1995500 }, { "epoch": 3.8, "learning_rate": 4.049607695942444e-05, "loss": 2.5433, "step": 1996000 }, { "epoch": 3.8, "learning_rate": 4.049369531750145e-05, "loss": 2.5689, "step": 1996500 }, { "epoch": 3.8, "learning_rate": 4.0491313675578454e-05, "loss": 2.5577, "step": 1997000 }, { "epoch": 3.81, "learning_rate": 4.048893203365546e-05, "loss": 2.5588, "step": 1997500 }, { "epoch": 3.81, "learning_rate": 4.048655515501631e-05, "loss": 2.5494, "step": 1998000 }, { "epoch": 3.81, "learning_rate": 4.048417351309332e-05, "loss": 2.5488, "step": 1998500 }, { "epoch": 3.81, "learning_rate": 4.048179187117032e-05, "loss": 2.5619, "step": 1999000 }, { "epoch": 3.81, "learning_rate": 4.047941022924733e-05, "loss": 2.5548, "step": 1999500 }, { "epoch": 3.81, "learning_rate": 4.0477033350608176e-05, "loss": 2.5509, "step": 2000000 }, { "epoch": 3.81, "learning_rate": 4.0474651708685186e-05, "loss": 2.5825, "step": 2000500 }, { "epoch": 3.81, "learning_rate": 4.047227006676219e-05, "loss": 2.5715, "step": 2001000 }, { "epoch": 3.81, "learning_rate": 4.046988842483919e-05, "loss": 2.571, "step": 2001500 }, { "epoch": 3.81, "learning_rate": 4.04675067829162e-05, "loss": 2.5549, "step": 2002000 }, { "epoch": 3.82, "learning_rate": 4.046512990427705e-05, "loss": 2.5647, "step": 2002500 }, { "epoch": 3.82, "learning_rate": 4.0462748262354055e-05, "loss": 2.5425, "step": 2003000 }, { "epoch": 3.82, "learning_rate": 4.0460366620431065e-05, "loss": 2.5443, "step": 2003500 }, { "epoch": 3.82, "learning_rate": 4.045798974179191e-05, "loss": 2.5683, "step": 2004000 }, { "epoch": 3.82, "learning_rate": 4.045560809986892e-05, "loss": 2.5724, "step": 2004500 }, { "epoch": 3.82, "learning_rate": 4.045322645794592e-05, "loss": 2.5546, "step": 2005000 }, { "epoch": 3.82, "learning_rate": 4.0450844816022925e-05, "loss": 2.569, "step": 2005500 }, { "epoch": 3.82, "learning_rate": 4.0448463174099934e-05, "loss": 2.5496, "step": 2006000 }, { "epoch": 3.82, "learning_rate": 4.044608153217694e-05, "loss": 2.5603, "step": 2006500 }, { "epoch": 3.82, "learning_rate": 4.0443699890253946e-05, "loss": 2.5439, "step": 2007000 }, { "epoch": 3.82, "learning_rate": 4.044131824833094e-05, "loss": 2.5465, "step": 2007500 }, { "epoch": 3.83, "learning_rate": 4.043893660640795e-05, "loss": 2.5552, "step": 2008000 }, { "epoch": 3.83, "learning_rate": 4.0436559727768804e-05, "loss": 2.5607, "step": 2008500 }, { "epoch": 3.83, "learning_rate": 4.0434178085845806e-05, "loss": 2.5559, "step": 2009000 }, { "epoch": 3.83, "learning_rate": 4.0431796443922816e-05, "loss": 2.5418, "step": 2009500 }, { "epoch": 3.83, "learning_rate": 4.042941480199982e-05, "loss": 2.5855, "step": 2010000 }, { "epoch": 3.83, "eval_accuracy": 0.5342376088185758, "eval_loss": 2.4602694511413574, "eval_runtime": 4196.8446, "eval_samples_per_second": 65.523, "eval_steps_per_second": 6.552, "step": 2010000 }, { "epoch": 3.83, "learning_rate": 4.042703792336067e-05, "loss": 2.5732, "step": 2010500 }, { "epoch": 3.83, "learning_rate": 4.042465628143767e-05, "loss": 2.5594, "step": 2011000 }, { "epoch": 3.83, "learning_rate": 4.0422274639514676e-05, "loss": 2.5673, "step": 2011500 }, { "epoch": 3.83, "learning_rate": 4.0419892997591685e-05, "loss": 2.5535, "step": 2012000 }, { "epoch": 3.83, "learning_rate": 4.041751135566869e-05, "loss": 2.5497, "step": 2012500 }, { "epoch": 3.84, "learning_rate": 4.041513447702954e-05, "loss": 2.5667, "step": 2013000 }, { "epoch": 3.84, "learning_rate": 4.041275283510655e-05, "loss": 2.5502, "step": 2013500 }, { "epoch": 3.84, "learning_rate": 4.041037119318355e-05, "loss": 2.5574, "step": 2014000 }, { "epoch": 3.84, "learning_rate": 4.0407989551260555e-05, "loss": 2.56, "step": 2014500 }, { "epoch": 3.84, "learning_rate": 4.040561267262141e-05, "loss": 2.5457, "step": 2015000 }, { "epoch": 3.84, "learning_rate": 4.040323103069841e-05, "loss": 2.5661, "step": 2015500 }, { "epoch": 3.84, "learning_rate": 4.040084938877542e-05, "loss": 2.5738, "step": 2016000 }, { "epoch": 3.84, "learning_rate": 4.039846774685243e-05, "loss": 2.5624, "step": 2016500 }, { "epoch": 3.84, "learning_rate": 4.0396090868213274e-05, "loss": 2.5715, "step": 2017000 }, { "epoch": 3.84, "learning_rate": 4.0393709226290284e-05, "loss": 2.5609, "step": 2017500 }, { "epoch": 3.84, "learning_rate": 4.0391327584367286e-05, "loss": 2.547, "step": 2018000 }, { "epoch": 3.85, "learning_rate": 4.038894594244429e-05, "loss": 2.5744, "step": 2018500 }, { "epoch": 3.85, "learning_rate": 4.03865643005213e-05, "loss": 2.5725, "step": 2019000 }, { "epoch": 3.85, "learning_rate": 4.0384187421882144e-05, "loss": 2.5456, "step": 2019500 }, { "epoch": 3.85, "learning_rate": 4.038180577995915e-05, "loss": 2.5688, "step": 2020000 }, { "epoch": 3.85, "learning_rate": 4.037942413803616e-05, "loss": 2.5514, "step": 2020500 }, { "epoch": 3.85, "learning_rate": 4.037704249611316e-05, "loss": 2.5611, "step": 2021000 }, { "epoch": 3.85, "learning_rate": 4.037466085419017e-05, "loss": 2.5876, "step": 2021500 }, { "epoch": 3.85, "learning_rate": 4.037228873883486e-05, "loss": 2.5581, "step": 2022000 }, { "epoch": 3.85, "learning_rate": 4.0369907096911866e-05, "loss": 2.5743, "step": 2022500 }, { "epoch": 3.85, "learning_rate": 4.0367525454988875e-05, "loss": 2.5466, "step": 2023000 }, { "epoch": 3.86, "learning_rate": 4.0365143813065884e-05, "loss": 2.5554, "step": 2023500 }, { "epoch": 3.86, "learning_rate": 4.036276217114289e-05, "loss": 2.5524, "step": 2024000 }, { "epoch": 3.86, "learning_rate": 4.036038052921989e-05, "loss": 2.5633, "step": 2024500 }, { "epoch": 3.86, "learning_rate": 4.035799888729689e-05, "loss": 2.5579, "step": 2025000 }, { "epoch": 3.86, "learning_rate": 4.03556172453739e-05, "loss": 2.5566, "step": 2025500 }, { "epoch": 3.86, "learning_rate": 4.0353235603450904e-05, "loss": 2.5836, "step": 2026000 }, { "epoch": 3.86, "learning_rate": 4.0350858724811757e-05, "loss": 2.5658, "step": 2026500 }, { "epoch": 3.86, "learning_rate": 4.0348477082888766e-05, "loss": 2.5557, "step": 2027000 }, { "epoch": 3.86, "learning_rate": 4.034609544096577e-05, "loss": 2.565, "step": 2027500 }, { "epoch": 3.86, "learning_rate": 4.034371379904277e-05, "loss": 2.5582, "step": 2028000 }, { "epoch": 3.86, "learning_rate": 4.0341336920403623e-05, "loss": 2.5499, "step": 2028500 }, { "epoch": 3.87, "learning_rate": 4.0338955278480626e-05, "loss": 2.5762, "step": 2029000 }, { "epoch": 3.87, "learning_rate": 4.0336573636557636e-05, "loss": 2.567, "step": 2029500 }, { "epoch": 3.87, "learning_rate": 4.033419199463464e-05, "loss": 2.5549, "step": 2030000 }, { "epoch": 3.87, "learning_rate": 4.033181035271165e-05, "loss": 2.5668, "step": 2030500 }, { "epoch": 3.87, "learning_rate": 4.032942871078865e-05, "loss": 2.5484, "step": 2031000 }, { "epoch": 3.87, "learning_rate": 4.0327051832149496e-05, "loss": 2.5409, "step": 2031500 }, { "epoch": 3.87, "learning_rate": 4.0324670190226505e-05, "loss": 2.5426, "step": 2032000 }, { "epoch": 3.87, "learning_rate": 4.032228854830351e-05, "loss": 2.551, "step": 2032500 }, { "epoch": 3.87, "learning_rate": 4.031991166966436e-05, "loss": 2.5549, "step": 2033000 }, { "epoch": 3.87, "learning_rate": 4.031753002774137e-05, "loss": 2.5673, "step": 2033500 }, { "epoch": 3.88, "learning_rate": 4.031514838581837e-05, "loss": 2.5706, "step": 2034000 }, { "epoch": 3.88, "learning_rate": 4.0312766743895375e-05, "loss": 2.5498, "step": 2034500 }, { "epoch": 3.88, "learning_rate": 4.0310385101972384e-05, "loss": 2.5551, "step": 2035000 }, { "epoch": 3.88, "learning_rate": 4.030800346004939e-05, "loss": 2.5598, "step": 2035500 }, { "epoch": 3.88, "learning_rate": 4.0305621818126396e-05, "loss": 2.5504, "step": 2036000 }, { "epoch": 3.88, "learning_rate": 4.03032401762034e-05, "loss": 2.5584, "step": 2036500 }, { "epoch": 3.88, "learning_rate": 4.030086329756425e-05, "loss": 2.5642, "step": 2037000 }, { "epoch": 3.88, "learning_rate": 4.0298481655641254e-05, "loss": 2.5559, "step": 2037500 }, { "epoch": 3.88, "learning_rate": 4.0296100013718256e-05, "loss": 2.5562, "step": 2038000 }, { "epoch": 3.88, "learning_rate": 4.0293718371795266e-05, "loss": 2.5571, "step": 2038500 }, { "epoch": 3.88, "learning_rate": 4.029133672987227e-05, "loss": 2.5647, "step": 2039000 }, { "epoch": 3.89, "learning_rate": 4.028895508794928e-05, "loss": 2.5712, "step": 2039500 }, { "epoch": 3.89, "learning_rate": 4.028657344602628e-05, "loss": 2.5532, "step": 2040000 }, { "epoch": 3.89, "eval_accuracy": 0.5345378440827298, "eval_loss": 2.4579551219940186, "eval_runtime": 4201.2329, "eval_samples_per_second": 65.454, "eval_steps_per_second": 6.545, "step": 2040000 }, { "epoch": 3.89, "learning_rate": 4.028419180410328e-05, "loss": 2.5579, "step": 2040500 }, { "epoch": 3.89, "learning_rate": 4.0281814925464135e-05, "loss": 2.5474, "step": 2041000 }, { "epoch": 3.89, "learning_rate": 4.027943328354114e-05, "loss": 2.5588, "step": 2041500 }, { "epoch": 3.89, "learning_rate": 4.027705164161815e-05, "loss": 2.5482, "step": 2042000 }, { "epoch": 3.89, "learning_rate": 4.027466999969515e-05, "loss": 2.5509, "step": 2042500 }, { "epoch": 3.89, "learning_rate": 4.0272293121056e-05, "loss": 2.5531, "step": 2043000 }, { "epoch": 3.89, "learning_rate": 4.026991147913301e-05, "loss": 2.5488, "step": 2043500 }, { "epoch": 3.89, "learning_rate": 4.026752983721001e-05, "loss": 2.5507, "step": 2044000 }, { "epoch": 3.9, "learning_rate": 4.026514819528702e-05, "loss": 2.5428, "step": 2044500 }, { "epoch": 3.9, "learning_rate": 4.026277131664787e-05, "loss": 2.5536, "step": 2045000 }, { "epoch": 3.9, "learning_rate": 4.026038967472487e-05, "loss": 2.5796, "step": 2045500 }, { "epoch": 3.9, "learning_rate": 4.025800803280188e-05, "loss": 2.5455, "step": 2046000 }, { "epoch": 3.9, "learning_rate": 4.0255626390878884e-05, "loss": 2.537, "step": 2046500 }, { "epoch": 3.9, "learning_rate": 4.025324474895589e-05, "loss": 2.5495, "step": 2047000 }, { "epoch": 3.9, "learning_rate": 4.0250863107032896e-05, "loss": 2.5528, "step": 2047500 }, { "epoch": 3.9, "learning_rate": 4.02484814651099e-05, "loss": 2.566, "step": 2048000 }, { "epoch": 3.9, "learning_rate": 4.024609982318691e-05, "loss": 2.5606, "step": 2048500 }, { "epoch": 3.9, "learning_rate": 4.024372294454776e-05, "loss": 2.5668, "step": 2049000 }, { "epoch": 3.9, "learning_rate": 4.024134130262476e-05, "loss": 2.5715, "step": 2049500 }, { "epoch": 3.91, "learning_rate": 4.023895966070177e-05, "loss": 2.5741, "step": 2050000 }, { "epoch": 3.91, "learning_rate": 4.023657801877877e-05, "loss": 2.5639, "step": 2050500 }, { "epoch": 3.91, "learning_rate": 4.023419637685578e-05, "loss": 2.544, "step": 2051000 }, { "epoch": 3.91, "learning_rate": 4.023181949821663e-05, "loss": 2.5693, "step": 2051500 }, { "epoch": 3.91, "learning_rate": 4.022944261957748e-05, "loss": 2.548, "step": 2052000 }, { "epoch": 3.91, "learning_rate": 4.0227060977654485e-05, "loss": 2.5511, "step": 2052500 }, { "epoch": 3.91, "learning_rate": 4.0224679335731494e-05, "loss": 2.5755, "step": 2053000 }, { "epoch": 3.91, "learning_rate": 4.02222976938085e-05, "loss": 2.5345, "step": 2053500 }, { "epoch": 3.91, "learning_rate": 4.02199160518855e-05, "loss": 2.5562, "step": 2054000 }, { "epoch": 3.91, "learning_rate": 4.02175344099625e-05, "loss": 2.5601, "step": 2054500 }, { "epoch": 3.92, "learning_rate": 4.021515276803951e-05, "loss": 2.5557, "step": 2055000 }, { "epoch": 3.92, "learning_rate": 4.0212771126116514e-05, "loss": 2.5518, "step": 2055500 }, { "epoch": 3.92, "learning_rate": 4.0210394247477366e-05, "loss": 2.5367, "step": 2056000 }, { "epoch": 3.92, "learning_rate": 4.0208012605554376e-05, "loss": 2.5636, "step": 2056500 }, { "epoch": 3.92, "learning_rate": 4.020563096363137e-05, "loss": 2.5677, "step": 2057000 }, { "epoch": 3.92, "learning_rate": 4.020324932170838e-05, "loss": 2.5752, "step": 2057500 }, { "epoch": 3.92, "learning_rate": 4.0200867679785383e-05, "loss": 2.5591, "step": 2058000 }, { "epoch": 3.92, "learning_rate": 4.0198490801146236e-05, "loss": 2.5527, "step": 2058500 }, { "epoch": 3.92, "learning_rate": 4.0196109159223245e-05, "loss": 2.5638, "step": 2059000 }, { "epoch": 3.92, "learning_rate": 4.01937322805841e-05, "loss": 2.5597, "step": 2059500 }, { "epoch": 3.92, "learning_rate": 4.01913506386611e-05, "loss": 2.5684, "step": 2060000 }, { "epoch": 3.93, "learning_rate": 4.018896899673811e-05, "loss": 2.5671, "step": 2060500 }, { "epoch": 3.93, "learning_rate": 4.0186587354815105e-05, "loss": 2.543, "step": 2061000 }, { "epoch": 3.93, "learning_rate": 4.0184205712892115e-05, "loss": 2.5469, "step": 2061500 }, { "epoch": 3.93, "learning_rate": 4.018182407096912e-05, "loss": 2.5534, "step": 2062000 }, { "epoch": 3.93, "learning_rate": 4.017944242904613e-05, "loss": 2.5676, "step": 2062500 }, { "epoch": 3.93, "learning_rate": 4.0177060787123136e-05, "loss": 2.5815, "step": 2063000 }, { "epoch": 3.93, "learning_rate": 4.017468390848398e-05, "loss": 2.5519, "step": 2063500 }, { "epoch": 3.93, "learning_rate": 4.0172302266560984e-05, "loss": 2.5432, "step": 2064000 }, { "epoch": 3.93, "learning_rate": 4.0169925387921837e-05, "loss": 2.56, "step": 2064500 }, { "epoch": 3.93, "learning_rate": 4.016754374599884e-05, "loss": 2.5607, "step": 2065000 }, { "epoch": 3.94, "learning_rate": 4.016516210407585e-05, "loss": 2.5658, "step": 2065500 }, { "epoch": 3.94, "learning_rate": 4.016278046215286e-05, "loss": 2.5764, "step": 2066000 }, { "epoch": 3.94, "learning_rate": 4.016039882022986e-05, "loss": 2.5617, "step": 2066500 }, { "epoch": 3.94, "learning_rate": 4.015801717830686e-05, "loss": 2.5536, "step": 2067000 }, { "epoch": 3.94, "learning_rate": 4.0155635536383866e-05, "loss": 2.5628, "step": 2067500 }, { "epoch": 3.94, "learning_rate": 4.0153253894460875e-05, "loss": 2.5494, "step": 2068000 }, { "epoch": 3.94, "learning_rate": 4.015087225253788e-05, "loss": 2.5617, "step": 2068500 }, { "epoch": 3.94, "learning_rate": 4.014849537389873e-05, "loss": 2.5503, "step": 2069000 }, { "epoch": 3.94, "learning_rate": 4.014611849525958e-05, "loss": 2.5487, "step": 2069500 }, { "epoch": 3.94, "learning_rate": 4.014373685333659e-05, "loss": 2.5783, "step": 2070000 }, { "epoch": 3.94, "eval_accuracy": 0.5348384454874497, "eval_loss": 2.4570472240448, "eval_runtime": 4197.8296, "eval_samples_per_second": 65.507, "eval_steps_per_second": 6.551, "step": 2070000 }, { "epoch": 3.94, "learning_rate": 4.0141355211413595e-05, "loss": 2.561, "step": 2070500 }, { "epoch": 3.95, "learning_rate": 4.01389735694906e-05, "loss": 2.5655, "step": 2071000 }, { "epoch": 3.95, "learning_rate": 4.01365919275676e-05, "loss": 2.5733, "step": 2071500 }, { "epoch": 3.95, "learning_rate": 4.013421504892845e-05, "loss": 2.5697, "step": 2072000 }, { "epoch": 3.95, "learning_rate": 4.013183340700546e-05, "loss": 2.5609, "step": 2072500 }, { "epoch": 3.95, "learning_rate": 4.0129451765082464e-05, "loss": 2.5491, "step": 2073000 }, { "epoch": 3.95, "learning_rate": 4.0127074886443316e-05, "loss": 2.5486, "step": 2073500 }, { "epoch": 3.95, "learning_rate": 4.0124693244520326e-05, "loss": 2.5618, "step": 2074000 }, { "epoch": 3.95, "learning_rate": 4.012231160259732e-05, "loss": 2.5543, "step": 2074500 }, { "epoch": 3.95, "learning_rate": 4.011992996067433e-05, "loss": 2.5579, "step": 2075000 }, { "epoch": 3.95, "learning_rate": 4.0117548318751334e-05, "loss": 2.5581, "step": 2075500 }, { "epoch": 3.96, "learning_rate": 4.011516667682834e-05, "loss": 2.5598, "step": 2076000 }, { "epoch": 3.96, "learning_rate": 4.0112785034905346e-05, "loss": 2.5382, "step": 2076500 }, { "epoch": 3.96, "learning_rate": 4.011040339298235e-05, "loss": 2.5628, "step": 2077000 }, { "epoch": 3.96, "learning_rate": 4.010802175105936e-05, "loss": 2.5455, "step": 2077500 }, { "epoch": 3.96, "learning_rate": 4.010564010913636e-05, "loss": 2.5593, "step": 2078000 }, { "epoch": 3.96, "learning_rate": 4.010325846721337e-05, "loss": 2.5677, "step": 2078500 }, { "epoch": 3.96, "learning_rate": 4.010087682529037e-05, "loss": 2.5685, "step": 2079000 }, { "epoch": 3.96, "learning_rate": 4.0098499946651225e-05, "loss": 2.5591, "step": 2079500 }, { "epoch": 3.96, "learning_rate": 4.0096118304728234e-05, "loss": 2.5517, "step": 2080000 }, { "epoch": 3.96, "learning_rate": 4.009373666280523e-05, "loss": 2.5481, "step": 2080500 }, { "epoch": 3.96, "learning_rate": 4.009135502088224e-05, "loss": 2.5712, "step": 2081000 }, { "epoch": 3.97, "learning_rate": 4.008897337895924e-05, "loss": 2.5581, "step": 2081500 }, { "epoch": 3.97, "learning_rate": 4.0086596500320094e-05, "loss": 2.5535, "step": 2082000 }, { "epoch": 3.97, "learning_rate": 4.0084214858397104e-05, "loss": 2.5534, "step": 2082500 }, { "epoch": 3.97, "learning_rate": 4.0081833216474106e-05, "loss": 2.5657, "step": 2083000 }, { "epoch": 3.97, "learning_rate": 4.007945157455111e-05, "loss": 2.5657, "step": 2083500 }, { "epoch": 3.97, "learning_rate": 4.007707469591196e-05, "loss": 2.5574, "step": 2084000 }, { "epoch": 3.97, "learning_rate": 4.0074693053988964e-05, "loss": 2.533, "step": 2084500 }, { "epoch": 3.97, "learning_rate": 4.007231141206597e-05, "loss": 2.559, "step": 2085000 }, { "epoch": 3.97, "learning_rate": 4.0069929770142976e-05, "loss": 2.5634, "step": 2085500 }, { "epoch": 3.97, "learning_rate": 4.006755289150383e-05, "loss": 2.5484, "step": 2086000 }, { "epoch": 3.98, "learning_rate": 4.006517124958084e-05, "loss": 2.5743, "step": 2086500 }, { "epoch": 3.98, "learning_rate": 4.006278960765783e-05, "loss": 2.5662, "step": 2087000 }, { "epoch": 3.98, "learning_rate": 4.006040796573484e-05, "loss": 2.5481, "step": 2087500 }, { "epoch": 3.98, "learning_rate": 4.0058031087095695e-05, "loss": 2.5465, "step": 2088000 }, { "epoch": 3.98, "learning_rate": 4.00556494451727e-05, "loss": 2.5526, "step": 2088500 }, { "epoch": 3.98, "learning_rate": 4.005326780324971e-05, "loss": 2.5645, "step": 2089000 }, { "epoch": 3.98, "learning_rate": 4.005089092461056e-05, "loss": 2.5408, "step": 2089500 }, { "epoch": 3.98, "learning_rate": 4.004850928268756e-05, "loss": 2.563, "step": 2090000 }, { "epoch": 3.98, "learning_rate": 4.0046127640764565e-05, "loss": 2.5633, "step": 2090500 }, { "epoch": 3.98, "learning_rate": 4.004374599884157e-05, "loss": 2.5777, "step": 2091000 }, { "epoch": 3.98, "learning_rate": 4.004136435691858e-05, "loss": 2.5592, "step": 2091500 }, { "epoch": 3.99, "learning_rate": 4.003898271499558e-05, "loss": 2.5715, "step": 2092000 }, { "epoch": 3.99, "learning_rate": 4.003660107307259e-05, "loss": 2.5662, "step": 2092500 }, { "epoch": 3.99, "learning_rate": 4.003421943114959e-05, "loss": 2.5508, "step": 2093000 }, { "epoch": 3.99, "learning_rate": 4.0031837789226594e-05, "loss": 2.552, "step": 2093500 }, { "epoch": 3.99, "learning_rate": 4.0029460910587446e-05, "loss": 2.5593, "step": 2094000 }, { "epoch": 3.99, "learning_rate": 4.0027079268664456e-05, "loss": 2.5507, "step": 2094500 }, { "epoch": 3.99, "learning_rate": 4.00247023900253e-05, "loss": 2.5369, "step": 2095000 }, { "epoch": 3.99, "learning_rate": 4.002232074810231e-05, "loss": 2.5378, "step": 2095500 }, { "epoch": 3.99, "learning_rate": 4.001993910617931e-05, "loss": 2.5591, "step": 2096000 }, { "epoch": 3.99, "learning_rate": 4.001755746425632e-05, "loss": 2.561, "step": 2096500 }, { "epoch": 4.0, "learning_rate": 4.0015175822333325e-05, "loss": 2.5617, "step": 2097000 }, { "epoch": 4.0, "learning_rate": 4.001279418041033e-05, "loss": 2.5487, "step": 2097500 }, { "epoch": 4.0, "learning_rate": 4.001041253848734e-05, "loss": 2.5528, "step": 2098000 }, { "epoch": 4.0, "learning_rate": 4.000803089656434e-05, "loss": 2.5384, "step": 2098500 }, { "epoch": 4.0, "learning_rate": 4.000565401792519e-05, "loss": 2.5479, "step": 2099000 }, { "epoch": 4.0, "learning_rate": 4.00032723760022e-05, "loss": 2.5395, "step": 2099500 }, { "epoch": 4.0, "learning_rate": 4.00008907340792e-05, "loss": 2.5448, "step": 2100000 }, { "epoch": 4.0, "eval_accuracy": 0.5348913652409124, "eval_loss": 2.4556870460510254, "eval_runtime": 4202.8418, "eval_samples_per_second": 65.429, "eval_steps_per_second": 6.543, "step": 2100000 }, { "epoch": 4.0, "learning_rate": 3.999850909215621e-05, "loss": 2.5233, "step": 2100500 }, { "epoch": 4.0, "learning_rate": 3.999613221351706e-05, "loss": 2.5387, "step": 2101000 }, { "epoch": 4.0, "learning_rate": 3.999375057159406e-05, "loss": 2.5537, "step": 2101500 }, { "epoch": 4.0, "learning_rate": 3.999136892967107e-05, "loss": 2.5457, "step": 2102000 }, { "epoch": 4.01, "learning_rate": 3.9988987287748074e-05, "loss": 2.5687, "step": 2102500 }, { "epoch": 4.01, "learning_rate": 3.9986610409108926e-05, "loss": 2.543, "step": 2103000 }, { "epoch": 4.01, "learning_rate": 3.9984228767185935e-05, "loss": 2.5527, "step": 2103500 }, { "epoch": 4.01, "learning_rate": 3.998184712526293e-05, "loss": 2.5494, "step": 2104000 }, { "epoch": 4.01, "learning_rate": 3.997946548333994e-05, "loss": 2.5593, "step": 2104500 }, { "epoch": 4.01, "learning_rate": 3.997708860470079e-05, "loss": 2.5651, "step": 2105000 }, { "epoch": 4.01, "learning_rate": 3.9974706962777796e-05, "loss": 2.5398, "step": 2105500 }, { "epoch": 4.01, "learning_rate": 3.9972325320854805e-05, "loss": 2.5356, "step": 2106000 }, { "epoch": 4.01, "learning_rate": 3.996994367893181e-05, "loss": 2.5393, "step": 2106500 }, { "epoch": 4.01, "learning_rate": 3.996756203700881e-05, "loss": 2.5598, "step": 2107000 }, { "epoch": 4.02, "learning_rate": 3.996518515836966e-05, "loss": 2.559, "step": 2107500 }, { "epoch": 4.02, "learning_rate": 3.9962803516446665e-05, "loss": 2.5623, "step": 2108000 }, { "epoch": 4.02, "learning_rate": 3.9960421874523675e-05, "loss": 2.5335, "step": 2108500 }, { "epoch": 4.02, "learning_rate": 3.995804499588453e-05, "loss": 2.5585, "step": 2109000 }, { "epoch": 4.02, "learning_rate": 3.995566335396153e-05, "loss": 2.5383, "step": 2109500 }, { "epoch": 4.02, "learning_rate": 3.995328171203854e-05, "loss": 2.5314, "step": 2110000 }, { "epoch": 4.02, "learning_rate": 3.9950900070115535e-05, "loss": 2.5612, "step": 2110500 }, { "epoch": 4.02, "learning_rate": 3.9948518428192544e-05, "loss": 2.583, "step": 2111000 }, { "epoch": 4.02, "learning_rate": 3.9946136786269554e-05, "loss": 2.5565, "step": 2111500 }, { "epoch": 4.02, "learning_rate": 3.9943755144346556e-05, "loss": 2.5521, "step": 2112000 }, { "epoch": 4.02, "learning_rate": 3.9941373502423566e-05, "loss": 2.5381, "step": 2112500 }, { "epoch": 4.03, "learning_rate": 3.993899186050057e-05, "loss": 2.5676, "step": 2113000 }, { "epoch": 4.03, "learning_rate": 3.993661021857757e-05, "loss": 2.5449, "step": 2113500 }, { "epoch": 4.03, "learning_rate": 3.993423333993842e-05, "loss": 2.5602, "step": 2114000 }, { "epoch": 4.03, "learning_rate": 3.9931851698015426e-05, "loss": 2.5436, "step": 2114500 }, { "epoch": 4.03, "learning_rate": 3.9929470056092435e-05, "loss": 2.5575, "step": 2115000 }, { "epoch": 4.03, "learning_rate": 3.992708841416944e-05, "loss": 2.5518, "step": 2115500 }, { "epoch": 4.03, "learning_rate": 3.992470677224645e-05, "loss": 2.544, "step": 2116000 }, { "epoch": 4.03, "learning_rate": 3.99223298936073e-05, "loss": 2.5547, "step": 2116500 }, { "epoch": 4.03, "learning_rate": 3.9919953014968145e-05, "loss": 2.5522, "step": 2117000 }, { "epoch": 4.03, "learning_rate": 3.991757137304515e-05, "loss": 2.5319, "step": 2117500 }, { "epoch": 4.04, "learning_rate": 3.991518973112216e-05, "loss": 2.5485, "step": 2118000 }, { "epoch": 4.04, "learning_rate": 3.991280808919916e-05, "loss": 2.5582, "step": 2118500 }, { "epoch": 4.04, "learning_rate": 3.991042644727617e-05, "loss": 2.5609, "step": 2119000 }, { "epoch": 4.04, "learning_rate": 3.990804480535317e-05, "loss": 2.5251, "step": 2119500 }, { "epoch": 4.04, "learning_rate": 3.9905663163430174e-05, "loss": 2.5401, "step": 2120000 }, { "epoch": 4.04, "learning_rate": 3.990328152150718e-05, "loss": 2.5477, "step": 2120500 }, { "epoch": 4.04, "learning_rate": 3.9900899879584186e-05, "loss": 2.532, "step": 2121000 }, { "epoch": 4.04, "learning_rate": 3.989851823766119e-05, "loss": 2.5726, "step": 2121500 }, { "epoch": 4.04, "learning_rate": 3.989614135902204e-05, "loss": 2.56, "step": 2122000 }, { "epoch": 4.04, "learning_rate": 3.989375971709905e-05, "loss": 2.5381, "step": 2122500 }, { "epoch": 4.04, "learning_rate": 3.989137807517605e-05, "loss": 2.5613, "step": 2123000 }, { "epoch": 4.05, "learning_rate": 3.9888996433253056e-05, "loss": 2.55, "step": 2123500 }, { "epoch": 4.05, "learning_rate": 3.9886614791330065e-05, "loss": 2.5564, "step": 2124000 }, { "epoch": 4.05, "learning_rate": 3.988423791269091e-05, "loss": 2.5294, "step": 2124500 }, { "epoch": 4.05, "learning_rate": 3.988186103405176e-05, "loss": 2.534, "step": 2125000 }, { "epoch": 4.05, "learning_rate": 3.987947939212877e-05, "loss": 2.5466, "step": 2125500 }, { "epoch": 4.05, "learning_rate": 3.9877097750205775e-05, "loss": 2.5443, "step": 2126000 }, { "epoch": 4.05, "learning_rate": 3.9874716108282784e-05, "loss": 2.5667, "step": 2126500 }, { "epoch": 4.05, "learning_rate": 3.987233446635979e-05, "loss": 2.5558, "step": 2127000 }, { "epoch": 4.05, "learning_rate": 3.986995282443679e-05, "loss": 2.5752, "step": 2127500 }, { "epoch": 4.05, "learning_rate": 3.986757594579764e-05, "loss": 2.5497, "step": 2128000 }, { "epoch": 4.06, "learning_rate": 3.9865194303874645e-05, "loss": 2.5519, "step": 2128500 }, { "epoch": 4.06, "learning_rate": 3.9862812661951654e-05, "loss": 2.5349, "step": 2129000 }, { "epoch": 4.06, "learning_rate": 3.9860435783312506e-05, "loss": 2.5539, "step": 2129500 }, { "epoch": 4.06, "learning_rate": 3.985805414138951e-05, "loss": 2.5477, "step": 2130000 }, { "epoch": 4.06, "eval_accuracy": 0.5350866390245469, "eval_loss": 2.455536127090454, "eval_runtime": 4196.4346, "eval_samples_per_second": 65.529, "eval_steps_per_second": 6.553, "step": 2130000 }, { "epoch": 4.06, "learning_rate": 3.985567249946651e-05, "loss": 2.5472, "step": 2130500 }, { "epoch": 4.06, "learning_rate": 3.985329085754352e-05, "loss": 2.5525, "step": 2131000 }, { "epoch": 4.06, "learning_rate": 3.9850909215620524e-05, "loss": 2.5566, "step": 2131500 }, { "epoch": 4.06, "learning_rate": 3.984852757369753e-05, "loss": 2.5461, "step": 2132000 }, { "epoch": 4.06, "learning_rate": 3.9846145931774536e-05, "loss": 2.5484, "step": 2132500 }, { "epoch": 4.06, "learning_rate": 3.984376428985154e-05, "loss": 2.5663, "step": 2133000 }, { "epoch": 4.06, "learning_rate": 3.984138264792854e-05, "loss": 2.5584, "step": 2133500 }, { "epoch": 4.07, "learning_rate": 3.983900100600555e-05, "loss": 2.5542, "step": 2134000 }, { "epoch": 4.07, "learning_rate": 3.983661936408255e-05, "loss": 2.5489, "step": 2134500 }, { "epoch": 4.07, "learning_rate": 3.983423772215956e-05, "loss": 2.5673, "step": 2135000 }, { "epoch": 4.07, "learning_rate": 3.9831856080236565e-05, "loss": 2.5557, "step": 2135500 }, { "epoch": 4.07, "learning_rate": 3.982947920159742e-05, "loss": 2.5457, "step": 2136000 }, { "epoch": 4.07, "learning_rate": 3.982709755967442e-05, "loss": 2.5569, "step": 2136500 }, { "epoch": 4.07, "learning_rate": 3.982471591775143e-05, "loss": 2.5547, "step": 2137000 }, { "epoch": 4.07, "learning_rate": 3.982233427582843e-05, "loss": 2.5393, "step": 2137500 }, { "epoch": 4.07, "learning_rate": 3.9819957397189284e-05, "loss": 2.5603, "step": 2138000 }, { "epoch": 4.07, "learning_rate": 3.981757575526629e-05, "loss": 2.5548, "step": 2138500 }, { "epoch": 4.08, "learning_rate": 3.9815194113343296e-05, "loss": 2.5534, "step": 2139000 }, { "epoch": 4.08, "learning_rate": 3.98128124714203e-05, "loss": 2.532, "step": 2139500 }, { "epoch": 4.08, "learning_rate": 3.981043559278115e-05, "loss": 2.5497, "step": 2140000 }, { "epoch": 4.08, "learning_rate": 3.9808053950858154e-05, "loss": 2.5494, "step": 2140500 }, { "epoch": 4.08, "learning_rate": 3.980567230893516e-05, "loss": 2.543, "step": 2141000 }, { "epoch": 4.08, "learning_rate": 3.9803290667012166e-05, "loss": 2.5563, "step": 2141500 }, { "epoch": 4.08, "learning_rate": 3.9800909025089175e-05, "loss": 2.5623, "step": 2142000 }, { "epoch": 4.08, "learning_rate": 3.979852738316617e-05, "loss": 2.5445, "step": 2142500 }, { "epoch": 4.08, "learning_rate": 3.979614574124318e-05, "loss": 2.5431, "step": 2143000 }, { "epoch": 4.08, "learning_rate": 3.979376886260403e-05, "loss": 2.5596, "step": 2143500 }, { "epoch": 4.08, "learning_rate": 3.9791387220681035e-05, "loss": 2.5309, "step": 2144000 }, { "epoch": 4.09, "learning_rate": 3.9789005578758045e-05, "loss": 2.5477, "step": 2144500 }, { "epoch": 4.09, "learning_rate": 3.978662393683505e-05, "loss": 2.5282, "step": 2145000 }, { "epoch": 4.09, "learning_rate": 3.978424229491206e-05, "loss": 2.5296, "step": 2145500 }, { "epoch": 4.09, "learning_rate": 3.978186065298905e-05, "loss": 2.532, "step": 2146000 }, { "epoch": 4.09, "learning_rate": 3.9779483774349905e-05, "loss": 2.5414, "step": 2146500 }, { "epoch": 4.09, "learning_rate": 3.9777102132426914e-05, "loss": 2.5449, "step": 2147000 }, { "epoch": 4.09, "learning_rate": 3.977472049050392e-05, "loss": 2.5427, "step": 2147500 }, { "epoch": 4.09, "learning_rate": 3.9772338848580926e-05, "loss": 2.5339, "step": 2148000 }, { "epoch": 4.09, "learning_rate": 3.976995720665793e-05, "loss": 2.5498, "step": 2148500 }, { "epoch": 4.09, "learning_rate": 3.976757556473493e-05, "loss": 2.5537, "step": 2149000 }, { "epoch": 4.1, "learning_rate": 3.976519392281194e-05, "loss": 2.5615, "step": 2149500 }, { "epoch": 4.1, "learning_rate": 3.9762812280888944e-05, "loss": 2.5669, "step": 2150000 }, { "epoch": 4.1, "learning_rate": 3.9760435402249796e-05, "loss": 2.5464, "step": 2150500 }, { "epoch": 4.1, "learning_rate": 3.9758053760326805e-05, "loss": 2.5312, "step": 2151000 }, { "epoch": 4.1, "learning_rate": 3.975567688168765e-05, "loss": 2.5569, "step": 2151500 }, { "epoch": 4.1, "learning_rate": 3.975329523976466e-05, "loss": 2.5659, "step": 2152000 }, { "epoch": 4.1, "learning_rate": 3.975091359784166e-05, "loss": 2.5245, "step": 2152500 }, { "epoch": 4.1, "learning_rate": 3.9748531955918665e-05, "loss": 2.5395, "step": 2153000 }, { "epoch": 4.1, "learning_rate": 3.9746150313995675e-05, "loss": 2.5601, "step": 2153500 }, { "epoch": 4.1, "learning_rate": 3.974376867207268e-05, "loss": 2.5475, "step": 2154000 }, { "epoch": 4.1, "learning_rate": 3.974138703014969e-05, "loss": 2.5493, "step": 2154500 }, { "epoch": 4.11, "learning_rate": 3.973901015151054e-05, "loss": 2.5383, "step": 2155000 }, { "epoch": 4.11, "learning_rate": 3.973662850958754e-05, "loss": 2.5722, "step": 2155500 }, { "epoch": 4.11, "learning_rate": 3.9734246867664544e-05, "loss": 2.536, "step": 2156000 }, { "epoch": 4.11, "learning_rate": 3.973186522574155e-05, "loss": 2.5604, "step": 2156500 }, { "epoch": 4.11, "learning_rate": 3.9729483583818556e-05, "loss": 2.5512, "step": 2157000 }, { "epoch": 4.11, "learning_rate": 3.972710194189556e-05, "loss": 2.5687, "step": 2157500 }, { "epoch": 4.11, "learning_rate": 3.972472029997257e-05, "loss": 2.5453, "step": 2158000 }, { "epoch": 4.11, "learning_rate": 3.972233865804957e-05, "loss": 2.543, "step": 2158500 }, { "epoch": 4.11, "learning_rate": 3.9719957016126574e-05, "loss": 2.5445, "step": 2159000 }, { "epoch": 4.11, "learning_rate": 3.9717580137487426e-05, "loss": 2.5383, "step": 2159500 }, { "epoch": 4.12, "learning_rate": 3.971519849556443e-05, "loss": 2.5489, "step": 2160000 }, { "epoch": 4.12, "eval_accuracy": 0.5352041594819579, "eval_loss": 2.454031467437744, "eval_runtime": 4191.6239, "eval_samples_per_second": 65.604, "eval_steps_per_second": 6.56, "step": 2160000 }, { "epoch": 4.12, "learning_rate": 3.971281685364144e-05, "loss": 2.5489, "step": 2160500 }, { "epoch": 4.12, "learning_rate": 3.971043521171845e-05, "loss": 2.5225, "step": 2161000 }, { "epoch": 4.12, "learning_rate": 3.970805833307929e-05, "loss": 2.5621, "step": 2161500 }, { "epoch": 4.12, "learning_rate": 3.9705676691156296e-05, "loss": 2.5627, "step": 2162000 }, { "epoch": 4.12, "learning_rate": 3.9703295049233305e-05, "loss": 2.5609, "step": 2162500 }, { "epoch": 4.12, "learning_rate": 3.970091340731031e-05, "loss": 2.5541, "step": 2163000 }, { "epoch": 4.12, "learning_rate": 3.969853652867116e-05, "loss": 2.5702, "step": 2163500 }, { "epoch": 4.12, "learning_rate": 3.969615488674816e-05, "loss": 2.5467, "step": 2164000 }, { "epoch": 4.12, "learning_rate": 3.969377324482517e-05, "loss": 2.556, "step": 2164500 }, { "epoch": 4.13, "learning_rate": 3.9691396366186024e-05, "loss": 2.553, "step": 2165000 }, { "epoch": 4.13, "learning_rate": 3.968901472426303e-05, "loss": 2.5499, "step": 2165500 }, { "epoch": 4.13, "learning_rate": 3.968663308234003e-05, "loss": 2.5518, "step": 2166000 }, { "epoch": 4.13, "learning_rate": 3.968425144041704e-05, "loss": 2.554, "step": 2166500 }, { "epoch": 4.13, "learning_rate": 3.968186979849404e-05, "loss": 2.5487, "step": 2167000 }, { "epoch": 4.13, "learning_rate": 3.967948815657105e-05, "loss": 2.548, "step": 2167500 }, { "epoch": 4.13, "learning_rate": 3.9677106514648054e-05, "loss": 2.551, "step": 2168000 }, { "epoch": 4.13, "learning_rate": 3.9674724872725056e-05, "loss": 2.5737, "step": 2168500 }, { "epoch": 4.13, "learning_rate": 3.967234799408591e-05, "loss": 2.5529, "step": 2169000 }, { "epoch": 4.13, "learning_rate": 3.966996635216291e-05, "loss": 2.5564, "step": 2169500 }, { "epoch": 4.13, "learning_rate": 3.966758471023992e-05, "loss": 2.5577, "step": 2170000 }, { "epoch": 4.14, "learning_rate": 3.966520783160077e-05, "loss": 2.5661, "step": 2170500 }, { "epoch": 4.14, "learning_rate": 3.9662826189677775e-05, "loss": 2.5367, "step": 2171000 }, { "epoch": 4.14, "learning_rate": 3.9660444547754785e-05, "loss": 2.5549, "step": 2171500 }, { "epoch": 4.14, "learning_rate": 3.965806290583178e-05, "loss": 2.5685, "step": 2172000 }, { "epoch": 4.14, "learning_rate": 3.965568126390879e-05, "loss": 2.5613, "step": 2172500 }, { "epoch": 4.14, "learning_rate": 3.965329962198579e-05, "loss": 2.5367, "step": 2173000 }, { "epoch": 4.14, "learning_rate": 3.96509179800628e-05, "loss": 2.5642, "step": 2173500 }, { "epoch": 4.14, "learning_rate": 3.9648536338139805e-05, "loss": 2.5429, "step": 2174000 }, { "epoch": 4.14, "learning_rate": 3.964615945950066e-05, "loss": 2.5397, "step": 2174500 }, { "epoch": 4.14, "learning_rate": 3.964377781757766e-05, "loss": 2.5365, "step": 2175000 }, { "epoch": 4.15, "learning_rate": 3.964139617565467e-05, "loss": 2.5531, "step": 2175500 }, { "epoch": 4.15, "learning_rate": 3.963901453373167e-05, "loss": 2.5489, "step": 2176000 }, { "epoch": 4.15, "learning_rate": 3.9636637655092524e-05, "loss": 2.5306, "step": 2176500 }, { "epoch": 4.15, "learning_rate": 3.9634256013169527e-05, "loss": 2.5539, "step": 2177000 }, { "epoch": 4.15, "learning_rate": 3.9631874371246536e-05, "loss": 2.5412, "step": 2177500 }, { "epoch": 4.15, "learning_rate": 3.962949272932354e-05, "loss": 2.5678, "step": 2178000 }, { "epoch": 4.15, "learning_rate": 3.962711585068439e-05, "loss": 2.5542, "step": 2178500 }, { "epoch": 4.15, "learning_rate": 3.9624734208761394e-05, "loss": 2.5472, "step": 2179000 }, { "epoch": 4.15, "learning_rate": 3.96223525668384e-05, "loss": 2.5524, "step": 2179500 }, { "epoch": 4.15, "learning_rate": 3.9619970924915406e-05, "loss": 2.5348, "step": 2180000 }, { "epoch": 4.15, "learning_rate": 3.961759404627626e-05, "loss": 2.573, "step": 2180500 }, { "epoch": 4.16, "learning_rate": 3.961521240435326e-05, "loss": 2.5536, "step": 2181000 }, { "epoch": 4.16, "learning_rate": 3.961283076243027e-05, "loss": 2.5641, "step": 2181500 }, { "epoch": 4.16, "learning_rate": 3.961044912050727e-05, "loss": 2.5585, "step": 2182000 }, { "epoch": 4.16, "learning_rate": 3.9608067478584275e-05, "loss": 2.5613, "step": 2182500 }, { "epoch": 4.16, "learning_rate": 3.9605685836661285e-05, "loss": 2.524, "step": 2183000 }, { "epoch": 4.16, "learning_rate": 3.960330895802214e-05, "loss": 2.5555, "step": 2183500 }, { "epoch": 4.16, "learning_rate": 3.960092731609914e-05, "loss": 2.5644, "step": 2184000 }, { "epoch": 4.16, "learning_rate": 3.959854567417615e-05, "loss": 2.5416, "step": 2184500 }, { "epoch": 4.16, "learning_rate": 3.9596164032253145e-05, "loss": 2.5554, "step": 2185000 }, { "epoch": 4.16, "learning_rate": 3.9593782390330154e-05, "loss": 2.5454, "step": 2185500 }, { "epoch": 4.17, "learning_rate": 3.9591405511691006e-05, "loss": 2.5497, "step": 2186000 }, { "epoch": 4.17, "learning_rate": 3.958902386976801e-05, "loss": 2.5404, "step": 2186500 }, { "epoch": 4.17, "learning_rate": 3.958664222784502e-05, "loss": 2.5425, "step": 2187000 }, { "epoch": 4.17, "learning_rate": 3.958426058592202e-05, "loss": 2.5596, "step": 2187500 }, { "epoch": 4.17, "learning_rate": 3.958187894399903e-05, "loss": 2.5509, "step": 2188000 }, { "epoch": 4.17, "learning_rate": 3.9579506828643726e-05, "loss": 2.5476, "step": 2188500 }, { "epoch": 4.17, "learning_rate": 3.957712518672073e-05, "loss": 2.5207, "step": 2189000 }, { "epoch": 4.17, "learning_rate": 3.957474354479773e-05, "loss": 2.5442, "step": 2189500 }, { "epoch": 4.17, "learning_rate": 3.957236190287474e-05, "loss": 2.533, "step": 2190000 }, { "epoch": 4.17, "eval_accuracy": 0.5354514394450214, "eval_loss": 2.453537702560425, "eval_runtime": 4196.5361, "eval_samples_per_second": 65.527, "eval_steps_per_second": 6.553, "step": 2190000 }, { "epoch": 4.17, "learning_rate": 3.956998026095174e-05, "loss": 2.5426, "step": 2190500 }, { "epoch": 4.17, "learning_rate": 3.956759861902875e-05, "loss": 2.5399, "step": 2191000 }, { "epoch": 4.18, "learning_rate": 3.9565216977105755e-05, "loss": 2.5407, "step": 2191500 }, { "epoch": 4.18, "learning_rate": 3.956284009846661e-05, "loss": 2.5884, "step": 2192000 }, { "epoch": 4.18, "learning_rate": 3.956045845654361e-05, "loss": 2.5511, "step": 2192500 }, { "epoch": 4.18, "learning_rate": 3.955807681462061e-05, "loss": 2.5635, "step": 2193000 }, { "epoch": 4.18, "learning_rate": 3.955569517269762e-05, "loss": 2.5656, "step": 2193500 }, { "epoch": 4.18, "learning_rate": 3.9553313530774624e-05, "loss": 2.5474, "step": 2194000 }, { "epoch": 4.18, "learning_rate": 3.9550931888851634e-05, "loss": 2.5453, "step": 2194500 }, { "epoch": 4.18, "learning_rate": 3.9548555010212486e-05, "loss": 2.5433, "step": 2195000 }, { "epoch": 4.18, "learning_rate": 3.954617336828948e-05, "loss": 2.5644, "step": 2195500 }, { "epoch": 4.18, "learning_rate": 3.954379172636649e-05, "loss": 2.5499, "step": 2196000 }, { "epoch": 4.19, "learning_rate": 3.95414100844435e-05, "loss": 2.5375, "step": 2196500 }, { "epoch": 4.19, "learning_rate": 3.9539028442520503e-05, "loss": 2.56, "step": 2197000 }, { "epoch": 4.19, "learning_rate": 3.953664680059751e-05, "loss": 2.545, "step": 2197500 }, { "epoch": 4.19, "learning_rate": 3.9534265158674515e-05, "loss": 2.5594, "step": 2198000 }, { "epoch": 4.19, "learning_rate": 3.953188351675152e-05, "loss": 2.5563, "step": 2198500 }, { "epoch": 4.19, "learning_rate": 3.952950187482852e-05, "loss": 2.5544, "step": 2199000 }, { "epoch": 4.19, "learning_rate": 3.952712499618937e-05, "loss": 2.5535, "step": 2199500 }, { "epoch": 4.19, "learning_rate": 3.952474335426638e-05, "loss": 2.5385, "step": 2200000 }, { "epoch": 4.19, "learning_rate": 3.9522361712343385e-05, "loss": 2.554, "step": 2200500 }, { "epoch": 4.19, "learning_rate": 3.9519980070420394e-05, "loss": 2.5558, "step": 2201000 }, { "epoch": 4.19, "learning_rate": 3.951760319178125e-05, "loss": 2.5379, "step": 2201500 }, { "epoch": 4.2, "learning_rate": 3.951522154985824e-05, "loss": 2.5365, "step": 2202000 }, { "epoch": 4.2, "learning_rate": 3.9512844671219095e-05, "loss": 2.5362, "step": 2202500 }, { "epoch": 4.2, "learning_rate": 3.9510463029296104e-05, "loss": 2.5539, "step": 2203000 }, { "epoch": 4.2, "learning_rate": 3.950808138737311e-05, "loss": 2.5495, "step": 2203500 }, { "epoch": 4.2, "learning_rate": 3.9505699745450116e-05, "loss": 2.5538, "step": 2204000 }, { "epoch": 4.2, "learning_rate": 3.950331810352712e-05, "loss": 2.535, "step": 2204500 }, { "epoch": 4.2, "learning_rate": 3.950094122488797e-05, "loss": 2.5411, "step": 2205000 }, { "epoch": 4.2, "learning_rate": 3.9498559582964974e-05, "loss": 2.5541, "step": 2205500 }, { "epoch": 4.2, "learning_rate": 3.9496177941041976e-05, "loss": 2.5456, "step": 2206000 }, { "epoch": 4.2, "learning_rate": 3.9493796299118986e-05, "loss": 2.5535, "step": 2206500 }, { "epoch": 4.21, "learning_rate": 3.949141465719599e-05, "loss": 2.551, "step": 2207000 }, { "epoch": 4.21, "learning_rate": 3.9489033015273e-05, "loss": 2.5529, "step": 2207500 }, { "epoch": 4.21, "learning_rate": 3.948665613663385e-05, "loss": 2.5383, "step": 2208000 }, { "epoch": 4.21, "learning_rate": 3.9484274494710846e-05, "loss": 2.5496, "step": 2208500 }, { "epoch": 4.21, "learning_rate": 3.9481892852787855e-05, "loss": 2.5392, "step": 2209000 }, { "epoch": 4.21, "learning_rate": 3.947951121086486e-05, "loss": 2.5628, "step": 2209500 }, { "epoch": 4.21, "learning_rate": 3.947712956894187e-05, "loss": 2.55, "step": 2210000 }, { "epoch": 4.21, "learning_rate": 3.947474792701888e-05, "loss": 2.5374, "step": 2210500 }, { "epoch": 4.21, "learning_rate": 3.947236628509588e-05, "loss": 2.5546, "step": 2211000 }, { "epoch": 4.21, "learning_rate": 3.946998464317288e-05, "loss": 2.5332, "step": 2211500 }, { "epoch": 4.21, "learning_rate": 3.9467607764533734e-05, "loss": 2.5508, "step": 2212000 }, { "epoch": 4.22, "learning_rate": 3.946522612261074e-05, "loss": 2.5545, "step": 2212500 }, { "epoch": 4.22, "learning_rate": 3.9462844480687746e-05, "loss": 2.5478, "step": 2213000 }, { "epoch": 4.22, "learning_rate": 3.946046283876475e-05, "loss": 2.5458, "step": 2213500 }, { "epoch": 4.22, "learning_rate": 3.94580859601256e-05, "loss": 2.5377, "step": 2214000 }, { "epoch": 4.22, "learning_rate": 3.945570431820261e-05, "loss": 2.5467, "step": 2214500 }, { "epoch": 4.22, "learning_rate": 3.945332267627961e-05, "loss": 2.5403, "step": 2215000 }, { "epoch": 4.22, "learning_rate": 3.9450941034356616e-05, "loss": 2.5469, "step": 2215500 }, { "epoch": 4.22, "learning_rate": 3.944855939243362e-05, "loss": 2.5427, "step": 2216000 }, { "epoch": 4.22, "learning_rate": 3.944618251379447e-05, "loss": 2.5482, "step": 2216500 }, { "epoch": 4.22, "learning_rate": 3.944380087187148e-05, "loss": 2.5469, "step": 2217000 }, { "epoch": 4.23, "learning_rate": 3.944141922994848e-05, "loss": 2.5326, "step": 2217500 }, { "epoch": 4.23, "learning_rate": 3.9439037588025486e-05, "loss": 2.5441, "step": 2218000 }, { "epoch": 4.23, "learning_rate": 3.943665594610249e-05, "loss": 2.5418, "step": 2218500 }, { "epoch": 4.23, "learning_rate": 3.94342743041795e-05, "loss": 2.5278, "step": 2219000 }, { "epoch": 4.23, "learning_rate": 3.943189742554035e-05, "loss": 2.5384, "step": 2219500 }, { "epoch": 4.23, "learning_rate": 3.94295205469012e-05, "loss": 2.5705, "step": 2220000 }, { "epoch": 4.23, "eval_accuracy": 0.5353970018078528, "eval_loss": 2.4527204036712646, "eval_runtime": 4200.4438, "eval_samples_per_second": 65.466, "eval_steps_per_second": 6.547, "step": 2220000 }, { "epoch": 4.23, "learning_rate": 3.9427138904978205e-05, "loss": 2.5471, "step": 2220500 }, { "epoch": 4.23, "learning_rate": 3.9424757263055214e-05, "loss": 2.5513, "step": 2221000 }, { "epoch": 4.23, "learning_rate": 3.942237562113222e-05, "loss": 2.5536, "step": 2221500 }, { "epoch": 4.23, "learning_rate": 3.941999397920922e-05, "loss": 2.5432, "step": 2222000 }, { "epoch": 4.23, "learning_rate": 3.941761233728622e-05, "loss": 2.5595, "step": 2222500 }, { "epoch": 4.24, "learning_rate": 3.941523069536323e-05, "loss": 2.5363, "step": 2223000 }, { "epoch": 4.24, "learning_rate": 3.9412849053440234e-05, "loss": 2.5382, "step": 2223500 }, { "epoch": 4.24, "learning_rate": 3.9410472174801086e-05, "loss": 2.5298, "step": 2224000 }, { "epoch": 4.24, "learning_rate": 3.9408090532878096e-05, "loss": 2.5418, "step": 2224500 }, { "epoch": 4.24, "learning_rate": 3.94057088909551e-05, "loss": 2.5535, "step": 2225000 }, { "epoch": 4.24, "learning_rate": 3.94033272490321e-05, "loss": 2.5442, "step": 2225500 }, { "epoch": 4.24, "learning_rate": 3.940095037039295e-05, "loss": 2.544, "step": 2226000 }, { "epoch": 4.24, "learning_rate": 3.9398568728469956e-05, "loss": 2.5438, "step": 2226500 }, { "epoch": 4.24, "learning_rate": 3.9396187086546965e-05, "loss": 2.5455, "step": 2227000 }, { "epoch": 4.24, "learning_rate": 3.939380544462397e-05, "loss": 2.5573, "step": 2227500 }, { "epoch": 4.25, "learning_rate": 3.939142380270097e-05, "loss": 2.5516, "step": 2228000 }, { "epoch": 4.25, "learning_rate": 3.938904692406182e-05, "loss": 2.5641, "step": 2228500 }, { "epoch": 4.25, "learning_rate": 3.9386670045422675e-05, "loss": 2.552, "step": 2229000 }, { "epoch": 4.25, "learning_rate": 3.938428840349968e-05, "loss": 2.5374, "step": 2229500 }, { "epoch": 4.25, "learning_rate": 3.938190676157669e-05, "loss": 2.5577, "step": 2230000 }, { "epoch": 4.25, "learning_rate": 3.937952511965369e-05, "loss": 2.5528, "step": 2230500 }, { "epoch": 4.25, "learning_rate": 3.93771434777307e-05, "loss": 2.5411, "step": 2231000 }, { "epoch": 4.25, "learning_rate": 3.93747618358077e-05, "loss": 2.5546, "step": 2231500 }, { "epoch": 4.25, "learning_rate": 3.9372380193884705e-05, "loss": 2.549, "step": 2232000 }, { "epoch": 4.25, "learning_rate": 3.9369998551961714e-05, "loss": 2.5494, "step": 2232500 }, { "epoch": 4.25, "learning_rate": 3.9367616910038717e-05, "loss": 2.5386, "step": 2233000 }, { "epoch": 4.26, "learning_rate": 3.936524003139957e-05, "loss": 2.5579, "step": 2233500 }, { "epoch": 4.26, "learning_rate": 3.936285838947658e-05, "loss": 2.5584, "step": 2234000 }, { "epoch": 4.26, "learning_rate": 3.9360481510837424e-05, "loss": 2.544, "step": 2234500 }, { "epoch": 4.26, "learning_rate": 3.935809986891443e-05, "loss": 2.5718, "step": 2235000 }, { "epoch": 4.26, "learning_rate": 3.9355718226991436e-05, "loss": 2.5429, "step": 2235500 }, { "epoch": 4.26, "learning_rate": 3.935333658506844e-05, "loss": 2.5439, "step": 2236000 }, { "epoch": 4.26, "learning_rate": 3.935095494314545e-05, "loss": 2.5544, "step": 2236500 }, { "epoch": 4.26, "learning_rate": 3.934857330122245e-05, "loss": 2.5398, "step": 2237000 }, { "epoch": 4.26, "learning_rate": 3.934619165929946e-05, "loss": 2.5581, "step": 2237500 }, { "epoch": 4.26, "learning_rate": 3.9343810017376456e-05, "loss": 2.5527, "step": 2238000 }, { "epoch": 4.27, "learning_rate": 3.934143313873731e-05, "loss": 2.5447, "step": 2238500 }, { "epoch": 4.27, "learning_rate": 3.933905149681432e-05, "loss": 2.5463, "step": 2239000 }, { "epoch": 4.27, "learning_rate": 3.933667461817517e-05, "loss": 2.5435, "step": 2239500 }, { "epoch": 4.27, "learning_rate": 3.933429297625217e-05, "loss": 2.5384, "step": 2240000 }, { "epoch": 4.27, "learning_rate": 3.933191133432918e-05, "loss": 2.5672, "step": 2240500 }, { "epoch": 4.27, "learning_rate": 3.9329529692406184e-05, "loss": 2.5556, "step": 2241000 }, { "epoch": 4.27, "learning_rate": 3.932714805048319e-05, "loss": 2.5448, "step": 2241500 }, { "epoch": 4.27, "learning_rate": 3.9324766408560196e-05, "loss": 2.5546, "step": 2242000 }, { "epoch": 4.27, "learning_rate": 3.93223847666372e-05, "loss": 2.5455, "step": 2242500 }, { "epoch": 4.27, "learning_rate": 3.932000312471421e-05, "loss": 2.528, "step": 2243000 }, { "epoch": 4.27, "learning_rate": 3.9317626246075054e-05, "loss": 2.5519, "step": 2243500 }, { "epoch": 4.28, "learning_rate": 3.931524460415206e-05, "loss": 2.5226, "step": 2244000 }, { "epoch": 4.28, "learning_rate": 3.9312862962229066e-05, "loss": 2.5465, "step": 2244500 }, { "epoch": 4.28, "learning_rate": 3.931048132030607e-05, "loss": 2.5516, "step": 2245000 }, { "epoch": 4.28, "learning_rate": 3.930809967838308e-05, "loss": 2.5558, "step": 2245500 }, { "epoch": 4.28, "learning_rate": 3.930572279974393e-05, "loss": 2.5623, "step": 2246000 }, { "epoch": 4.28, "learning_rate": 3.930334115782093e-05, "loss": 2.557, "step": 2246500 }, { "epoch": 4.28, "learning_rate": 3.930095951589794e-05, "loss": 2.5423, "step": 2247000 }, { "epoch": 4.28, "learning_rate": 3.9298577873974945e-05, "loss": 2.5409, "step": 2247500 }, { "epoch": 4.28, "learning_rate": 3.92962009953358e-05, "loss": 2.5596, "step": 2248000 }, { "epoch": 4.28, "learning_rate": 3.92938193534128e-05, "loss": 2.5696, "step": 2248500 }, { "epoch": 4.29, "learning_rate": 3.92914377114898e-05, "loss": 2.5341, "step": 2249000 }, { "epoch": 4.29, "learning_rate": 3.928905606956681e-05, "loss": 2.5417, "step": 2249500 }, { "epoch": 4.29, "learning_rate": 3.9286679190927664e-05, "loss": 2.5519, "step": 2250000 }, { "epoch": 4.29, "eval_accuracy": 0.5356714757061611, "eval_loss": 2.450448751449585, "eval_runtime": 4197.6525, "eval_samples_per_second": 65.51, "eval_steps_per_second": 6.551, "step": 2250000 }, { "epoch": 4.29, "learning_rate": 3.928430231228851e-05, "loss": 2.5547, "step": 2250500 }, { "epoch": 4.29, "learning_rate": 3.928192067036552e-05, "loss": 2.5676, "step": 2251000 }, { "epoch": 4.29, "learning_rate": 3.927953902844252e-05, "loss": 2.5473, "step": 2251500 }, { "epoch": 4.29, "learning_rate": 3.9277157386519524e-05, "loss": 2.5388, "step": 2252000 }, { "epoch": 4.29, "learning_rate": 3.9274775744596534e-05, "loss": 2.524, "step": 2252500 }, { "epoch": 4.29, "learning_rate": 3.9272394102673536e-05, "loss": 2.5604, "step": 2253000 }, { "epoch": 4.29, "learning_rate": 3.9270012460750546e-05, "loss": 2.541, "step": 2253500 }, { "epoch": 4.29, "learning_rate": 3.926763081882755e-05, "loss": 2.5289, "step": 2254000 }, { "epoch": 4.3, "learning_rate": 3.926524917690456e-05, "loss": 2.5283, "step": 2254500 }, { "epoch": 4.3, "learning_rate": 3.92628722982654e-05, "loss": 2.535, "step": 2255000 }, { "epoch": 4.3, "learning_rate": 3.9260490656342406e-05, "loss": 2.5605, "step": 2255500 }, { "epoch": 4.3, "learning_rate": 3.9258109014419415e-05, "loss": 2.5357, "step": 2256000 }, { "epoch": 4.3, "learning_rate": 3.925573213578027e-05, "loss": 2.5731, "step": 2256500 }, { "epoch": 4.3, "learning_rate": 3.925335049385727e-05, "loss": 2.5386, "step": 2257000 }, { "epoch": 4.3, "learning_rate": 3.925096885193428e-05, "loss": 2.5624, "step": 2257500 }, { "epoch": 4.3, "learning_rate": 3.924858721001128e-05, "loss": 2.5559, "step": 2258000 }, { "epoch": 4.3, "learning_rate": 3.9246205568088285e-05, "loss": 2.5561, "step": 2258500 }, { "epoch": 4.3, "learning_rate": 3.924382392616529e-05, "loss": 2.5472, "step": 2259000 }, { "epoch": 4.31, "learning_rate": 3.92414422842423e-05, "loss": 2.5464, "step": 2259500 }, { "epoch": 4.31, "learning_rate": 3.9239060642319306e-05, "loss": 2.5447, "step": 2260000 }, { "epoch": 4.31, "learning_rate": 3.923668376368015e-05, "loss": 2.5575, "step": 2260500 }, { "epoch": 4.31, "learning_rate": 3.923430212175716e-05, "loss": 2.5411, "step": 2261000 }, { "epoch": 4.31, "learning_rate": 3.9231920479834164e-05, "loss": 2.5409, "step": 2261500 }, { "epoch": 4.31, "learning_rate": 3.9229538837911166e-05, "loss": 2.5827, "step": 2262000 }, { "epoch": 4.31, "learning_rate": 3.922716195927202e-05, "loss": 2.5537, "step": 2262500 }, { "epoch": 4.31, "learning_rate": 3.922478031734902e-05, "loss": 2.5521, "step": 2263000 }, { "epoch": 4.31, "learning_rate": 3.922239867542603e-05, "loss": 2.5353, "step": 2263500 }, { "epoch": 4.31, "learning_rate": 3.922001703350304e-05, "loss": 2.5388, "step": 2264000 }, { "epoch": 4.31, "learning_rate": 3.9217640154863886e-05, "loss": 2.5633, "step": 2264500 }, { "epoch": 4.32, "learning_rate": 3.921526327622474e-05, "loss": 2.5385, "step": 2265000 }, { "epoch": 4.32, "learning_rate": 3.921288163430174e-05, "loss": 2.5527, "step": 2265500 }, { "epoch": 4.32, "learning_rate": 3.921049999237874e-05, "loss": 2.54, "step": 2266000 }, { "epoch": 4.32, "learning_rate": 3.920811835045575e-05, "loss": 2.5532, "step": 2266500 }, { "epoch": 4.32, "learning_rate": 3.920573670853276e-05, "loss": 2.5507, "step": 2267000 }, { "epoch": 4.32, "learning_rate": 3.9203355066609765e-05, "loss": 2.5448, "step": 2267500 }, { "epoch": 4.32, "learning_rate": 3.9200973424686774e-05, "loss": 2.5512, "step": 2268000 }, { "epoch": 4.32, "learning_rate": 3.919859654604762e-05, "loss": 2.5486, "step": 2268500 }, { "epoch": 4.32, "learning_rate": 3.919621490412462e-05, "loss": 2.544, "step": 2269000 }, { "epoch": 4.32, "learning_rate": 3.919383326220163e-05, "loss": 2.5278, "step": 2269500 }, { "epoch": 4.33, "learning_rate": 3.9191451620278634e-05, "loss": 2.5739, "step": 2270000 }, { "epoch": 4.33, "learning_rate": 3.9189069978355644e-05, "loss": 2.5272, "step": 2270500 }, { "epoch": 4.33, "learning_rate": 3.9186688336432646e-05, "loss": 2.5527, "step": 2271000 }, { "epoch": 4.33, "learning_rate": 3.918430669450965e-05, "loss": 2.5427, "step": 2271500 }, { "epoch": 4.33, "learning_rate": 3.918192505258665e-05, "loss": 2.5373, "step": 2272000 }, { "epoch": 4.33, "learning_rate": 3.917954341066366e-05, "loss": 2.5578, "step": 2272500 }, { "epoch": 4.33, "learning_rate": 3.917716653202451e-05, "loss": 2.5352, "step": 2273000 }, { "epoch": 4.33, "learning_rate": 3.9174789653385365e-05, "loss": 2.5593, "step": 2273500 }, { "epoch": 4.33, "learning_rate": 3.917240801146237e-05, "loss": 2.5474, "step": 2274000 }, { "epoch": 4.33, "learning_rate": 3.917002636953938e-05, "loss": 2.5575, "step": 2274500 }, { "epoch": 4.33, "learning_rate": 3.916764472761637e-05, "loss": 2.549, "step": 2275000 }, { "epoch": 4.34, "learning_rate": 3.916526308569338e-05, "loss": 2.5475, "step": 2275500 }, { "epoch": 4.34, "learning_rate": 3.9162881443770385e-05, "loss": 2.5529, "step": 2276000 }, { "epoch": 4.34, "learning_rate": 3.9160499801847395e-05, "loss": 2.5386, "step": 2276500 }, { "epoch": 4.34, "learning_rate": 3.91581181599244e-05, "loss": 2.5559, "step": 2277000 }, { "epoch": 4.34, "learning_rate": 3.915574128128525e-05, "loss": 2.5624, "step": 2277500 }, { "epoch": 4.34, "learning_rate": 3.91533644026461e-05, "loss": 2.5451, "step": 2278000 }, { "epoch": 4.34, "learning_rate": 3.9150982760723105e-05, "loss": 2.5418, "step": 2278500 }, { "epoch": 4.34, "learning_rate": 3.914860111880011e-05, "loss": 2.5542, "step": 2279000 }, { "epoch": 4.34, "learning_rate": 3.914621947687712e-05, "loss": 2.5353, "step": 2279500 }, { "epoch": 4.34, "learning_rate": 3.914383783495412e-05, "loss": 2.5597, "step": 2280000 }, { "epoch": 4.34, "eval_accuracy": 0.5358386206519403, "eval_loss": 2.449659585952759, "eval_runtime": 4197.1262, "eval_samples_per_second": 65.518, "eval_steps_per_second": 6.552, "step": 2280000 }, { "epoch": 4.35, "learning_rate": 3.914145619303113e-05, "loss": 2.541, "step": 2280500 }, { "epoch": 4.35, "learning_rate": 3.913907455110814e-05, "loss": 2.5573, "step": 2281000 }, { "epoch": 4.35, "learning_rate": 3.9136692909185134e-05, "loss": 2.5273, "step": 2281500 }, { "epoch": 4.35, "learning_rate": 3.9134316030545986e-05, "loss": 2.5427, "step": 2282000 }, { "epoch": 4.35, "learning_rate": 3.9131934388622996e-05, "loss": 2.5355, "step": 2282500 }, { "epoch": 4.35, "learning_rate": 3.91295527467e-05, "loss": 2.5562, "step": 2283000 }, { "epoch": 4.35, "learning_rate": 3.912717110477701e-05, "loss": 2.5372, "step": 2283500 }, { "epoch": 4.35, "learning_rate": 3.912478946285401e-05, "loss": 2.5409, "step": 2284000 }, { "epoch": 4.35, "learning_rate": 3.912240782093101e-05, "loss": 2.5535, "step": 2284500 }, { "epoch": 4.35, "learning_rate": 3.9120030942291865e-05, "loss": 2.5492, "step": 2285000 }, { "epoch": 4.35, "learning_rate": 3.911764930036887e-05, "loss": 2.536, "step": 2285500 }, { "epoch": 4.36, "learning_rate": 3.911526765844588e-05, "loss": 2.5303, "step": 2286000 }, { "epoch": 4.36, "learning_rate": 3.911288601652288e-05, "loss": 2.5281, "step": 2286500 }, { "epoch": 4.36, "learning_rate": 3.911050437459989e-05, "loss": 2.545, "step": 2287000 }, { "epoch": 4.36, "learning_rate": 3.9108122732676885e-05, "loss": 2.5407, "step": 2287500 }, { "epoch": 4.36, "learning_rate": 3.9105741090753894e-05, "loss": 2.5351, "step": 2288000 }, { "epoch": 4.36, "learning_rate": 3.910336421211475e-05, "loss": 2.5488, "step": 2288500 }, { "epoch": 4.36, "learning_rate": 3.910098257019175e-05, "loss": 2.5598, "step": 2289000 }, { "epoch": 4.36, "learning_rate": 3.909860092826876e-05, "loss": 2.5641, "step": 2289500 }, { "epoch": 4.36, "learning_rate": 3.909621928634576e-05, "loss": 2.5331, "step": 2290000 }, { "epoch": 4.36, "learning_rate": 3.909383764442277e-05, "loss": 2.5531, "step": 2290500 }, { "epoch": 4.37, "learning_rate": 3.9091456002499773e-05, "loss": 2.561, "step": 2291000 }, { "epoch": 4.37, "learning_rate": 3.9089079123860626e-05, "loss": 2.5549, "step": 2291500 }, { "epoch": 4.37, "learning_rate": 3.908669748193763e-05, "loss": 2.5324, "step": 2292000 }, { "epoch": 4.37, "learning_rate": 3.908431584001464e-05, "loss": 2.5516, "step": 2292500 }, { "epoch": 4.37, "learning_rate": 3.908193419809164e-05, "loss": 2.5324, "step": 2293000 }, { "epoch": 4.37, "learning_rate": 3.907955255616865e-05, "loss": 2.564, "step": 2293500 }, { "epoch": 4.37, "learning_rate": 3.9077170914245646e-05, "loss": 2.5596, "step": 2294000 }, { "epoch": 4.37, "learning_rate": 3.9074789272322655e-05, "loss": 2.537, "step": 2294500 }, { "epoch": 4.37, "learning_rate": 3.907240763039966e-05, "loss": 2.5515, "step": 2295000 }, { "epoch": 4.37, "learning_rate": 3.907002598847667e-05, "loss": 2.5466, "step": 2295500 }, { "epoch": 4.37, "learning_rate": 3.906765387312136e-05, "loss": 2.5338, "step": 2296000 }, { "epoch": 4.38, "learning_rate": 3.906527223119837e-05, "loss": 2.5439, "step": 2296500 }, { "epoch": 4.38, "learning_rate": 3.9062890589275374e-05, "loss": 2.5392, "step": 2297000 }, { "epoch": 4.38, "learning_rate": 3.906050894735238e-05, "loss": 2.5408, "step": 2297500 }, { "epoch": 4.38, "learning_rate": 3.905812730542938e-05, "loss": 2.5444, "step": 2298000 }, { "epoch": 4.38, "learning_rate": 3.905575042679023e-05, "loss": 2.573, "step": 2298500 }, { "epoch": 4.38, "learning_rate": 3.905336878486724e-05, "loss": 2.5501, "step": 2299000 }, { "epoch": 4.38, "learning_rate": 3.9050987142944244e-05, "loss": 2.5582, "step": 2299500 }, { "epoch": 4.38, "learning_rate": 3.904860550102125e-05, "loss": 2.5405, "step": 2300000 }, { "epoch": 4.38, "learning_rate": 3.9046223859098256e-05, "loss": 2.5582, "step": 2300500 }, { "epoch": 4.38, "learning_rate": 3.904384698045911e-05, "loss": 2.5487, "step": 2301000 }, { "epoch": 4.39, "learning_rate": 3.904146533853611e-05, "loss": 2.5428, "step": 2301500 }, { "epoch": 4.39, "learning_rate": 3.9039083696613113e-05, "loss": 2.5312, "step": 2302000 }, { "epoch": 4.39, "learning_rate": 3.903670205469012e-05, "loss": 2.5382, "step": 2302500 }, { "epoch": 4.39, "learning_rate": 3.9034320412767125e-05, "loss": 2.5421, "step": 2303000 }, { "epoch": 4.39, "learning_rate": 3.9031938770844135e-05, "loss": 2.5409, "step": 2303500 }, { "epoch": 4.39, "learning_rate": 3.902955712892114e-05, "loss": 2.5494, "step": 2304000 }, { "epoch": 4.39, "learning_rate": 3.902717548699814e-05, "loss": 2.5464, "step": 2304500 }, { "epoch": 4.39, "learning_rate": 3.902479860835899e-05, "loss": 2.5522, "step": 2305000 }, { "epoch": 4.39, "learning_rate": 3.9022416966436e-05, "loss": 2.5281, "step": 2305500 }, { "epoch": 4.39, "learning_rate": 3.9020035324513004e-05, "loss": 2.5459, "step": 2306000 }, { "epoch": 4.39, "learning_rate": 3.9017653682590014e-05, "loss": 2.5481, "step": 2306500 }, { "epoch": 4.4, "learning_rate": 3.901527680395086e-05, "loss": 2.5556, "step": 2307000 }, { "epoch": 4.4, "learning_rate": 3.901289516202786e-05, "loss": 2.5539, "step": 2307500 }, { "epoch": 4.4, "learning_rate": 3.901051352010487e-05, "loss": 2.5431, "step": 2308000 }, { "epoch": 4.4, "learning_rate": 3.9008131878181874e-05, "loss": 2.5583, "step": 2308500 }, { "epoch": 4.4, "learning_rate": 3.9005754999542726e-05, "loss": 2.5739, "step": 2309000 }, { "epoch": 4.4, "learning_rate": 3.9003373357619736e-05, "loss": 2.5677, "step": 2309500 }, { "epoch": 4.4, "learning_rate": 3.900099171569674e-05, "loss": 2.5429, "step": 2310000 }, { "epoch": 4.4, "eval_accuracy": 0.5360705298207556, "eval_loss": 2.448493719100952, "eval_runtime": 4196.1148, "eval_samples_per_second": 65.534, "eval_steps_per_second": 6.553, "step": 2310000 }, { "epoch": 4.4, "learning_rate": 3.899861007377375e-05, "loss": 2.5481, "step": 2310500 }, { "epoch": 4.4, "learning_rate": 3.8996228431850744e-05, "loss": 2.5525, "step": 2311000 }, { "epoch": 4.4, "learning_rate": 3.8993851553211596e-05, "loss": 2.554, "step": 2311500 }, { "epoch": 4.41, "learning_rate": 3.8991469911288605e-05, "loss": 2.5366, "step": 2312000 }, { "epoch": 4.41, "learning_rate": 3.898908826936561e-05, "loss": 2.5343, "step": 2312500 }, { "epoch": 4.41, "learning_rate": 3.898670662744262e-05, "loss": 2.531, "step": 2313000 }, { "epoch": 4.41, "learning_rate": 3.898432974880347e-05, "loss": 2.5569, "step": 2313500 }, { "epoch": 4.41, "learning_rate": 3.898194810688047e-05, "loss": 2.5585, "step": 2314000 }, { "epoch": 4.41, "learning_rate": 3.8979566464957475e-05, "loss": 2.5636, "step": 2314500 }, { "epoch": 4.41, "learning_rate": 3.897718482303448e-05, "loss": 2.5537, "step": 2315000 }, { "epoch": 4.41, "learning_rate": 3.897480794439533e-05, "loss": 2.5362, "step": 2315500 }, { "epoch": 4.41, "learning_rate": 3.897242630247234e-05, "loss": 2.5588, "step": 2316000 }, { "epoch": 4.41, "learning_rate": 3.897004942383319e-05, "loss": 2.5547, "step": 2316500 }, { "epoch": 4.41, "learning_rate": 3.8967667781910194e-05, "loss": 2.551, "step": 2317000 }, { "epoch": 4.42, "learning_rate": 3.8965286139987203e-05, "loss": 2.5437, "step": 2317500 }, { "epoch": 4.42, "learning_rate": 3.89629044980642e-05, "loss": 2.5479, "step": 2318000 }, { "epoch": 4.42, "learning_rate": 3.896052285614121e-05, "loss": 2.5593, "step": 2318500 }, { "epoch": 4.42, "learning_rate": 3.895814121421821e-05, "loss": 2.5301, "step": 2319000 }, { "epoch": 4.42, "learning_rate": 3.895575957229522e-05, "loss": 2.5509, "step": 2319500 }, { "epoch": 4.42, "learning_rate": 3.895337793037222e-05, "loss": 2.5375, "step": 2320000 }, { "epoch": 4.42, "learning_rate": 3.8951001051733076e-05, "loss": 2.5337, "step": 2320500 }, { "epoch": 4.42, "learning_rate": 3.894861940981008e-05, "loss": 2.5504, "step": 2321000 }, { "epoch": 4.42, "learning_rate": 3.894623776788708e-05, "loss": 2.5438, "step": 2321500 }, { "epoch": 4.42, "learning_rate": 3.894385612596409e-05, "loss": 2.5493, "step": 2322000 }, { "epoch": 4.43, "learning_rate": 3.894147924732494e-05, "loss": 2.5484, "step": 2322500 }, { "epoch": 4.43, "learning_rate": 3.8939097605401945e-05, "loss": 2.5577, "step": 2323000 }, { "epoch": 4.43, "learning_rate": 3.8936715963478955e-05, "loss": 2.558, "step": 2323500 }, { "epoch": 4.43, "learning_rate": 3.893433432155596e-05, "loss": 2.5482, "step": 2324000 }, { "epoch": 4.43, "learning_rate": 3.893195267963296e-05, "loss": 2.5567, "step": 2324500 }, { "epoch": 4.43, "learning_rate": 3.892957580099381e-05, "loss": 2.5313, "step": 2325000 }, { "epoch": 4.43, "learning_rate": 3.8927194159070815e-05, "loss": 2.5351, "step": 2325500 }, { "epoch": 4.43, "learning_rate": 3.8924812517147824e-05, "loss": 2.5391, "step": 2326000 }, { "epoch": 4.43, "learning_rate": 3.8922435638508676e-05, "loss": 2.532, "step": 2326500 }, { "epoch": 4.43, "learning_rate": 3.892005399658568e-05, "loss": 2.5502, "step": 2327000 }, { "epoch": 4.43, "learning_rate": 3.891767235466269e-05, "loss": 2.5431, "step": 2327500 }, { "epoch": 4.44, "learning_rate": 3.891529071273969e-05, "loss": 2.5314, "step": 2328000 }, { "epoch": 4.44, "learning_rate": 3.8912909070816694e-05, "loss": 2.5456, "step": 2328500 }, { "epoch": 4.44, "learning_rate": 3.89105274288937e-05, "loss": 2.538, "step": 2329000 }, { "epoch": 4.44, "learning_rate": 3.8908145786970706e-05, "loss": 2.5324, "step": 2329500 }, { "epoch": 4.44, "learning_rate": 3.8905764145047715e-05, "loss": 2.5429, "step": 2330000 }, { "epoch": 4.44, "learning_rate": 3.890338726640857e-05, "loss": 2.5519, "step": 2330500 }, { "epoch": 4.44, "learning_rate": 3.890100562448556e-05, "loss": 2.5345, "step": 2331000 }, { "epoch": 4.44, "learning_rate": 3.889862398256257e-05, "loss": 2.5324, "step": 2331500 }, { "epoch": 4.44, "learning_rate": 3.8896242340639575e-05, "loss": 2.5568, "step": 2332000 }, { "epoch": 4.44, "learning_rate": 3.889386546200043e-05, "loss": 2.562, "step": 2332500 }, { "epoch": 4.45, "learning_rate": 3.889148382007744e-05, "loss": 2.5418, "step": 2333000 }, { "epoch": 4.45, "learning_rate": 3.888910217815444e-05, "loss": 2.558, "step": 2333500 }, { "epoch": 4.45, "learning_rate": 3.888672053623145e-05, "loss": 2.5328, "step": 2334000 }, { "epoch": 4.45, "learning_rate": 3.8884338894308445e-05, "loss": 2.5353, "step": 2334500 }, { "epoch": 4.45, "learning_rate": 3.88819620156693e-05, "loss": 2.5607, "step": 2335000 }, { "epoch": 4.45, "learning_rate": 3.8879580373746307e-05, "loss": 2.5329, "step": 2335500 }, { "epoch": 4.45, "learning_rate": 3.887720349510716e-05, "loss": 2.539, "step": 2336000 }, { "epoch": 4.45, "learning_rate": 3.887482185318416e-05, "loss": 2.5399, "step": 2336500 }, { "epoch": 4.45, "learning_rate": 3.887244021126117e-05, "loss": 2.5708, "step": 2337000 }, { "epoch": 4.45, "learning_rate": 3.8870058569338174e-05, "loss": 2.5389, "step": 2337500 }, { "epoch": 4.45, "learning_rate": 3.8867676927415176e-05, "loss": 2.5653, "step": 2338000 }, { "epoch": 4.46, "learning_rate": 3.886529528549218e-05, "loss": 2.5398, "step": 2338500 }, { "epoch": 4.46, "learning_rate": 3.886291364356919e-05, "loss": 2.5511, "step": 2339000 }, { "epoch": 4.46, "learning_rate": 3.886053676493004e-05, "loss": 2.534, "step": 2339500 }, { "epoch": 4.46, "learning_rate": 3.885815512300704e-05, "loss": 2.5531, "step": 2340000 }, { "epoch": 4.46, "eval_accuracy": 0.536314525183013, "eval_loss": 2.4475905895233154, "eval_runtime": 4200.5087, "eval_samples_per_second": 65.465, "eval_steps_per_second": 6.547, "step": 2340000 }, { "epoch": 4.46, "learning_rate": 3.885577348108405e-05, "loss": 2.5412, "step": 2340500 }, { "epoch": 4.46, "learning_rate": 3.8853391839161055e-05, "loss": 2.5448, "step": 2341000 }, { "epoch": 4.46, "learning_rate": 3.885101019723806e-05, "loss": 2.5665, "step": 2341500 }, { "epoch": 4.46, "learning_rate": 3.884862855531507e-05, "loss": 2.5458, "step": 2342000 }, { "epoch": 4.46, "learning_rate": 3.884624691339207e-05, "loss": 2.5385, "step": 2342500 }, { "epoch": 4.46, "learning_rate": 3.884386527146908e-05, "loss": 2.5495, "step": 2343000 }, { "epoch": 4.47, "learning_rate": 3.8841493156113774e-05, "loss": 2.54, "step": 2343500 }, { "epoch": 4.47, "learning_rate": 3.883911151419078e-05, "loss": 2.575, "step": 2344000 }, { "epoch": 4.47, "learning_rate": 3.883672987226778e-05, "loss": 2.5307, "step": 2344500 }, { "epoch": 4.47, "learning_rate": 3.883434823034479e-05, "loss": 2.5383, "step": 2345000 }, { "epoch": 4.47, "learning_rate": 3.883196658842179e-05, "loss": 2.5458, "step": 2345500 }, { "epoch": 4.47, "learning_rate": 3.88295849464988e-05, "loss": 2.5384, "step": 2346000 }, { "epoch": 4.47, "learning_rate": 3.8827203304575804e-05, "loss": 2.5581, "step": 2346500 }, { "epoch": 4.47, "learning_rate": 3.882482166265281e-05, "loss": 2.5372, "step": 2347000 }, { "epoch": 4.47, "learning_rate": 3.882244002072981e-05, "loss": 2.5379, "step": 2347500 }, { "epoch": 4.47, "learning_rate": 3.882006314209066e-05, "loss": 2.5414, "step": 2348000 }, { "epoch": 4.47, "learning_rate": 3.881768150016767e-05, "loss": 2.5475, "step": 2348500 }, { "epoch": 4.48, "learning_rate": 3.881529985824467e-05, "loss": 2.5627, "step": 2349000 }, { "epoch": 4.48, "learning_rate": 3.881291821632168e-05, "loss": 2.5605, "step": 2349500 }, { "epoch": 4.48, "learning_rate": 3.8810541337682535e-05, "loss": 2.5331, "step": 2350000 }, { "epoch": 4.48, "learning_rate": 3.880815969575954e-05, "loss": 2.53, "step": 2350500 }, { "epoch": 4.48, "learning_rate": 3.880577805383654e-05, "loss": 2.5428, "step": 2351000 }, { "epoch": 4.48, "learning_rate": 3.880339641191354e-05, "loss": 2.5527, "step": 2351500 }, { "epoch": 4.48, "learning_rate": 3.8801024296558245e-05, "loss": 2.5388, "step": 2352000 }, { "epoch": 4.48, "learning_rate": 3.879864265463525e-05, "loss": 2.5652, "step": 2352500 }, { "epoch": 4.48, "learning_rate": 3.879626101271226e-05, "loss": 2.5509, "step": 2353000 }, { "epoch": 4.48, "learning_rate": 3.879387937078926e-05, "loss": 2.5543, "step": 2353500 }, { "epoch": 4.49, "learning_rate": 3.879149772886627e-05, "loss": 2.5193, "step": 2354000 }, { "epoch": 4.49, "learning_rate": 3.8789116086943265e-05, "loss": 2.5625, "step": 2354500 }, { "epoch": 4.49, "learning_rate": 3.8786734445020274e-05, "loss": 2.5494, "step": 2355000 }, { "epoch": 4.49, "learning_rate": 3.878435280309728e-05, "loss": 2.5466, "step": 2355500 }, { "epoch": 4.49, "learning_rate": 3.8781971161174286e-05, "loss": 2.5201, "step": 2356000 }, { "epoch": 4.49, "learning_rate": 3.877959428253514e-05, "loss": 2.5446, "step": 2356500 }, { "epoch": 4.49, "learning_rate": 3.877721264061214e-05, "loss": 2.5283, "step": 2357000 }, { "epoch": 4.49, "learning_rate": 3.8774830998689144e-05, "loss": 2.5524, "step": 2357500 }, { "epoch": 4.49, "learning_rate": 3.8772449356766146e-05, "loss": 2.5452, "step": 2358000 }, { "epoch": 4.49, "learning_rate": 3.877007724141085e-05, "loss": 2.5539, "step": 2358500 }, { "epoch": 4.49, "learning_rate": 3.876769559948785e-05, "loss": 2.5547, "step": 2359000 }, { "epoch": 4.5, "learning_rate": 3.876531395756486e-05, "loss": 2.5416, "step": 2359500 }, { "epoch": 4.5, "learning_rate": 3.876293231564186e-05, "loss": 2.5377, "step": 2360000 }, { "epoch": 4.5, "learning_rate": 3.876055067371887e-05, "loss": 2.5574, "step": 2360500 }, { "epoch": 4.5, "learning_rate": 3.8758169031795875e-05, "loss": 2.543, "step": 2361000 }, { "epoch": 4.5, "learning_rate": 3.875578738987288e-05, "loss": 2.5557, "step": 2361500 }, { "epoch": 4.5, "learning_rate": 3.875340574794989e-05, "loss": 2.5561, "step": 2362000 }, { "epoch": 4.5, "learning_rate": 3.875103363259458e-05, "loss": 2.5432, "step": 2362500 }, { "epoch": 4.5, "learning_rate": 3.8748651990671585e-05, "loss": 2.5451, "step": 2363000 }, { "epoch": 4.5, "learning_rate": 3.8746270348748594e-05, "loss": 2.5307, "step": 2363500 }, { "epoch": 4.5, "learning_rate": 3.87438887068256e-05, "loss": 2.5621, "step": 2364000 }, { "epoch": 4.51, "learning_rate": 3.8741507064902606e-05, "loss": 2.5441, "step": 2364500 }, { "epoch": 4.51, "learning_rate": 3.873913018626346e-05, "loss": 2.5565, "step": 2365000 }, { "epoch": 4.51, "learning_rate": 3.8736748544340454e-05, "loss": 2.5576, "step": 2365500 }, { "epoch": 4.51, "learning_rate": 3.8734366902417464e-05, "loss": 2.5453, "step": 2366000 }, { "epoch": 4.51, "learning_rate": 3.8731985260494466e-05, "loss": 2.5268, "step": 2366500 }, { "epoch": 4.51, "learning_rate": 3.8729603618571476e-05, "loss": 2.5488, "step": 2367000 }, { "epoch": 4.51, "learning_rate": 3.872722197664848e-05, "loss": 2.5437, "step": 2367500 }, { "epoch": 4.51, "learning_rate": 3.872484033472548e-05, "loss": 2.5663, "step": 2368000 }, { "epoch": 4.51, "learning_rate": 3.872245869280249e-05, "loss": 2.546, "step": 2368500 }, { "epoch": 4.51, "learning_rate": 3.8720081814163336e-05, "loss": 2.5575, "step": 2369000 }, { "epoch": 4.51, "learning_rate": 3.8717700172240345e-05, "loss": 2.5488, "step": 2369500 }, { "epoch": 4.52, "learning_rate": 3.8715318530317355e-05, "loss": 2.5314, "step": 2370000 }, { "epoch": 4.52, "eval_accuracy": 0.5364371609440602, "eval_loss": 2.446284055709839, "eval_runtime": 4200.62, "eval_samples_per_second": 65.464, "eval_steps_per_second": 6.546, "step": 2370000 }, { "epoch": 4.52, "learning_rate": 3.871293688839436e-05, "loss": 2.5353, "step": 2370500 }, { "epoch": 4.52, "learning_rate": 3.871055524647137e-05, "loss": 2.5506, "step": 2371000 }, { "epoch": 4.52, "learning_rate": 3.870817836783221e-05, "loss": 2.5507, "step": 2371500 }, { "epoch": 4.52, "learning_rate": 3.8705796725909215e-05, "loss": 2.5682, "step": 2372000 }, { "epoch": 4.52, "learning_rate": 3.870341984727007e-05, "loss": 2.5485, "step": 2372500 }, { "epoch": 4.52, "learning_rate": 3.8701038205347077e-05, "loss": 2.5453, "step": 2373000 }, { "epoch": 4.52, "learning_rate": 3.869865656342408e-05, "loss": 2.5442, "step": 2373500 }, { "epoch": 4.52, "learning_rate": 3.869627492150109e-05, "loss": 2.542, "step": 2374000 }, { "epoch": 4.52, "learning_rate": 3.869389327957809e-05, "loss": 2.5376, "step": 2374500 }, { "epoch": 4.53, "learning_rate": 3.8691516400938943e-05, "loss": 2.5555, "step": 2375000 }, { "epoch": 4.53, "learning_rate": 3.8689134759015946e-05, "loss": 2.5305, "step": 2375500 }, { "epoch": 4.53, "learning_rate": 3.868675311709295e-05, "loss": 2.5364, "step": 2376000 }, { "epoch": 4.53, "learning_rate": 3.868437147516996e-05, "loss": 2.5435, "step": 2376500 }, { "epoch": 4.53, "learning_rate": 3.868198983324696e-05, "loss": 2.5487, "step": 2377000 }, { "epoch": 4.53, "learning_rate": 3.867960819132397e-05, "loss": 2.5344, "step": 2377500 }, { "epoch": 4.53, "learning_rate": 3.8677226549400966e-05, "loss": 2.5481, "step": 2378000 }, { "epoch": 4.53, "learning_rate": 3.8674844907477975e-05, "loss": 2.5404, "step": 2378500 }, { "epoch": 4.53, "learning_rate": 3.867246326555498e-05, "loss": 2.5341, "step": 2379000 }, { "epoch": 4.53, "learning_rate": 3.867008638691583e-05, "loss": 2.555, "step": 2379500 }, { "epoch": 4.53, "learning_rate": 3.866770950827668e-05, "loss": 2.5434, "step": 2380000 }, { "epoch": 4.54, "learning_rate": 3.866532786635369e-05, "loss": 2.5529, "step": 2380500 }, { "epoch": 4.54, "learning_rate": 3.8662946224430695e-05, "loss": 2.5491, "step": 2381000 }, { "epoch": 4.54, "learning_rate": 3.86605645825077e-05, "loss": 2.5504, "step": 2381500 }, { "epoch": 4.54, "learning_rate": 3.86581829405847e-05, "loss": 2.5241, "step": 2382000 }, { "epoch": 4.54, "learning_rate": 3.865580606194555e-05, "loss": 2.5613, "step": 2382500 }, { "epoch": 4.54, "learning_rate": 3.865342442002256e-05, "loss": 2.542, "step": 2383000 }, { "epoch": 4.54, "learning_rate": 3.8651042778099564e-05, "loss": 2.5428, "step": 2383500 }, { "epoch": 4.54, "learning_rate": 3.8648661136176574e-05, "loss": 2.5222, "step": 2384000 }, { "epoch": 4.54, "learning_rate": 3.8646284257537426e-05, "loss": 2.5321, "step": 2384500 }, { "epoch": 4.54, "learning_rate": 3.864390261561442e-05, "loss": 2.5369, "step": 2385000 }, { "epoch": 4.55, "learning_rate": 3.864152097369143e-05, "loss": 2.5486, "step": 2385500 }, { "epoch": 4.55, "learning_rate": 3.8639139331768434e-05, "loss": 2.5418, "step": 2386000 }, { "epoch": 4.55, "learning_rate": 3.863675768984544e-05, "loss": 2.527, "step": 2386500 }, { "epoch": 4.55, "learning_rate": 3.863437604792245e-05, "loss": 2.5482, "step": 2387000 }, { "epoch": 4.55, "learning_rate": 3.8631994405999455e-05, "loss": 2.5579, "step": 2387500 }, { "epoch": 4.55, "learning_rate": 3.862961276407646e-05, "loss": 2.5556, "step": 2388000 }, { "epoch": 4.55, "learning_rate": 3.862723588543731e-05, "loss": 2.5312, "step": 2388500 }, { "epoch": 4.55, "learning_rate": 3.862485424351431e-05, "loss": 2.5366, "step": 2389000 }, { "epoch": 4.55, "learning_rate": 3.862247260159132e-05, "loss": 2.5387, "step": 2389500 }, { "epoch": 4.55, "learning_rate": 3.8620090959668325e-05, "loss": 2.5469, "step": 2390000 }, { "epoch": 4.55, "learning_rate": 3.8617709317745334e-05, "loss": 2.5549, "step": 2390500 }, { "epoch": 4.56, "learning_rate": 3.8615332439106186e-05, "loss": 2.5536, "step": 2391000 }, { "epoch": 4.56, "learning_rate": 3.861295079718318e-05, "loss": 2.5304, "step": 2391500 }, { "epoch": 4.56, "learning_rate": 3.861056915526019e-05, "loss": 2.5557, "step": 2392000 }, { "epoch": 4.56, "learning_rate": 3.8608187513337194e-05, "loss": 2.5686, "step": 2392500 }, { "epoch": 4.56, "learning_rate": 3.860581063469805e-05, "loss": 2.5476, "step": 2393000 }, { "epoch": 4.56, "learning_rate": 3.86034337560589e-05, "loss": 2.5558, "step": 2393500 }, { "epoch": 4.56, "learning_rate": 3.860105211413591e-05, "loss": 2.5347, "step": 2394000 }, { "epoch": 4.56, "learning_rate": 3.859867047221291e-05, "loss": 2.5187, "step": 2394500 }, { "epoch": 4.56, "learning_rate": 3.8596288830289914e-05, "loss": 2.5346, "step": 2395000 }, { "epoch": 4.56, "learning_rate": 3.8593907188366916e-05, "loss": 2.5205, "step": 2395500 }, { "epoch": 4.57, "learning_rate": 3.8591525546443926e-05, "loss": 2.542, "step": 2396000 }, { "epoch": 4.57, "learning_rate": 3.858914390452093e-05, "loss": 2.5467, "step": 2396500 }, { "epoch": 4.57, "learning_rate": 3.858676702588178e-05, "loss": 2.5525, "step": 2397000 }, { "epoch": 4.57, "learning_rate": 3.858438538395879e-05, "loss": 2.5348, "step": 2397500 }, { "epoch": 4.57, "learning_rate": 3.858200374203579e-05, "loss": 2.5343, "step": 2398000 }, { "epoch": 4.57, "learning_rate": 3.8579622100112795e-05, "loss": 2.5493, "step": 2398500 }, { "epoch": 4.57, "learning_rate": 3.85772404581898e-05, "loss": 2.537, "step": 2399000 }, { "epoch": 4.57, "learning_rate": 3.857485881626681e-05, "loss": 2.5492, "step": 2399500 }, { "epoch": 4.57, "learning_rate": 3.857247717434381e-05, "loss": 2.5311, "step": 2400000 }, { "epoch": 4.57, "eval_accuracy": 0.536739888096921, "eval_loss": 2.4447696208953857, "eval_runtime": 4193.8524, "eval_samples_per_second": 65.569, "eval_steps_per_second": 6.557, "step": 2400000 }, { "epoch": 4.57, "learning_rate": 3.857009553242082e-05, "loss": 2.5525, "step": 2400500 }, { "epoch": 4.57, "learning_rate": 3.856771389049782e-05, "loss": 2.553, "step": 2401000 }, { "epoch": 4.58, "learning_rate": 3.8565337011858674e-05, "loss": 2.5531, "step": 2401500 }, { "epoch": 4.58, "learning_rate": 3.856295536993568e-05, "loss": 2.5398, "step": 2402000 }, { "epoch": 4.58, "learning_rate": 3.8560573728012686e-05, "loss": 2.5411, "step": 2402500 }, { "epoch": 4.58, "learning_rate": 3.855819208608969e-05, "loss": 2.5326, "step": 2403000 }, { "epoch": 4.58, "learning_rate": 3.855581520745054e-05, "loss": 2.5451, "step": 2403500 }, { "epoch": 4.58, "learning_rate": 3.855343832881139e-05, "loss": 2.5521, "step": 2404000 }, { "epoch": 4.58, "learning_rate": 3.8551056686888396e-05, "loss": 2.5721, "step": 2404500 }, { "epoch": 4.58, "learning_rate": 3.85486750449654e-05, "loss": 2.5454, "step": 2405000 }, { "epoch": 4.58, "learning_rate": 3.854629340304241e-05, "loss": 2.5425, "step": 2405500 }, { "epoch": 4.58, "learning_rate": 3.854391176111941e-05, "loss": 2.5488, "step": 2406000 }, { "epoch": 4.59, "learning_rate": 3.854153488248026e-05, "loss": 2.5389, "step": 2406500 }, { "epoch": 4.59, "learning_rate": 3.8539153240557266e-05, "loss": 2.5287, "step": 2407000 }, { "epoch": 4.59, "learning_rate": 3.8536771598634275e-05, "loss": 2.5403, "step": 2407500 }, { "epoch": 4.59, "learning_rate": 3.853438995671128e-05, "loss": 2.5284, "step": 2408000 }, { "epoch": 4.59, "learning_rate": 3.853200831478828e-05, "loss": 2.5618, "step": 2408500 }, { "epoch": 4.59, "learning_rate": 3.852962667286529e-05, "loss": 2.5246, "step": 2409000 }, { "epoch": 4.59, "learning_rate": 3.852724503094229e-05, "loss": 2.548, "step": 2409500 }, { "epoch": 4.59, "learning_rate": 3.85248633890193e-05, "loss": 2.5497, "step": 2410000 }, { "epoch": 4.59, "learning_rate": 3.8522486510380154e-05, "loss": 2.5388, "step": 2410500 }, { "epoch": 4.59, "learning_rate": 3.8520104868457157e-05, "loss": 2.5376, "step": 2411000 }, { "epoch": 4.59, "learning_rate": 3.851772322653416e-05, "loss": 2.5302, "step": 2411500 }, { "epoch": 4.6, "learning_rate": 3.851534158461116e-05, "loss": 2.5464, "step": 2412000 }, { "epoch": 4.6, "learning_rate": 3.851295994268817e-05, "loss": 2.521, "step": 2412500 }, { "epoch": 4.6, "learning_rate": 3.8510578300765174e-05, "loss": 2.5472, "step": 2413000 }, { "epoch": 4.6, "learning_rate": 3.850819665884218e-05, "loss": 2.542, "step": 2413500 }, { "epoch": 4.6, "learning_rate": 3.8505815016919186e-05, "loss": 2.5371, "step": 2414000 }, { "epoch": 4.6, "learning_rate": 3.850343813828003e-05, "loss": 2.5464, "step": 2414500 }, { "epoch": 4.6, "learning_rate": 3.850105649635704e-05, "loss": 2.5443, "step": 2415000 }, { "epoch": 4.6, "learning_rate": 3.849867961771789e-05, "loss": 2.5337, "step": 2415500 }, { "epoch": 4.6, "learning_rate": 3.8496297975794896e-05, "loss": 2.5374, "step": 2416000 }, { "epoch": 4.6, "learning_rate": 3.8493916333871905e-05, "loss": 2.5164, "step": 2416500 }, { "epoch": 4.61, "learning_rate": 3.849153469194891e-05, "loss": 2.5355, "step": 2417000 }, { "epoch": 4.61, "learning_rate": 3.848915781330976e-05, "loss": 2.5466, "step": 2417500 }, { "epoch": 4.61, "learning_rate": 3.848677617138676e-05, "loss": 2.5504, "step": 2418000 }, { "epoch": 4.61, "learning_rate": 3.848439452946377e-05, "loss": 2.533, "step": 2418500 }, { "epoch": 4.61, "learning_rate": 3.8482012887540775e-05, "loss": 2.5502, "step": 2419000 }, { "epoch": 4.61, "learning_rate": 3.8479631245617784e-05, "loss": 2.5356, "step": 2419500 }, { "epoch": 4.61, "learning_rate": 3.847724960369479e-05, "loss": 2.5447, "step": 2420000 }, { "epoch": 4.61, "learning_rate": 3.8474867961771796e-05, "loss": 2.5564, "step": 2420500 }, { "epoch": 4.61, "learning_rate": 3.847248631984879e-05, "loss": 2.5511, "step": 2421000 }, { "epoch": 4.61, "learning_rate": 3.84701046779258e-05, "loss": 2.5359, "step": 2421500 }, { "epoch": 4.61, "learning_rate": 3.8467727799286654e-05, "loss": 2.5347, "step": 2422000 }, { "epoch": 4.62, "learning_rate": 3.8465346157363656e-05, "loss": 2.5428, "step": 2422500 }, { "epoch": 4.62, "learning_rate": 3.8462964515440666e-05, "loss": 2.55, "step": 2423000 }, { "epoch": 4.62, "learning_rate": 3.846058287351767e-05, "loss": 2.5595, "step": 2423500 }, { "epoch": 4.62, "learning_rate": 3.845820599487852e-05, "loss": 2.5426, "step": 2424000 }, { "epoch": 4.62, "learning_rate": 3.845582435295552e-05, "loss": 2.5513, "step": 2424500 }, { "epoch": 4.62, "learning_rate": 3.8453442711032526e-05, "loss": 2.5228, "step": 2425000 }, { "epoch": 4.62, "learning_rate": 3.8451061069109535e-05, "loss": 2.5464, "step": 2425500 }, { "epoch": 4.62, "learning_rate": 3.844867942718654e-05, "loss": 2.5548, "step": 2426000 }, { "epoch": 4.62, "learning_rate": 3.844629778526355e-05, "loss": 2.5493, "step": 2426500 }, { "epoch": 4.62, "learning_rate": 3.844391614334055e-05, "loss": 2.5542, "step": 2427000 }, { "epoch": 4.63, "learning_rate": 3.844153450141755e-05, "loss": 2.5417, "step": 2427500 }, { "epoch": 4.63, "learning_rate": 3.843915285949456e-05, "loss": 2.5147, "step": 2428000 }, { "epoch": 4.63, "learning_rate": 3.843677598085541e-05, "loss": 2.5522, "step": 2428500 }, { "epoch": 4.63, "learning_rate": 3.843439433893242e-05, "loss": 2.531, "step": 2429000 }, { "epoch": 4.63, "learning_rate": 3.8432012697009426e-05, "loss": 2.545, "step": 2429500 }, { "epoch": 4.63, "learning_rate": 3.842963105508643e-05, "loss": 2.5652, "step": 2430000 }, { "epoch": 4.63, "eval_accuracy": 0.5365613003697621, "eval_loss": 2.4436495304107666, "eval_runtime": 4203.4565, "eval_samples_per_second": 65.419, "eval_steps_per_second": 6.542, "step": 2430000 }, { "epoch": 4.63, "learning_rate": 3.842725417644728e-05, "loss": 2.5487, "step": 2430500 }, { "epoch": 4.63, "learning_rate": 3.8424872534524284e-05, "loss": 2.5392, "step": 2431000 }, { "epoch": 4.63, "learning_rate": 3.8422490892601286e-05, "loss": 2.5442, "step": 2431500 }, { "epoch": 4.63, "learning_rate": 3.8420109250678296e-05, "loss": 2.5349, "step": 2432000 }, { "epoch": 4.63, "learning_rate": 3.84177276087553e-05, "loss": 2.538, "step": 2432500 }, { "epoch": 4.64, "learning_rate": 3.841534596683231e-05, "loss": 2.5534, "step": 2433000 }, { "epoch": 4.64, "learning_rate": 3.8412964324909304e-05, "loss": 2.5281, "step": 2433500 }, { "epoch": 4.64, "learning_rate": 3.841058268298631e-05, "loss": 2.5289, "step": 2434000 }, { "epoch": 4.64, "learning_rate": 3.8408205804347165e-05, "loss": 2.5562, "step": 2434500 }, { "epoch": 4.64, "learning_rate": 3.840582416242417e-05, "loss": 2.5358, "step": 2435000 }, { "epoch": 4.64, "learning_rate": 3.840344252050118e-05, "loss": 2.5431, "step": 2435500 }, { "epoch": 4.64, "learning_rate": 3.840106087857818e-05, "loss": 2.5479, "step": 2436000 }, { "epoch": 4.64, "learning_rate": 3.839868399993903e-05, "loss": 2.5395, "step": 2436500 }, { "epoch": 4.64, "learning_rate": 3.839630235801604e-05, "loss": 2.5285, "step": 2437000 }, { "epoch": 4.64, "learning_rate": 3.839392071609304e-05, "loss": 2.5371, "step": 2437500 }, { "epoch": 4.65, "learning_rate": 3.839153907417005e-05, "loss": 2.5324, "step": 2438000 }, { "epoch": 4.65, "learning_rate": 3.83891621955309e-05, "loss": 2.5611, "step": 2438500 }, { "epoch": 4.65, "learning_rate": 3.83867805536079e-05, "loss": 2.5229, "step": 2439000 }, { "epoch": 4.65, "learning_rate": 3.838439891168491e-05, "loss": 2.5618, "step": 2439500 }, { "epoch": 4.65, "learning_rate": 3.8382017269761914e-05, "loss": 2.548, "step": 2440000 }, { "epoch": 4.65, "learning_rate": 3.8379635627838917e-05, "loss": 2.5343, "step": 2440500 }, { "epoch": 4.65, "learning_rate": 3.8377253985915926e-05, "loss": 2.5367, "step": 2441000 }, { "epoch": 4.65, "learning_rate": 3.837487234399293e-05, "loss": 2.5523, "step": 2441500 }, { "epoch": 4.65, "learning_rate": 3.837249070206994e-05, "loss": 2.5333, "step": 2442000 }, { "epoch": 4.65, "learning_rate": 3.8370113823430783e-05, "loss": 2.5556, "step": 2442500 }, { "epoch": 4.65, "learning_rate": 3.836773218150779e-05, "loss": 2.5444, "step": 2443000 }, { "epoch": 4.66, "learning_rate": 3.8365350539584796e-05, "loss": 2.5263, "step": 2443500 }, { "epoch": 4.66, "learning_rate": 3.83629688976618e-05, "loss": 2.5427, "step": 2444000 }, { "epoch": 4.66, "learning_rate": 3.836059201902265e-05, "loss": 2.524, "step": 2444500 }, { "epoch": 4.66, "learning_rate": 3.835821037709966e-05, "loss": 2.508, "step": 2445000 }, { "epoch": 4.66, "learning_rate": 3.8355833498460505e-05, "loss": 2.5338, "step": 2445500 }, { "epoch": 4.66, "learning_rate": 3.8353451856537515e-05, "loss": 2.5365, "step": 2446000 }, { "epoch": 4.66, "learning_rate": 3.835107021461452e-05, "loss": 2.5282, "step": 2446500 }, { "epoch": 4.66, "learning_rate": 3.834868857269152e-05, "loss": 2.5496, "step": 2447000 }, { "epoch": 4.66, "learning_rate": 3.834630693076853e-05, "loss": 2.5475, "step": 2447500 }, { "epoch": 4.66, "learning_rate": 3.834392528884553e-05, "loss": 2.5467, "step": 2448000 }, { "epoch": 4.67, "learning_rate": 3.834154364692254e-05, "loss": 2.545, "step": 2448500 }, { "epoch": 4.67, "learning_rate": 3.8339162004999544e-05, "loss": 2.5405, "step": 2449000 }, { "epoch": 4.67, "learning_rate": 3.8336780363076553e-05, "loss": 2.5575, "step": 2449500 }, { "epoch": 4.67, "learning_rate": 3.8334403484437406e-05, "loss": 2.5496, "step": 2450000 }, { "epoch": 4.67, "learning_rate": 3.83320218425144e-05, "loss": 2.5373, "step": 2450500 }, { "epoch": 4.67, "learning_rate": 3.832964020059141e-05, "loss": 2.5347, "step": 2451000 }, { "epoch": 4.67, "learning_rate": 3.8327258558668414e-05, "loss": 2.5353, "step": 2451500 }, { "epoch": 4.67, "learning_rate": 3.8324881680029266e-05, "loss": 2.5497, "step": 2452000 }, { "epoch": 4.67, "learning_rate": 3.8322500038106275e-05, "loss": 2.5512, "step": 2452500 }, { "epoch": 4.67, "learning_rate": 3.832011839618328e-05, "loss": 2.5322, "step": 2453000 }, { "epoch": 4.67, "learning_rate": 3.831773675426028e-05, "loss": 2.5563, "step": 2453500 }, { "epoch": 4.68, "learning_rate": 3.831535987562113e-05, "loss": 2.5377, "step": 2454000 }, { "epoch": 4.68, "learning_rate": 3.8312982996981985e-05, "loss": 2.5341, "step": 2454500 }, { "epoch": 4.68, "learning_rate": 3.831060135505899e-05, "loss": 2.5472, "step": 2455000 }, { "epoch": 4.68, "learning_rate": 3.8308219713136e-05, "loss": 2.5363, "step": 2455500 }, { "epoch": 4.68, "learning_rate": 3.8305838071213e-05, "loss": 2.5176, "step": 2456000 }, { "epoch": 4.68, "learning_rate": 3.830345642929001e-05, "loss": 2.5341, "step": 2456500 }, { "epoch": 4.68, "learning_rate": 3.8301074787367005e-05, "loss": 2.5309, "step": 2457000 }, { "epoch": 4.68, "learning_rate": 3.8298693145444014e-05, "loss": 2.5382, "step": 2457500 }, { "epoch": 4.68, "learning_rate": 3.8296311503521024e-05, "loss": 2.545, "step": 2458000 }, { "epoch": 4.68, "learning_rate": 3.829393462488187e-05, "loss": 2.5273, "step": 2458500 }, { "epoch": 4.69, "learning_rate": 3.829155298295888e-05, "loss": 2.5307, "step": 2459000 }, { "epoch": 4.69, "learning_rate": 3.828917134103588e-05, "loss": 2.5399, "step": 2459500 }, { "epoch": 4.69, "learning_rate": 3.828678969911289e-05, "loss": 2.558, "step": 2460000 }, { "epoch": 4.69, "eval_accuracy": 0.5368834258630975, "eval_loss": 2.442981004714966, "eval_runtime": 4208.1144, "eval_samples_per_second": 65.347, "eval_steps_per_second": 6.535, "step": 2460000 }, { "epoch": 4.69, "learning_rate": 3.8284412820473736e-05, "loss": 2.561, "step": 2460500 }, { "epoch": 4.69, "learning_rate": 3.8282031178550746e-05, "loss": 2.5196, "step": 2461000 }, { "epoch": 4.69, "learning_rate": 3.827965429991159e-05, "loss": 2.5477, "step": 2461500 }, { "epoch": 4.69, "learning_rate": 3.82772726579886e-05, "loss": 2.5428, "step": 2462000 }, { "epoch": 4.69, "learning_rate": 3.82748910160656e-05, "loss": 2.5464, "step": 2462500 }, { "epoch": 4.69, "learning_rate": 3.827250937414261e-05, "loss": 2.55, "step": 2463000 }, { "epoch": 4.69, "learning_rate": 3.8270127732219615e-05, "loss": 2.5333, "step": 2463500 }, { "epoch": 4.69, "learning_rate": 3.826774609029662e-05, "loss": 2.5237, "step": 2464000 }, { "epoch": 4.7, "learning_rate": 3.826536444837363e-05, "loss": 2.5342, "step": 2464500 }, { "epoch": 4.7, "learning_rate": 3.826298280645063e-05, "loss": 2.5708, "step": 2465000 }, { "epoch": 4.7, "learning_rate": 3.826060592781148e-05, "loss": 2.5445, "step": 2465500 }, { "epoch": 4.7, "learning_rate": 3.825822428588849e-05, "loss": 2.516, "step": 2466000 }, { "epoch": 4.7, "learning_rate": 3.8255842643965494e-05, "loss": 2.5206, "step": 2466500 }, { "epoch": 4.7, "learning_rate": 3.82534610020425e-05, "loss": 2.5269, "step": 2467000 }, { "epoch": 4.7, "learning_rate": 3.82510793601195e-05, "loss": 2.5338, "step": 2467500 }, { "epoch": 4.7, "learning_rate": 3.824870248148035e-05, "loss": 2.5572, "step": 2468000 }, { "epoch": 4.7, "learning_rate": 3.824632083955736e-05, "loss": 2.519, "step": 2468500 }, { "epoch": 4.7, "learning_rate": 3.8243939197634364e-05, "loss": 2.5278, "step": 2469000 }, { "epoch": 4.71, "learning_rate": 3.824155755571137e-05, "loss": 2.5357, "step": 2469500 }, { "epoch": 4.71, "learning_rate": 3.8239180677072225e-05, "loss": 2.5574, "step": 2470000 }, { "epoch": 4.71, "learning_rate": 3.823679903514922e-05, "loss": 2.5606, "step": 2470500 }, { "epoch": 4.71, "learning_rate": 3.8234422156510074e-05, "loss": 2.548, "step": 2471000 }, { "epoch": 4.71, "learning_rate": 3.823204051458708e-05, "loss": 2.5354, "step": 2471500 }, { "epoch": 4.71, "learning_rate": 3.8229658872664086e-05, "loss": 2.5482, "step": 2472000 }, { "epoch": 4.71, "learning_rate": 3.8227277230741095e-05, "loss": 2.5332, "step": 2472500 }, { "epoch": 4.71, "learning_rate": 3.82248955888181e-05, "loss": 2.5323, "step": 2473000 }, { "epoch": 4.71, "learning_rate": 3.822251394689511e-05, "loss": 2.5308, "step": 2473500 }, { "epoch": 4.71, "learning_rate": 3.82201323049721e-05, "loss": 2.5444, "step": 2474000 }, { "epoch": 4.71, "learning_rate": 3.821775066304911e-05, "loss": 2.5326, "step": 2474500 }, { "epoch": 4.72, "learning_rate": 3.8215373784409965e-05, "loss": 2.5509, "step": 2475000 }, { "epoch": 4.72, "learning_rate": 3.821299214248697e-05, "loss": 2.5431, "step": 2475500 }, { "epoch": 4.72, "learning_rate": 3.821061050056398e-05, "loss": 2.5395, "step": 2476000 }, { "epoch": 4.72, "learning_rate": 3.820822885864098e-05, "loss": 2.5392, "step": 2476500 }, { "epoch": 4.72, "learning_rate": 3.820584721671798e-05, "loss": 2.5509, "step": 2477000 }, { "epoch": 4.72, "learning_rate": 3.820346557479499e-05, "loss": 2.5395, "step": 2477500 }, { "epoch": 4.72, "learning_rate": 3.8201083932871994e-05, "loss": 2.5331, "step": 2478000 }, { "epoch": 4.72, "learning_rate": 3.8198702290949e-05, "loss": 2.5465, "step": 2478500 }, { "epoch": 4.72, "learning_rate": 3.8196325412309856e-05, "loss": 2.5506, "step": 2479000 }, { "epoch": 4.72, "learning_rate": 3.819394377038686e-05, "loss": 2.5348, "step": 2479500 }, { "epoch": 4.73, "learning_rate": 3.819156212846386e-05, "loss": 2.5264, "step": 2480000 }, { "epoch": 4.73, "learning_rate": 3.818918524982471e-05, "loss": 2.5377, "step": 2480500 }, { "epoch": 4.73, "learning_rate": 3.8186803607901716e-05, "loss": 2.5336, "step": 2481000 }, { "epoch": 4.73, "learning_rate": 3.8184421965978725e-05, "loss": 2.5364, "step": 2481500 }, { "epoch": 4.73, "learning_rate": 3.818204032405573e-05, "loss": 2.541, "step": 2482000 }, { "epoch": 4.73, "learning_rate": 3.817965868213274e-05, "loss": 2.5331, "step": 2482500 }, { "epoch": 4.73, "learning_rate": 3.817728180349359e-05, "loss": 2.5336, "step": 2483000 }, { "epoch": 4.73, "learning_rate": 3.817490016157059e-05, "loss": 2.5421, "step": 2483500 }, { "epoch": 4.73, "learning_rate": 3.8172518519647595e-05, "loss": 2.5554, "step": 2484000 }, { "epoch": 4.73, "learning_rate": 3.81701368777246e-05, "loss": 2.5232, "step": 2484500 }, { "epoch": 4.73, "learning_rate": 3.816775523580161e-05, "loss": 2.5403, "step": 2485000 }, { "epoch": 4.74, "learning_rate": 3.816537359387861e-05, "loss": 2.5416, "step": 2485500 }, { "epoch": 4.74, "learning_rate": 3.816299195195562e-05, "loss": 2.5613, "step": 2486000 }, { "epoch": 4.74, "learning_rate": 3.816061031003262e-05, "loss": 2.5609, "step": 2486500 }, { "epoch": 4.74, "learning_rate": 3.815823343139347e-05, "loss": 2.5339, "step": 2487000 }, { "epoch": 4.74, "learning_rate": 3.8155851789470476e-05, "loss": 2.5529, "step": 2487500 }, { "epoch": 4.74, "learning_rate": 3.815347014754748e-05, "loss": 2.5427, "step": 2488000 }, { "epoch": 4.74, "learning_rate": 3.815108850562449e-05, "loss": 2.5278, "step": 2488500 }, { "epoch": 4.74, "learning_rate": 3.814870686370149e-05, "loss": 2.5452, "step": 2489000 }, { "epoch": 4.74, "learning_rate": 3.814632998506234e-05, "loss": 2.5305, "step": 2489500 }, { "epoch": 4.74, "learning_rate": 3.8143948343139346e-05, "loss": 2.5339, "step": 2490000 }, { "epoch": 4.74, "eval_accuracy": 0.5369715235490023, "eval_loss": 2.4417076110839844, "eval_runtime": 4201.885, "eval_samples_per_second": 65.444, "eval_steps_per_second": 6.544, "step": 2490000 }, { "epoch": 4.75, "learning_rate": 3.8141566701216355e-05, "loss": 2.5373, "step": 2490500 }, { "epoch": 4.75, "learning_rate": 3.813918505929336e-05, "loss": 2.53, "step": 2491000 }, { "epoch": 4.75, "learning_rate": 3.813680341737037e-05, "loss": 2.557, "step": 2491500 }, { "epoch": 4.75, "learning_rate": 3.813442177544737e-05, "loss": 2.5437, "step": 2492000 }, { "epoch": 4.75, "learning_rate": 3.813204013352438e-05, "loss": 2.5477, "step": 2492500 }, { "epoch": 4.75, "learning_rate": 3.8129658491601375e-05, "loss": 2.546, "step": 2493000 }, { "epoch": 4.75, "learning_rate": 3.8127276849678385e-05, "loss": 2.5569, "step": 2493500 }, { "epoch": 4.75, "learning_rate": 3.812489997103924e-05, "loss": 2.52, "step": 2494000 }, { "epoch": 4.75, "learning_rate": 3.812251832911624e-05, "loss": 2.5489, "step": 2494500 }, { "epoch": 4.75, "learning_rate": 3.812013668719325e-05, "loss": 2.541, "step": 2495000 }, { "epoch": 4.75, "learning_rate": 3.811775504527025e-05, "loss": 2.5218, "step": 2495500 }, { "epoch": 4.76, "learning_rate": 3.811538292991495e-05, "loss": 2.5697, "step": 2496000 }, { "epoch": 4.76, "learning_rate": 3.8113001287991956e-05, "loss": 2.5279, "step": 2496500 }, { "epoch": 4.76, "learning_rate": 3.811061964606896e-05, "loss": 2.5383, "step": 2497000 }, { "epoch": 4.76, "learning_rate": 3.810823800414596e-05, "loss": 2.5473, "step": 2497500 }, { "epoch": 4.76, "learning_rate": 3.810585636222297e-05, "loss": 2.5389, "step": 2498000 }, { "epoch": 4.76, "learning_rate": 3.8103474720299973e-05, "loss": 2.5356, "step": 2498500 }, { "epoch": 4.76, "learning_rate": 3.810109307837698e-05, "loss": 2.5492, "step": 2499000 }, { "epoch": 4.76, "learning_rate": 3.8098711436453985e-05, "loss": 2.5548, "step": 2499500 }, { "epoch": 4.76, "learning_rate": 3.809633455781483e-05, "loss": 2.5423, "step": 2500000 }, { "epoch": 4.76, "learning_rate": 3.809395291589184e-05, "loss": 2.5367, "step": 2500500 }, { "epoch": 4.77, "learning_rate": 3.809157127396884e-05, "loss": 2.5481, "step": 2501000 }, { "epoch": 4.77, "learning_rate": 3.808918963204585e-05, "loss": 2.5479, "step": 2501500 }, { "epoch": 4.77, "learning_rate": 3.8086807990122855e-05, "loss": 2.5638, "step": 2502000 }, { "epoch": 4.77, "learning_rate": 3.8084426348199864e-05, "loss": 2.5277, "step": 2502500 }, { "epoch": 4.77, "learning_rate": 3.808204470627687e-05, "loss": 2.5266, "step": 2503000 }, { "epoch": 4.77, "learning_rate": 3.807966306435387e-05, "loss": 2.5333, "step": 2503500 }, { "epoch": 4.77, "learning_rate": 3.807728142243088e-05, "loss": 2.5305, "step": 2504000 }, { "epoch": 4.77, "learning_rate": 3.807490454379173e-05, "loss": 2.5403, "step": 2504500 }, { "epoch": 4.77, "learning_rate": 3.807252766515258e-05, "loss": 2.5414, "step": 2505000 }, { "epoch": 4.77, "learning_rate": 3.8070146023229586e-05, "loss": 2.5297, "step": 2505500 }, { "epoch": 4.77, "learning_rate": 3.806776438130659e-05, "loss": 2.538, "step": 2506000 }, { "epoch": 4.78, "learning_rate": 3.806538273938359e-05, "loss": 2.5361, "step": 2506500 }, { "epoch": 4.78, "learning_rate": 3.80630010974606e-05, "loss": 2.5425, "step": 2507000 }, { "epoch": 4.78, "learning_rate": 3.806062421882145e-05, "loss": 2.5246, "step": 2507500 }, { "epoch": 4.78, "learning_rate": 3.8058242576898456e-05, "loss": 2.5393, "step": 2508000 }, { "epoch": 4.78, "learning_rate": 3.8055860934975465e-05, "loss": 2.5316, "step": 2508500 }, { "epoch": 4.78, "learning_rate": 3.805347929305247e-05, "loss": 2.5427, "step": 2509000 }, { "epoch": 4.78, "learning_rate": 3.805109765112947e-05, "loss": 2.541, "step": 2509500 }, { "epoch": 4.78, "learning_rate": 3.804871600920647e-05, "loss": 2.542, "step": 2510000 }, { "epoch": 4.78, "learning_rate": 3.804633436728348e-05, "loss": 2.5526, "step": 2510500 }, { "epoch": 4.78, "learning_rate": 3.8043952725360485e-05, "loss": 2.537, "step": 2511000 }, { "epoch": 4.79, "learning_rate": 3.8041571083437495e-05, "loss": 2.5371, "step": 2511500 }, { "epoch": 4.79, "learning_rate": 3.803919896808219e-05, "loss": 2.5469, "step": 2512000 }, { "epoch": 4.79, "learning_rate": 3.80368173261592e-05, "loss": 2.5493, "step": 2512500 }, { "epoch": 4.79, "learning_rate": 3.8034435684236195e-05, "loss": 2.5423, "step": 2513000 }, { "epoch": 4.79, "learning_rate": 3.8032054042313204e-05, "loss": 2.5415, "step": 2513500 }, { "epoch": 4.79, "learning_rate": 3.802967240039021e-05, "loss": 2.5213, "step": 2514000 }, { "epoch": 4.79, "learning_rate": 3.8027290758467216e-05, "loss": 2.5346, "step": 2514500 }, { "epoch": 4.79, "learning_rate": 3.802490911654422e-05, "loss": 2.5274, "step": 2515000 }, { "epoch": 4.79, "learning_rate": 3.802253223790507e-05, "loss": 2.5532, "step": 2515500 }, { "epoch": 4.79, "learning_rate": 3.8020155359265924e-05, "loss": 2.5527, "step": 2516000 }, { "epoch": 4.79, "learning_rate": 3.801777371734293e-05, "loss": 2.5247, "step": 2516500 }, { "epoch": 4.8, "learning_rate": 3.801539207541993e-05, "loss": 2.5365, "step": 2517000 }, { "epoch": 4.8, "learning_rate": 3.801301043349694e-05, "loss": 2.5419, "step": 2517500 }, { "epoch": 4.8, "learning_rate": 3.801062879157394e-05, "loss": 2.5279, "step": 2518000 }, { "epoch": 4.8, "learning_rate": 3.800824714965095e-05, "loss": 2.5294, "step": 2518500 }, { "epoch": 4.8, "learning_rate": 3.800586550772795e-05, "loss": 2.5437, "step": 2519000 }, { "epoch": 4.8, "learning_rate": 3.8003483865804956e-05, "loss": 2.5483, "step": 2519500 }, { "epoch": 4.8, "learning_rate": 3.8001102223881965e-05, "loss": 2.5369, "step": 2520000 }, { "epoch": 4.8, "eval_accuracy": 0.5372064933685687, "eval_loss": 2.4428091049194336, "eval_runtime": 4203.7529, "eval_samples_per_second": 65.415, "eval_steps_per_second": 6.542, "step": 2520000 }, { "epoch": 4.8, "learning_rate": 3.799872058195897e-05, "loss": 2.531, "step": 2520500 }, { "epoch": 4.8, "learning_rate": 3.799633894003598e-05, "loss": 2.529, "step": 2521000 }, { "epoch": 4.8, "learning_rate": 3.799395729811298e-05, "loss": 2.545, "step": 2521500 }, { "epoch": 4.81, "learning_rate": 3.799158041947383e-05, "loss": 2.5421, "step": 2522000 }, { "epoch": 4.81, "learning_rate": 3.7989198777550835e-05, "loss": 2.543, "step": 2522500 }, { "epoch": 4.81, "learning_rate": 3.798681713562784e-05, "loss": 2.5304, "step": 2523000 }, { "epoch": 4.81, "learning_rate": 3.798444025698869e-05, "loss": 2.5456, "step": 2523500 }, { "epoch": 4.81, "learning_rate": 3.79820586150657e-05, "loss": 2.5312, "step": 2524000 }, { "epoch": 4.81, "learning_rate": 3.79796769731427e-05, "loss": 2.5404, "step": 2524500 }, { "epoch": 4.81, "learning_rate": 3.797729533121971e-05, "loss": 2.5486, "step": 2525000 }, { "epoch": 4.81, "learning_rate": 3.7974913689296714e-05, "loss": 2.5486, "step": 2525500 }, { "epoch": 4.81, "learning_rate": 3.7972536810657566e-05, "loss": 2.5443, "step": 2526000 }, { "epoch": 4.81, "learning_rate": 3.797015516873457e-05, "loss": 2.5296, "step": 2526500 }, { "epoch": 4.81, "learning_rate": 3.796777352681157e-05, "loss": 2.5409, "step": 2527000 }, { "epoch": 4.82, "learning_rate": 3.796539188488858e-05, "loss": 2.5243, "step": 2527500 }, { "epoch": 4.82, "learning_rate": 3.796301024296558e-05, "loss": 2.5564, "step": 2528000 }, { "epoch": 4.82, "learning_rate": 3.796062860104259e-05, "loss": 2.5454, "step": 2528500 }, { "epoch": 4.82, "learning_rate": 3.7958246959119595e-05, "loss": 2.5576, "step": 2529000 }, { "epoch": 4.82, "learning_rate": 3.79558653171966e-05, "loss": 2.5429, "step": 2529500 }, { "epoch": 4.82, "learning_rate": 3.795348367527361e-05, "loss": 2.5461, "step": 2530000 }, { "epoch": 4.82, "learning_rate": 3.795110679663445e-05, "loss": 2.5555, "step": 2530500 }, { "epoch": 4.82, "learning_rate": 3.794872515471146e-05, "loss": 2.5363, "step": 2531000 }, { "epoch": 4.82, "learning_rate": 3.794634351278847e-05, "loss": 2.5421, "step": 2531500 }, { "epoch": 4.82, "learning_rate": 3.794396187086547e-05, "loss": 2.5553, "step": 2532000 }, { "epoch": 4.83, "learning_rate": 3.794158975551017e-05, "loss": 2.5582, "step": 2532500 }, { "epoch": 4.83, "learning_rate": 3.793920811358717e-05, "loss": 2.5334, "step": 2533000 }, { "epoch": 4.83, "learning_rate": 3.7936826471664175e-05, "loss": 2.5472, "step": 2533500 }, { "epoch": 4.83, "learning_rate": 3.7934444829741184e-05, "loss": 2.5431, "step": 2534000 }, { "epoch": 4.83, "learning_rate": 3.7932063187818187e-05, "loss": 2.5442, "step": 2534500 }, { "epoch": 4.83, "learning_rate": 3.7929681545895196e-05, "loss": 2.5264, "step": 2535000 }, { "epoch": 4.83, "learning_rate": 3.7927299903972205e-05, "loss": 2.5472, "step": 2535500 }, { "epoch": 4.83, "learning_rate": 3.79249182620492e-05, "loss": 2.5313, "step": 2536000 }, { "epoch": 4.83, "learning_rate": 3.792253662012621e-05, "loss": 2.545, "step": 2536500 }, { "epoch": 4.83, "learning_rate": 3.792015974148706e-05, "loss": 2.5485, "step": 2537000 }, { "epoch": 4.83, "learning_rate": 3.7917778099564066e-05, "loss": 2.5452, "step": 2537500 }, { "epoch": 4.84, "learning_rate": 3.7915396457641075e-05, "loss": 2.5186, "step": 2538000 }, { "epoch": 4.84, "learning_rate": 3.791301481571808e-05, "loss": 2.5394, "step": 2538500 }, { "epoch": 4.84, "learning_rate": 3.791063793707893e-05, "loss": 2.5534, "step": 2539000 }, { "epoch": 4.84, "learning_rate": 3.790825629515593e-05, "loss": 2.5348, "step": 2539500 }, { "epoch": 4.84, "learning_rate": 3.7905874653232935e-05, "loss": 2.5447, "step": 2540000 }, { "epoch": 4.84, "learning_rate": 3.7903493011309944e-05, "loss": 2.5455, "step": 2540500 }, { "epoch": 4.84, "learning_rate": 3.79011161326708e-05, "loss": 2.5154, "step": 2541000 }, { "epoch": 4.84, "learning_rate": 3.789873925403164e-05, "loss": 2.5498, "step": 2541500 }, { "epoch": 4.84, "learning_rate": 3.789635761210865e-05, "loss": 2.5405, "step": 2542000 }, { "epoch": 4.84, "learning_rate": 3.789397597018566e-05, "loss": 2.5151, "step": 2542500 }, { "epoch": 4.85, "learning_rate": 3.789159432826266e-05, "loss": 2.5344, "step": 2543000 }, { "epoch": 4.85, "learning_rate": 3.7889212686339666e-05, "loss": 2.5376, "step": 2543500 }, { "epoch": 4.85, "learning_rate": 3.788683104441667e-05, "loss": 2.5109, "step": 2544000 }, { "epoch": 4.85, "learning_rate": 3.788444940249368e-05, "loss": 2.5534, "step": 2544500 }, { "epoch": 4.85, "learning_rate": 3.788207252385453e-05, "loss": 2.5478, "step": 2545000 }, { "epoch": 4.85, "learning_rate": 3.787969088193153e-05, "loss": 2.5323, "step": 2545500 }, { "epoch": 4.85, "learning_rate": 3.7877309240008536e-05, "loss": 2.5428, "step": 2546000 }, { "epoch": 4.85, "learning_rate": 3.787492759808554e-05, "loss": 2.5569, "step": 2546500 }, { "epoch": 4.85, "learning_rate": 3.787254595616255e-05, "loss": 2.5489, "step": 2547000 }, { "epoch": 4.85, "learning_rate": 3.787016431423955e-05, "loss": 2.557, "step": 2547500 }, { "epoch": 4.85, "learning_rate": 3.786778267231656e-05, "loss": 2.5466, "step": 2548000 }, { "epoch": 4.86, "learning_rate": 3.786540103039356e-05, "loss": 2.5322, "step": 2548500 }, { "epoch": 4.86, "learning_rate": 3.7863024151754415e-05, "loss": 2.5222, "step": 2549000 }, { "epoch": 4.86, "learning_rate": 3.786064250983142e-05, "loss": 2.5452, "step": 2549500 }, { "epoch": 4.86, "learning_rate": 3.785826563119227e-05, "loss": 2.5369, "step": 2550000 }, { "epoch": 4.86, "eval_accuracy": 0.5374264265415879, "eval_loss": 2.4390408992767334, "eval_runtime": 4211.1347, "eval_samples_per_second": 65.3, "eval_steps_per_second": 6.53, "step": 2550000 }, { "epoch": 4.86, "learning_rate": 3.785588398926927e-05, "loss": 2.5389, "step": 2550500 }, { "epoch": 4.86, "learning_rate": 3.785350234734628e-05, "loss": 2.5407, "step": 2551000 }, { "epoch": 4.86, "learning_rate": 3.7851120705423284e-05, "loss": 2.5599, "step": 2551500 }, { "epoch": 4.86, "learning_rate": 3.7848739063500294e-05, "loss": 2.5457, "step": 2552000 }, { "epoch": 4.86, "learning_rate": 3.7846357421577296e-05, "loss": 2.5466, "step": 2552500 }, { "epoch": 4.86, "learning_rate": 3.78439757796543e-05, "loss": 2.5444, "step": 2553000 }, { "epoch": 4.87, "learning_rate": 3.784159413773131e-05, "loss": 2.5426, "step": 2553500 }, { "epoch": 4.87, "learning_rate": 3.783921725909216e-05, "loss": 2.5551, "step": 2554000 }, { "epoch": 4.87, "learning_rate": 3.7836835617169163e-05, "loss": 2.5322, "step": 2554500 }, { "epoch": 4.87, "learning_rate": 3.783445397524617e-05, "loss": 2.5495, "step": 2555000 }, { "epoch": 4.87, "learning_rate": 3.783207233332317e-05, "loss": 2.5707, "step": 2555500 }, { "epoch": 4.87, "learning_rate": 3.782969069140018e-05, "loss": 2.5465, "step": 2556000 }, { "epoch": 4.87, "learning_rate": 3.782731381276103e-05, "loss": 2.5395, "step": 2556500 }, { "epoch": 4.87, "learning_rate": 3.782493693412188e-05, "loss": 2.5605, "step": 2557000 }, { "epoch": 4.87, "learning_rate": 3.7822555292198885e-05, "loss": 2.548, "step": 2557500 }, { "epoch": 4.87, "learning_rate": 3.7820173650275895e-05, "loss": 2.5524, "step": 2558000 }, { "epoch": 4.87, "learning_rate": 3.78177920083529e-05, "loss": 2.5469, "step": 2558500 }, { "epoch": 4.88, "learning_rate": 3.781541036642991e-05, "loss": 2.5456, "step": 2559000 }, { "epoch": 4.88, "learning_rate": 3.78130287245069e-05, "loss": 2.5469, "step": 2559500 }, { "epoch": 4.88, "learning_rate": 3.781064708258391e-05, "loss": 2.5348, "step": 2560000 }, { "epoch": 4.88, "learning_rate": 3.7808265440660915e-05, "loss": 2.5506, "step": 2560500 }, { "epoch": 4.88, "learning_rate": 3.780588856202177e-05, "loss": 2.5258, "step": 2561000 }, { "epoch": 4.88, "learning_rate": 3.7803506920098776e-05, "loss": 2.5519, "step": 2561500 }, { "epoch": 4.88, "learning_rate": 3.780112527817578e-05, "loss": 2.537, "step": 2562000 }, { "epoch": 4.88, "learning_rate": 3.779874363625278e-05, "loss": 2.5255, "step": 2562500 }, { "epoch": 4.88, "learning_rate": 3.7796366757613634e-05, "loss": 2.5494, "step": 2563000 }, { "epoch": 4.88, "learning_rate": 3.7793989878974486e-05, "loss": 2.5488, "step": 2563500 }, { "epoch": 4.89, "learning_rate": 3.779160823705149e-05, "loss": 2.5304, "step": 2564000 }, { "epoch": 4.89, "learning_rate": 3.77892265951285e-05, "loss": 2.5525, "step": 2564500 }, { "epoch": 4.89, "learning_rate": 3.77868449532055e-05, "loss": 2.5369, "step": 2565000 }, { "epoch": 4.89, "learning_rate": 3.778446807456635e-05, "loss": 2.5534, "step": 2565500 }, { "epoch": 4.89, "learning_rate": 3.778208643264336e-05, "loss": 2.5385, "step": 2566000 }, { "epoch": 4.89, "learning_rate": 3.777970479072036e-05, "loss": 2.537, "step": 2566500 }, { "epoch": 4.89, "learning_rate": 3.777732791208121e-05, "loss": 2.5188, "step": 2567000 }, { "epoch": 4.89, "learning_rate": 3.777494627015822e-05, "loss": 2.5438, "step": 2567500 }, { "epoch": 4.89, "learning_rate": 3.777256462823522e-05, "loss": 2.5328, "step": 2568000 }, { "epoch": 4.89, "learning_rate": 3.777018298631223e-05, "loss": 2.5367, "step": 2568500 }, { "epoch": 4.89, "learning_rate": 3.7767801344389235e-05, "loss": 2.5382, "step": 2569000 }, { "epoch": 4.9, "learning_rate": 3.776541970246624e-05, "loss": 2.554, "step": 2569500 }, { "epoch": 4.9, "learning_rate": 3.776303806054324e-05, "loss": 2.5343, "step": 2570000 }, { "epoch": 4.9, "learning_rate": 3.776065641862025e-05, "loss": 2.553, "step": 2570500 }, { "epoch": 4.9, "learning_rate": 3.775827477669726e-05, "loss": 2.5467, "step": 2571000 }, { "epoch": 4.9, "learning_rate": 3.775589313477426e-05, "loss": 2.5161, "step": 2571500 }, { "epoch": 4.9, "learning_rate": 3.775351149285127e-05, "loss": 2.5285, "step": 2572000 }, { "epoch": 4.9, "learning_rate": 3.7751129850928267e-05, "loss": 2.5498, "step": 2572500 }, { "epoch": 4.9, "learning_rate": 3.774875773557297e-05, "loss": 2.5429, "step": 2573000 }, { "epoch": 4.9, "learning_rate": 3.774637609364997e-05, "loss": 2.5136, "step": 2573500 }, { "epoch": 4.9, "learning_rate": 3.774399445172698e-05, "loss": 2.5261, "step": 2574000 }, { "epoch": 4.91, "learning_rate": 3.774161280980398e-05, "loss": 2.5337, "step": 2574500 }, { "epoch": 4.91, "learning_rate": 3.773923116788099e-05, "loss": 2.5384, "step": 2575000 }, { "epoch": 4.91, "learning_rate": 3.7736849525957995e-05, "loss": 2.5283, "step": 2575500 }, { "epoch": 4.91, "learning_rate": 3.773447264731885e-05, "loss": 2.5412, "step": 2576000 }, { "epoch": 4.91, "learning_rate": 3.773209100539585e-05, "loss": 2.5475, "step": 2576500 }, { "epoch": 4.91, "learning_rate": 3.772970936347285e-05, "loss": 2.5613, "step": 2577000 }, { "epoch": 4.91, "learning_rate": 3.772732772154986e-05, "loss": 2.5435, "step": 2577500 }, { "epoch": 4.91, "learning_rate": 3.7724946079626865e-05, "loss": 2.5268, "step": 2578000 }, { "epoch": 4.91, "learning_rate": 3.772256920098772e-05, "loss": 2.5559, "step": 2578500 }, { "epoch": 4.91, "learning_rate": 3.7720187559064726e-05, "loss": 2.5231, "step": 2579000 }, { "epoch": 4.91, "learning_rate": 3.771780591714172e-05, "loss": 2.5308, "step": 2579500 }, { "epoch": 4.92, "learning_rate": 3.771542427521873e-05, "loss": 2.5259, "step": 2580000 }, { "epoch": 4.92, "eval_accuracy": 0.5375039879106215, "eval_loss": 2.4392075538635254, "eval_runtime": 4207.9559, "eval_samples_per_second": 65.35, "eval_steps_per_second": 6.535, "step": 2580000 }, { "epoch": 4.92, "learning_rate": 3.7713042633295734e-05, "loss": 2.5409, "step": 2580500 }, { "epoch": 4.92, "learning_rate": 3.7710660991372744e-05, "loss": 2.5353, "step": 2581000 }, { "epoch": 4.92, "learning_rate": 3.7708279349449746e-05, "loss": 2.5377, "step": 2581500 }, { "epoch": 4.92, "learning_rate": 3.7705897707526756e-05, "loss": 2.5506, "step": 2582000 }, { "epoch": 4.92, "learning_rate": 3.770352082888761e-05, "loss": 2.5413, "step": 2582500 }, { "epoch": 4.92, "learning_rate": 3.7701139186964604e-05, "loss": 2.5423, "step": 2583000 }, { "epoch": 4.92, "learning_rate": 3.7698762308325456e-05, "loss": 2.5342, "step": 2583500 }, { "epoch": 4.92, "learning_rate": 3.7696380666402466e-05, "loss": 2.5329, "step": 2584000 }, { "epoch": 4.92, "learning_rate": 3.769399902447947e-05, "loss": 2.5378, "step": 2584500 }, { "epoch": 4.93, "learning_rate": 3.769161738255648e-05, "loss": 2.5242, "step": 2585000 }, { "epoch": 4.93, "learning_rate": 3.768923574063348e-05, "loss": 2.5491, "step": 2585500 }, { "epoch": 4.93, "learning_rate": 3.768685409871048e-05, "loss": 2.5384, "step": 2586000 }, { "epoch": 4.93, "learning_rate": 3.768447245678749e-05, "loss": 2.5603, "step": 2586500 }, { "epoch": 4.93, "learning_rate": 3.7682090814864495e-05, "loss": 2.528, "step": 2587000 }, { "epoch": 4.93, "learning_rate": 3.7679709172941504e-05, "loss": 2.532, "step": 2587500 }, { "epoch": 4.93, "learning_rate": 3.767732753101851e-05, "loss": 2.5581, "step": 2588000 }, { "epoch": 4.93, "learning_rate": 3.76749554156632e-05, "loss": 2.5473, "step": 2588500 }, { "epoch": 4.93, "learning_rate": 3.767257377374021e-05, "loss": 2.5472, "step": 2589000 }, { "epoch": 4.93, "learning_rate": 3.7670192131817214e-05, "loss": 2.5544, "step": 2589500 }, { "epoch": 4.93, "learning_rate": 3.766781048989422e-05, "loss": 2.5316, "step": 2590000 }, { "epoch": 4.94, "learning_rate": 3.7665428847971226e-05, "loss": 2.5312, "step": 2590500 }, { "epoch": 4.94, "learning_rate": 3.766304720604823e-05, "loss": 2.541, "step": 2591000 }, { "epoch": 4.94, "learning_rate": 3.766066556412524e-05, "loss": 2.5628, "step": 2591500 }, { "epoch": 4.94, "learning_rate": 3.765828392220224e-05, "loss": 2.5602, "step": 2592000 }, { "epoch": 4.94, "learning_rate": 3.7655907043563086e-05, "loss": 2.525, "step": 2592500 }, { "epoch": 4.94, "learning_rate": 3.7653525401640096e-05, "loss": 2.5484, "step": 2593000 }, { "epoch": 4.94, "learning_rate": 3.76511437597171e-05, "loss": 2.542, "step": 2593500 }, { "epoch": 4.94, "learning_rate": 3.764876211779411e-05, "loss": 2.5437, "step": 2594000 }, { "epoch": 4.94, "learning_rate": 3.764638047587111e-05, "loss": 2.541, "step": 2594500 }, { "epoch": 4.94, "learning_rate": 3.764399883394812e-05, "loss": 2.5387, "step": 2595000 }, { "epoch": 4.95, "learning_rate": 3.764162195530897e-05, "loss": 2.5292, "step": 2595500 }, { "epoch": 4.95, "learning_rate": 3.763924031338597e-05, "loss": 2.5575, "step": 2596000 }, { "epoch": 4.95, "learning_rate": 3.763685867146298e-05, "loss": 2.5336, "step": 2596500 }, { "epoch": 4.95, "learning_rate": 3.763447702953998e-05, "loss": 2.5241, "step": 2597000 }, { "epoch": 4.95, "learning_rate": 3.763209538761699e-05, "loss": 2.5514, "step": 2597500 }, { "epoch": 4.95, "learning_rate": 3.762971850897784e-05, "loss": 2.5448, "step": 2598000 }, { "epoch": 4.95, "learning_rate": 3.7627336867054844e-05, "loss": 2.5041, "step": 2598500 }, { "epoch": 4.95, "learning_rate": 3.762495522513185e-05, "loss": 2.5209, "step": 2599000 }, { "epoch": 4.95, "learning_rate": 3.7622573583208856e-05, "loss": 2.545, "step": 2599500 }, { "epoch": 4.95, "learning_rate": 3.76201967045697e-05, "loss": 2.5281, "step": 2600000 }, { "epoch": 4.95, "learning_rate": 3.7617819825930554e-05, "loss": 2.5235, "step": 2600500 }, { "epoch": 4.96, "learning_rate": 3.7615438184007563e-05, "loss": 2.557, "step": 2601000 }, { "epoch": 4.96, "learning_rate": 3.7613056542084566e-05, "loss": 2.5468, "step": 2601500 }, { "epoch": 4.96, "learning_rate": 3.7610674900161576e-05, "loss": 2.5249, "step": 2602000 }, { "epoch": 4.96, "learning_rate": 3.760829325823858e-05, "loss": 2.5353, "step": 2602500 }, { "epoch": 4.96, "learning_rate": 3.760591161631558e-05, "loss": 2.5367, "step": 2603000 }, { "epoch": 4.96, "learning_rate": 3.760352997439259e-05, "loss": 2.5245, "step": 2603500 }, { "epoch": 4.96, "learning_rate": 3.760114833246959e-05, "loss": 2.5397, "step": 2604000 }, { "epoch": 4.96, "learning_rate": 3.7598771453830445e-05, "loss": 2.5305, "step": 2604500 }, { "epoch": 4.96, "learning_rate": 3.75963945751913e-05, "loss": 2.5382, "step": 2605000 }, { "epoch": 4.96, "learning_rate": 3.75940129332683e-05, "loss": 2.5332, "step": 2605500 }, { "epoch": 4.97, "learning_rate": 3.75916312913453e-05, "loss": 2.5184, "step": 2606000 }, { "epoch": 4.97, "learning_rate": 3.758924964942231e-05, "loss": 2.5378, "step": 2606500 }, { "epoch": 4.97, "learning_rate": 3.7586868007499315e-05, "loss": 2.5253, "step": 2607000 }, { "epoch": 4.97, "learning_rate": 3.7584486365576324e-05, "loss": 2.5433, "step": 2607500 }, { "epoch": 4.97, "learning_rate": 3.758210948693717e-05, "loss": 2.5244, "step": 2608000 }, { "epoch": 4.97, "learning_rate": 3.757972784501418e-05, "loss": 2.5379, "step": 2608500 }, { "epoch": 4.97, "learning_rate": 3.757734620309118e-05, "loss": 2.5246, "step": 2609000 }, { "epoch": 4.97, "learning_rate": 3.7574964561168184e-05, "loss": 2.5474, "step": 2609500 }, { "epoch": 4.97, "learning_rate": 3.7572582919245194e-05, "loss": 2.534, "step": 2610000 }, { "epoch": 4.97, "eval_accuracy": 0.5373068798693941, "eval_loss": 2.4379160404205322, "eval_runtime": 4207.7476, "eval_samples_per_second": 65.353, "eval_steps_per_second": 6.535, "step": 2610000 }, { "epoch": 4.97, "learning_rate": 3.7570201277322196e-05, "loss": 2.5354, "step": 2610500 }, { "epoch": 4.97, "learning_rate": 3.7567819635399206e-05, "loss": 2.5287, "step": 2611000 }, { "epoch": 4.98, "learning_rate": 3.756543799347621e-05, "loss": 2.5368, "step": 2611500 }, { "epoch": 4.98, "learning_rate": 3.756305635155321e-05, "loss": 2.5123, "step": 2612000 }, { "epoch": 4.98, "learning_rate": 3.756067947291406e-05, "loss": 2.5199, "step": 2612500 }, { "epoch": 4.98, "learning_rate": 3.7558297830991066e-05, "loss": 2.5387, "step": 2613000 }, { "epoch": 4.98, "learning_rate": 3.7555916189068075e-05, "loss": 2.5414, "step": 2613500 }, { "epoch": 4.98, "learning_rate": 3.755353454714508e-05, "loss": 2.5315, "step": 2614000 }, { "epoch": 4.98, "learning_rate": 3.755115290522209e-05, "loss": 2.5338, "step": 2614500 }, { "epoch": 4.98, "learning_rate": 3.754877602658294e-05, "loss": 2.5217, "step": 2615000 }, { "epoch": 4.98, "learning_rate": 3.754639438465994e-05, "loss": 2.5421, "step": 2615500 }, { "epoch": 4.98, "learning_rate": 3.7544012742736945e-05, "loss": 2.5395, "step": 2616000 }, { "epoch": 4.99, "learning_rate": 3.75416358640978e-05, "loss": 2.5507, "step": 2616500 }, { "epoch": 4.99, "learning_rate": 3.75392542221748e-05, "loss": 2.531, "step": 2617000 }, { "epoch": 4.99, "learning_rate": 3.753687258025181e-05, "loss": 2.5358, "step": 2617500 }, { "epoch": 4.99, "learning_rate": 3.753449093832881e-05, "loss": 2.5428, "step": 2618000 }, { "epoch": 4.99, "learning_rate": 3.753210929640582e-05, "loss": 2.5447, "step": 2618500 }, { "epoch": 4.99, "learning_rate": 3.7529727654482824e-05, "loss": 2.5355, "step": 2619000 }, { "epoch": 4.99, "learning_rate": 3.752735077584367e-05, "loss": 2.5419, "step": 2619500 }, { "epoch": 4.99, "learning_rate": 3.752497389720452e-05, "loss": 2.5468, "step": 2620000 }, { "epoch": 4.99, "learning_rate": 3.752259225528153e-05, "loss": 2.5302, "step": 2620500 }, { "epoch": 4.99, "learning_rate": 3.7520210613358534e-05, "loss": 2.5532, "step": 2621000 }, { "epoch": 4.99, "learning_rate": 3.751782897143554e-05, "loss": 2.5312, "step": 2621500 }, { "epoch": 5.0, "learning_rate": 3.7515447329512546e-05, "loss": 2.5414, "step": 2622000 }, { "epoch": 5.0, "learning_rate": 3.751306568758955e-05, "loss": 2.5392, "step": 2622500 }, { "epoch": 5.0, "learning_rate": 3.751068404566656e-05, "loss": 2.5513, "step": 2623000 }, { "epoch": 5.0, "learning_rate": 3.750830240374356e-05, "loss": 2.5248, "step": 2623500 }, { "epoch": 5.0, "learning_rate": 3.750592552510441e-05, "loss": 2.5324, "step": 2624000 }, { "epoch": 5.0, "learning_rate": 3.750354388318142e-05, "loss": 2.545, "step": 2624500 }, { "epoch": 5.0, "learning_rate": 3.7501162241258425e-05, "loss": 2.5177, "step": 2625000 }, { "epoch": 5.0, "learning_rate": 3.749878059933543e-05, "loss": 2.5209, "step": 2625500 }, { "epoch": 5.0, "learning_rate": 3.749639895741243e-05, "loss": 2.5335, "step": 2626000 }, { "epoch": 5.0, "learning_rate": 3.749401731548944e-05, "loss": 2.522, "step": 2626500 }, { "epoch": 5.01, "learning_rate": 3.749164043685029e-05, "loss": 2.5701, "step": 2627000 }, { "epoch": 5.01, "learning_rate": 3.7489258794927294e-05, "loss": 2.5225, "step": 2627500 }, { "epoch": 5.01, "learning_rate": 3.7486877153004304e-05, "loss": 2.5248, "step": 2628000 }, { "epoch": 5.01, "learning_rate": 3.7484495511081306e-05, "loss": 2.5487, "step": 2628500 }, { "epoch": 5.01, "learning_rate": 3.748211386915831e-05, "loss": 2.5257, "step": 2629000 }, { "epoch": 5.01, "learning_rate": 3.747973222723531e-05, "loss": 2.5193, "step": 2629500 }, { "epoch": 5.01, "learning_rate": 3.747735058531232e-05, "loss": 2.5129, "step": 2630000 }, { "epoch": 5.01, "learning_rate": 3.747496894338933e-05, "loss": 2.55, "step": 2630500 }, { "epoch": 5.01, "learning_rate": 3.747258730146633e-05, "loss": 2.517, "step": 2631000 }, { "epoch": 5.01, "learning_rate": 3.747021518611103e-05, "loss": 2.5307, "step": 2631500 }, { "epoch": 5.01, "learning_rate": 3.746783354418804e-05, "loss": 2.5323, "step": 2632000 }, { "epoch": 5.02, "learning_rate": 3.746545666554889e-05, "loss": 2.5324, "step": 2632500 }, { "epoch": 5.02, "learning_rate": 3.7463075023625886e-05, "loss": 2.5233, "step": 2633000 }, { "epoch": 5.02, "learning_rate": 3.7460693381702895e-05, "loss": 2.5242, "step": 2633500 }, { "epoch": 5.02, "learning_rate": 3.74583117397799e-05, "loss": 2.5269, "step": 2634000 }, { "epoch": 5.02, "learning_rate": 3.745593009785691e-05, "loss": 2.5367, "step": 2634500 }, { "epoch": 5.02, "learning_rate": 3.745354845593391e-05, "loss": 2.5328, "step": 2635000 }, { "epoch": 5.02, "learning_rate": 3.745116681401091e-05, "loss": 2.5253, "step": 2635500 }, { "epoch": 5.02, "learning_rate": 3.744878517208792e-05, "loss": 2.5566, "step": 2636000 }, { "epoch": 5.02, "learning_rate": 3.7446403530164924e-05, "loss": 2.527, "step": 2636500 }, { "epoch": 5.02, "learning_rate": 3.7444026651525777e-05, "loss": 2.5356, "step": 2637000 }, { "epoch": 5.03, "learning_rate": 3.7441645009602786e-05, "loss": 2.5188, "step": 2637500 }, { "epoch": 5.03, "learning_rate": 3.743926336767979e-05, "loss": 2.5171, "step": 2638000 }, { "epoch": 5.03, "learning_rate": 3.74368817257568e-05, "loss": 2.5295, "step": 2638500 }, { "epoch": 5.03, "learning_rate": 3.7434504847117644e-05, "loss": 2.5287, "step": 2639000 }, { "epoch": 5.03, "learning_rate": 3.7432123205194646e-05, "loss": 2.5322, "step": 2639500 }, { "epoch": 5.03, "learning_rate": 3.7429741563271656e-05, "loss": 2.537, "step": 2640000 }, { "epoch": 5.03, "eval_accuracy": 0.5376265596859388, "eval_loss": 2.4380645751953125, "eval_runtime": 4198.6437, "eval_samples_per_second": 65.494, "eval_steps_per_second": 6.549, "step": 2640000 }, { "epoch": 5.03, "learning_rate": 3.742735992134866e-05, "loss": 2.5307, "step": 2640500 }, { "epoch": 5.03, "learning_rate": 3.742497827942567e-05, "loss": 2.5309, "step": 2641000 }, { "epoch": 5.03, "learning_rate": 3.742259663750267e-05, "loss": 2.5303, "step": 2641500 }, { "epoch": 5.03, "learning_rate": 3.742021499557967e-05, "loss": 2.5254, "step": 2642000 }, { "epoch": 5.03, "learning_rate": 3.7417833353656675e-05, "loss": 2.5331, "step": 2642500 }, { "epoch": 5.04, "learning_rate": 3.741545647501753e-05, "loss": 2.5507, "step": 2643000 }, { "epoch": 5.04, "learning_rate": 3.741307483309454e-05, "loss": 2.5298, "step": 2643500 }, { "epoch": 5.04, "learning_rate": 3.741069795445539e-05, "loss": 2.5093, "step": 2644000 }, { "epoch": 5.04, "learning_rate": 3.740831631253239e-05, "loss": 2.5362, "step": 2644500 }, { "epoch": 5.04, "learning_rate": 3.74059346706094e-05, "loss": 2.509, "step": 2645000 }, { "epoch": 5.04, "learning_rate": 3.74035530286864e-05, "loss": 2.5515, "step": 2645500 }, { "epoch": 5.04, "learning_rate": 3.740117138676341e-05, "loss": 2.5053, "step": 2646000 }, { "epoch": 5.04, "learning_rate": 3.739878974484041e-05, "loss": 2.5289, "step": 2646500 }, { "epoch": 5.04, "learning_rate": 3.739640810291742e-05, "loss": 2.5575, "step": 2647000 }, { "epoch": 5.04, "learning_rate": 3.739402646099442e-05, "loss": 2.5478, "step": 2647500 }, { "epoch": 5.05, "learning_rate": 3.739164481907143e-05, "loss": 2.5396, "step": 2648000 }, { "epoch": 5.05, "learning_rate": 3.738926794043228e-05, "loss": 2.5313, "step": 2648500 }, { "epoch": 5.05, "learning_rate": 3.7386886298509286e-05, "loss": 2.5536, "step": 2649000 }, { "epoch": 5.05, "learning_rate": 3.738450465658629e-05, "loss": 2.5306, "step": 2649500 }, { "epoch": 5.05, "learning_rate": 3.73821230146633e-05, "loss": 2.535, "step": 2650000 }, { "epoch": 5.05, "learning_rate": 3.737974613602414e-05, "loss": 2.5265, "step": 2650500 }, { "epoch": 5.05, "learning_rate": 3.737736449410115e-05, "loss": 2.5401, "step": 2651000 }, { "epoch": 5.05, "learning_rate": 3.737498285217816e-05, "loss": 2.5196, "step": 2651500 }, { "epoch": 5.05, "learning_rate": 3.737260121025516e-05, "loss": 2.5358, "step": 2652000 }, { "epoch": 5.05, "learning_rate": 3.737022433161601e-05, "loss": 2.5372, "step": 2652500 }, { "epoch": 5.05, "learning_rate": 3.736784268969302e-05, "loss": 2.5445, "step": 2653000 }, { "epoch": 5.06, "learning_rate": 3.736546104777002e-05, "loss": 2.5333, "step": 2653500 }, { "epoch": 5.06, "learning_rate": 3.736307940584703e-05, "loss": 2.5358, "step": 2654000 }, { "epoch": 5.06, "learning_rate": 3.7360697763924034e-05, "loss": 2.5242, "step": 2654500 }, { "epoch": 5.06, "learning_rate": 3.7358320885284887e-05, "loss": 2.5518, "step": 2655000 }, { "epoch": 5.06, "learning_rate": 3.735593924336189e-05, "loss": 2.5212, "step": 2655500 }, { "epoch": 5.06, "learning_rate": 3.735355760143889e-05, "loss": 2.5493, "step": 2656000 }, { "epoch": 5.06, "learning_rate": 3.73511759595159e-05, "loss": 2.5171, "step": 2656500 }, { "epoch": 5.06, "learning_rate": 3.7348794317592904e-05, "loss": 2.5491, "step": 2657000 }, { "epoch": 5.06, "learning_rate": 3.734641267566991e-05, "loss": 2.5245, "step": 2657500 }, { "epoch": 5.06, "learning_rate": 3.7344031033746916e-05, "loss": 2.5316, "step": 2658000 }, { "epoch": 5.07, "learning_rate": 3.734164939182392e-05, "loss": 2.5282, "step": 2658500 }, { "epoch": 5.07, "learning_rate": 3.733927251318477e-05, "loss": 2.5295, "step": 2659000 }, { "epoch": 5.07, "learning_rate": 3.733689087126177e-05, "loss": 2.5217, "step": 2659500 }, { "epoch": 5.07, "learning_rate": 3.733450922933878e-05, "loss": 2.5452, "step": 2660000 }, { "epoch": 5.07, "learning_rate": 3.7332127587415785e-05, "loss": 2.5176, "step": 2660500 }, { "epoch": 5.07, "learning_rate": 3.7329745945492795e-05, "loss": 2.5393, "step": 2661000 }, { "epoch": 5.07, "learning_rate": 3.732736906685365e-05, "loss": 2.5256, "step": 2661500 }, { "epoch": 5.07, "learning_rate": 3.732498742493065e-05, "loss": 2.5153, "step": 2662000 }, { "epoch": 5.07, "learning_rate": 3.732260578300765e-05, "loss": 2.5408, "step": 2662500 }, { "epoch": 5.07, "learning_rate": 3.732022414108466e-05, "loss": 2.5305, "step": 2663000 }, { "epoch": 5.07, "learning_rate": 3.731784726244551e-05, "loss": 2.5372, "step": 2663500 }, { "epoch": 5.08, "learning_rate": 3.731546562052252e-05, "loss": 2.5275, "step": 2664000 }, { "epoch": 5.08, "learning_rate": 3.731308874188337e-05, "loss": 2.5509, "step": 2664500 }, { "epoch": 5.08, "learning_rate": 3.731070709996037e-05, "loss": 2.5465, "step": 2665000 }, { "epoch": 5.08, "learning_rate": 3.7308330221321224e-05, "loss": 2.5596, "step": 2665500 }, { "epoch": 5.08, "learning_rate": 3.7305948579398226e-05, "loss": 2.5393, "step": 2666000 }, { "epoch": 5.08, "learning_rate": 3.730356693747523e-05, "loss": 2.5284, "step": 2666500 }, { "epoch": 5.08, "learning_rate": 3.730118529555224e-05, "loss": 2.5, "step": 2667000 }, { "epoch": 5.08, "learning_rate": 3.729880365362924e-05, "loss": 2.5425, "step": 2667500 }, { "epoch": 5.08, "learning_rate": 3.729642201170625e-05, "loss": 2.5366, "step": 2668000 }, { "epoch": 5.08, "learning_rate": 3.729404036978325e-05, "loss": 2.5437, "step": 2668500 }, { "epoch": 5.09, "learning_rate": 3.7291658727860256e-05, "loss": 2.5403, "step": 2669000 }, { "epoch": 5.09, "learning_rate": 3.7289277085937265e-05, "loss": 2.523, "step": 2669500 }, { "epoch": 5.09, "learning_rate": 3.728689544401427e-05, "loss": 2.5308, "step": 2670000 }, { "epoch": 5.09, "eval_accuracy": 0.5378810522626769, "eval_loss": 2.4359946250915527, "eval_runtime": 4205.7547, "eval_samples_per_second": 65.384, "eval_steps_per_second": 6.538, "step": 2670000 }, { "epoch": 5.09, "learning_rate": 3.728451380209128e-05, "loss": 2.5079, "step": 2670500 }, { "epoch": 5.09, "learning_rate": 3.728213216016828e-05, "loss": 2.5396, "step": 2671000 }, { "epoch": 5.09, "learning_rate": 3.727975528152913e-05, "loss": 2.5348, "step": 2671500 }, { "epoch": 5.09, "learning_rate": 3.7277373639606135e-05, "loss": 2.5383, "step": 2672000 }, { "epoch": 5.09, "learning_rate": 3.727499199768314e-05, "loss": 2.5316, "step": 2672500 }, { "epoch": 5.09, "learning_rate": 3.727261035576015e-05, "loss": 2.5227, "step": 2673000 }, { "epoch": 5.09, "learning_rate": 3.7270233477121e-05, "loss": 2.5261, "step": 2673500 }, { "epoch": 5.09, "learning_rate": 3.7267851835198e-05, "loss": 2.5181, "step": 2674000 }, { "epoch": 5.1, "learning_rate": 3.726547019327501e-05, "loss": 2.5409, "step": 2674500 }, { "epoch": 5.1, "learning_rate": 3.726308855135201e-05, "loss": 2.5371, "step": 2675000 }, { "epoch": 5.1, "learning_rate": 3.7260706909429016e-05, "loss": 2.5389, "step": 2675500 }, { "epoch": 5.1, "learning_rate": 3.725833479407371e-05, "loss": 2.5334, "step": 2676000 }, { "epoch": 5.1, "learning_rate": 3.725595315215072e-05, "loss": 2.5238, "step": 2676500 }, { "epoch": 5.1, "learning_rate": 3.725357627351157e-05, "loss": 2.5394, "step": 2677000 }, { "epoch": 5.1, "learning_rate": 3.7251194631588576e-05, "loss": 2.5245, "step": 2677500 }, { "epoch": 5.1, "learning_rate": 3.7248812989665585e-05, "loss": 2.5013, "step": 2678000 }, { "epoch": 5.1, "learning_rate": 3.724643134774259e-05, "loss": 2.5229, "step": 2678500 }, { "epoch": 5.1, "learning_rate": 3.724404970581959e-05, "loss": 2.523, "step": 2679000 }, { "epoch": 5.11, "learning_rate": 3.724166806389659e-05, "loss": 2.5384, "step": 2679500 }, { "epoch": 5.11, "learning_rate": 3.72392864219736e-05, "loss": 2.5439, "step": 2680000 }, { "epoch": 5.11, "learning_rate": 3.7236904780050605e-05, "loss": 2.5298, "step": 2680500 }, { "epoch": 5.11, "learning_rate": 3.7234523138127615e-05, "loss": 2.5331, "step": 2681000 }, { "epoch": 5.11, "learning_rate": 3.723214149620462e-05, "loss": 2.537, "step": 2681500 }, { "epoch": 5.11, "learning_rate": 3.722975985428162e-05, "loss": 2.5286, "step": 2682000 }, { "epoch": 5.11, "learning_rate": 3.722737821235863e-05, "loss": 2.5478, "step": 2682500 }, { "epoch": 5.11, "learning_rate": 3.7225001333719475e-05, "loss": 2.5339, "step": 2683000 }, { "epoch": 5.11, "learning_rate": 3.7222619691796484e-05, "loss": 2.5233, "step": 2683500 }, { "epoch": 5.11, "learning_rate": 3.7220238049873494e-05, "loss": 2.5121, "step": 2684000 }, { "epoch": 5.11, "learning_rate": 3.7217856407950496e-05, "loss": 2.5391, "step": 2684500 }, { "epoch": 5.12, "learning_rate": 3.721547952931135e-05, "loss": 2.5427, "step": 2685000 }, { "epoch": 5.12, "learning_rate": 3.721309788738835e-05, "loss": 2.5092, "step": 2685500 }, { "epoch": 5.12, "learning_rate": 3.7210716245465354e-05, "loss": 2.5425, "step": 2686000 }, { "epoch": 5.12, "learning_rate": 3.720833460354236e-05, "loss": 2.5303, "step": 2686500 }, { "epoch": 5.12, "learning_rate": 3.7205957724903215e-05, "loss": 2.5473, "step": 2687000 }, { "epoch": 5.12, "learning_rate": 3.720357608298022e-05, "loss": 2.5198, "step": 2687500 }, { "epoch": 5.12, "learning_rate": 3.720119444105723e-05, "loss": 2.53, "step": 2688000 }, { "epoch": 5.12, "learning_rate": 3.719881756241807e-05, "loss": 2.5321, "step": 2688500 }, { "epoch": 5.12, "learning_rate": 3.7196435920495076e-05, "loss": 2.5212, "step": 2689000 }, { "epoch": 5.12, "learning_rate": 3.719405904185593e-05, "loss": 2.5326, "step": 2689500 }, { "epoch": 5.13, "learning_rate": 3.719167739993293e-05, "loss": 2.5263, "step": 2690000 }, { "epoch": 5.13, "learning_rate": 3.718929575800994e-05, "loss": 2.5153, "step": 2690500 }, { "epoch": 5.13, "learning_rate": 3.718691411608695e-05, "loss": 2.5223, "step": 2691000 }, { "epoch": 5.13, "learning_rate": 3.718453247416395e-05, "loss": 2.5296, "step": 2691500 }, { "epoch": 5.13, "learning_rate": 3.7182150832240955e-05, "loss": 2.5229, "step": 2692000 }, { "epoch": 5.13, "learning_rate": 3.717976919031796e-05, "loss": 2.5344, "step": 2692500 }, { "epoch": 5.13, "learning_rate": 3.7177387548394967e-05, "loss": 2.5398, "step": 2693000 }, { "epoch": 5.13, "learning_rate": 3.717500590647197e-05, "loss": 2.5474, "step": 2693500 }, { "epoch": 5.13, "learning_rate": 3.717262426454898e-05, "loss": 2.5449, "step": 2694000 }, { "epoch": 5.13, "learning_rate": 3.717024262262598e-05, "loss": 2.531, "step": 2694500 }, { "epoch": 5.13, "learning_rate": 3.7167860980702984e-05, "loss": 2.5311, "step": 2695000 }, { "epoch": 5.14, "learning_rate": 3.7165484102063836e-05, "loss": 2.5291, "step": 2695500 }, { "epoch": 5.14, "learning_rate": 3.716310246014084e-05, "loss": 2.5471, "step": 2696000 }, { "epoch": 5.14, "learning_rate": 3.716072081821785e-05, "loss": 2.5324, "step": 2696500 }, { "epoch": 5.14, "learning_rate": 3.715833917629485e-05, "loss": 2.5292, "step": 2697000 }, { "epoch": 5.14, "learning_rate": 3.715595753437186e-05, "loss": 2.5411, "step": 2697500 }, { "epoch": 5.14, "learning_rate": 3.715357589244886e-05, "loss": 2.5359, "step": 2698000 }, { "epoch": 5.14, "learning_rate": 3.7151194250525865e-05, "loss": 2.5025, "step": 2698500 }, { "epoch": 5.14, "learning_rate": 3.7148812608602875e-05, "loss": 2.5466, "step": 2699000 }, { "epoch": 5.14, "learning_rate": 3.714643572996373e-05, "loss": 2.5265, "step": 2699500 }, { "epoch": 5.14, "learning_rate": 3.714405408804073e-05, "loss": 2.5476, "step": 2700000 }, { "epoch": 5.14, "eval_accuracy": 0.53787057282201, "eval_loss": 2.4362542629241943, "eval_runtime": 4203.9732, "eval_samples_per_second": 65.411, "eval_steps_per_second": 6.541, "step": 2700000 }, { "epoch": 5.15, "learning_rate": 3.714167244611774e-05, "loss": 2.5301, "step": 2700500 }, { "epoch": 5.15, "learning_rate": 3.713929556747859e-05, "loss": 2.5348, "step": 2701000 }, { "epoch": 5.15, "learning_rate": 3.713691392555559e-05, "loss": 2.5387, "step": 2701500 }, { "epoch": 5.15, "learning_rate": 3.71345322836326e-05, "loss": 2.5409, "step": 2702000 }, { "epoch": 5.15, "learning_rate": 3.71321506417096e-05, "loss": 2.5294, "step": 2702500 }, { "epoch": 5.15, "learning_rate": 3.712976899978661e-05, "loss": 2.5246, "step": 2703000 }, { "epoch": 5.15, "learning_rate": 3.712739212114746e-05, "loss": 2.5364, "step": 2703500 }, { "epoch": 5.15, "learning_rate": 3.7125010479224464e-05, "loss": 2.5261, "step": 2704000 }, { "epoch": 5.15, "learning_rate": 3.712262883730147e-05, "loss": 2.5357, "step": 2704500 }, { "epoch": 5.15, "learning_rate": 3.712024719537847e-05, "loss": 2.5084, "step": 2705000 }, { "epoch": 5.15, "learning_rate": 3.711786555345548e-05, "loss": 2.5461, "step": 2705500 }, { "epoch": 5.16, "learning_rate": 3.711548391153248e-05, "loss": 2.5179, "step": 2706000 }, { "epoch": 5.16, "learning_rate": 3.711310703289333e-05, "loss": 2.5459, "step": 2706500 }, { "epoch": 5.16, "learning_rate": 3.711072539097034e-05, "loss": 2.5336, "step": 2707000 }, { "epoch": 5.16, "learning_rate": 3.7108343749047345e-05, "loss": 2.5458, "step": 2707500 }, { "epoch": 5.16, "learning_rate": 3.710596210712435e-05, "loss": 2.5293, "step": 2708000 }, { "epoch": 5.16, "learning_rate": 3.710358046520136e-05, "loss": 2.5293, "step": 2708500 }, { "epoch": 5.16, "learning_rate": 3.710119882327836e-05, "loss": 2.5509, "step": 2709000 }, { "epoch": 5.16, "learning_rate": 3.709881718135537e-05, "loss": 2.5288, "step": 2709500 }, { "epoch": 5.16, "learning_rate": 3.709643553943237e-05, "loss": 2.5334, "step": 2710000 }, { "epoch": 5.16, "learning_rate": 3.7094053897509375e-05, "loss": 2.539, "step": 2710500 }, { "epoch": 5.17, "learning_rate": 3.709167701887023e-05, "loss": 2.5251, "step": 2711000 }, { "epoch": 5.17, "learning_rate": 3.708930014023108e-05, "loss": 2.5415, "step": 2711500 }, { "epoch": 5.17, "learning_rate": 3.708691849830808e-05, "loss": 2.5428, "step": 2712000 }, { "epoch": 5.17, "learning_rate": 3.708453685638509e-05, "loss": 2.5495, "step": 2712500 }, { "epoch": 5.17, "learning_rate": 3.7082155214462094e-05, "loss": 2.5392, "step": 2713000 }, { "epoch": 5.17, "learning_rate": 3.70797735725391e-05, "loss": 2.5301, "step": 2713500 }, { "epoch": 5.17, "learning_rate": 3.7077391930616106e-05, "loss": 2.5335, "step": 2714000 }, { "epoch": 5.17, "learning_rate": 3.707501028869311e-05, "loss": 2.5506, "step": 2714500 }, { "epoch": 5.17, "learning_rate": 3.707262864677011e-05, "loss": 2.5224, "step": 2715000 }, { "epoch": 5.17, "learning_rate": 3.707025653141481e-05, "loss": 2.5057, "step": 2715500 }, { "epoch": 5.17, "learning_rate": 3.7067874889491816e-05, "loss": 2.5396, "step": 2716000 }, { "epoch": 5.18, "learning_rate": 3.7065493247568825e-05, "loss": 2.5262, "step": 2716500 }, { "epoch": 5.18, "learning_rate": 3.706311160564583e-05, "loss": 2.5312, "step": 2717000 }, { "epoch": 5.18, "learning_rate": 3.706072996372284e-05, "loss": 2.5198, "step": 2717500 }, { "epoch": 5.18, "learning_rate": 3.705835784836753e-05, "loss": 2.5088, "step": 2718000 }, { "epoch": 5.18, "learning_rate": 3.7055976206444535e-05, "loss": 2.5288, "step": 2718500 }, { "epoch": 5.18, "learning_rate": 3.705359456452154e-05, "loss": 2.5291, "step": 2719000 }, { "epoch": 5.18, "learning_rate": 3.705121292259855e-05, "loss": 2.5493, "step": 2719500 }, { "epoch": 5.18, "learning_rate": 3.704883128067555e-05, "loss": 2.5175, "step": 2720000 }, { "epoch": 5.18, "learning_rate": 3.704644963875256e-05, "loss": 2.5339, "step": 2720500 }, { "epoch": 5.18, "learning_rate": 3.704406799682956e-05, "loss": 2.5246, "step": 2721000 }, { "epoch": 5.19, "learning_rate": 3.7041686354906564e-05, "loss": 2.5324, "step": 2721500 }, { "epoch": 5.19, "learning_rate": 3.703930471298357e-05, "loss": 2.55, "step": 2722000 }, { "epoch": 5.19, "learning_rate": 3.7036923071060576e-05, "loss": 2.5432, "step": 2722500 }, { "epoch": 5.19, "learning_rate": 3.703454142913758e-05, "loss": 2.5236, "step": 2723000 }, { "epoch": 5.19, "learning_rate": 3.703216455049843e-05, "loss": 2.5206, "step": 2723500 }, { "epoch": 5.19, "learning_rate": 3.702978290857544e-05, "loss": 2.5355, "step": 2724000 }, { "epoch": 5.19, "learning_rate": 3.7027401266652436e-05, "loss": 2.5168, "step": 2724500 }, { "epoch": 5.19, "learning_rate": 3.7025019624729446e-05, "loss": 2.5356, "step": 2725000 }, { "epoch": 5.19, "learning_rate": 3.7022637982806455e-05, "loss": 2.5282, "step": 2725500 }, { "epoch": 5.19, "learning_rate": 3.702025634088346e-05, "loss": 2.522, "step": 2726000 }, { "epoch": 5.19, "learning_rate": 3.701787946224431e-05, "loss": 2.5221, "step": 2726500 }, { "epoch": 5.2, "learning_rate": 3.701549782032131e-05, "loss": 2.525, "step": 2727000 }, { "epoch": 5.2, "learning_rate": 3.701311617839832e-05, "loss": 2.5086, "step": 2727500 }, { "epoch": 5.2, "learning_rate": 3.7010734536475325e-05, "loss": 2.5054, "step": 2728000 }, { "epoch": 5.2, "learning_rate": 3.700835289455233e-05, "loss": 2.5435, "step": 2728500 }, { "epoch": 5.2, "learning_rate": 3.700597125262934e-05, "loss": 2.5294, "step": 2729000 }, { "epoch": 5.2, "learning_rate": 3.700358961070634e-05, "loss": 2.539, "step": 2729500 }, { "epoch": 5.2, "learning_rate": 3.700120796878335e-05, "loss": 2.5418, "step": 2730000 }, { "epoch": 5.2, "eval_accuracy": 0.5380547486362544, "eval_loss": 2.4348089694976807, "eval_runtime": 4210.3483, "eval_samples_per_second": 65.312, "eval_steps_per_second": 6.531, "step": 2730000 }, { "epoch": 5.2, "learning_rate": 3.69988310901442e-05, "loss": 2.5328, "step": 2730500 }, { "epoch": 5.2, "learning_rate": 3.69964494482212e-05, "loss": 2.5174, "step": 2731000 }, { "epoch": 5.2, "learning_rate": 3.6994067806298206e-05, "loss": 2.5094, "step": 2731500 }, { "epoch": 5.21, "learning_rate": 3.699168616437521e-05, "loss": 2.53, "step": 2732000 }, { "epoch": 5.21, "learning_rate": 3.698930452245222e-05, "loss": 2.5107, "step": 2732500 }, { "epoch": 5.21, "learning_rate": 3.698692288052922e-05, "loss": 2.5201, "step": 2733000 }, { "epoch": 5.21, "learning_rate": 3.698454600189007e-05, "loss": 2.537, "step": 2733500 }, { "epoch": 5.21, "learning_rate": 3.6982164359967076e-05, "loss": 2.5225, "step": 2734000 }, { "epoch": 5.21, "learning_rate": 3.697978271804408e-05, "loss": 2.5334, "step": 2734500 }, { "epoch": 5.21, "learning_rate": 3.697740583940493e-05, "loss": 2.526, "step": 2735000 }, { "epoch": 5.21, "learning_rate": 3.697502419748194e-05, "loss": 2.545, "step": 2735500 }, { "epoch": 5.21, "learning_rate": 3.697264255555894e-05, "loss": 2.5117, "step": 2736000 }, { "epoch": 5.21, "learning_rate": 3.697026091363595e-05, "loss": 2.5249, "step": 2736500 }, { "epoch": 5.21, "learning_rate": 3.6967879271712955e-05, "loss": 2.5316, "step": 2737000 }, { "epoch": 5.22, "learning_rate": 3.696549762978996e-05, "loss": 2.5501, "step": 2737500 }, { "epoch": 5.22, "learning_rate": 3.696311598786697e-05, "loss": 2.5273, "step": 2738000 }, { "epoch": 5.22, "learning_rate": 3.696073434594397e-05, "loss": 2.538, "step": 2738500 }, { "epoch": 5.22, "learning_rate": 3.695835270402098e-05, "loss": 2.5246, "step": 2739000 }, { "epoch": 5.22, "learning_rate": 3.6955980588665674e-05, "loss": 2.5173, "step": 2739500 }, { "epoch": 5.22, "learning_rate": 3.695359894674268e-05, "loss": 2.5202, "step": 2740000 }, { "epoch": 5.22, "learning_rate": 3.6951217304819686e-05, "loss": 2.5234, "step": 2740500 }, { "epoch": 5.22, "learning_rate": 3.694883566289669e-05, "loss": 2.5479, "step": 2741000 }, { "epoch": 5.22, "learning_rate": 3.694645402097369e-05, "loss": 2.5358, "step": 2741500 }, { "epoch": 5.22, "learning_rate": 3.69440723790507e-05, "loss": 2.5308, "step": 2742000 }, { "epoch": 5.23, "learning_rate": 3.6941690737127703e-05, "loss": 2.5415, "step": 2742500 }, { "epoch": 5.23, "learning_rate": 3.693930909520471e-05, "loss": 2.5275, "step": 2743000 }, { "epoch": 5.23, "learning_rate": 3.693692745328171e-05, "loss": 2.5184, "step": 2743500 }, { "epoch": 5.23, "learning_rate": 3.693454581135872e-05, "loss": 2.5242, "step": 2744000 }, { "epoch": 5.23, "learning_rate": 3.693216416943572e-05, "loss": 2.5458, "step": 2744500 }, { "epoch": 5.23, "learning_rate": 3.692978252751273e-05, "loss": 2.5631, "step": 2745000 }, { "epoch": 5.23, "learning_rate": 3.692740564887358e-05, "loss": 2.5269, "step": 2745500 }, { "epoch": 5.23, "learning_rate": 3.6925024006950585e-05, "loss": 2.5133, "step": 2746000 }, { "epoch": 5.23, "learning_rate": 3.6922642365027594e-05, "loss": 2.5392, "step": 2746500 }, { "epoch": 5.23, "learning_rate": 3.69202607231046e-05, "loss": 2.5325, "step": 2747000 }, { "epoch": 5.23, "learning_rate": 3.691788384446544e-05, "loss": 2.5257, "step": 2747500 }, { "epoch": 5.24, "learning_rate": 3.691550220254245e-05, "loss": 2.5082, "step": 2748000 }, { "epoch": 5.24, "learning_rate": 3.6913120560619455e-05, "loss": 2.5293, "step": 2748500 }, { "epoch": 5.24, "learning_rate": 3.691074368198031e-05, "loss": 2.5228, "step": 2749000 }, { "epoch": 5.24, "learning_rate": 3.6908362040057316e-05, "loss": 2.5186, "step": 2749500 }, { "epoch": 5.24, "learning_rate": 3.690598039813432e-05, "loss": 2.5186, "step": 2750000 }, { "epoch": 5.24, "learning_rate": 3.690359875621132e-05, "loss": 2.5115, "step": 2750500 }, { "epoch": 5.24, "learning_rate": 3.690121711428833e-05, "loss": 2.5286, "step": 2751000 }, { "epoch": 5.24, "learning_rate": 3.6898840235649176e-05, "loss": 2.539, "step": 2751500 }, { "epoch": 5.24, "learning_rate": 3.6896458593726186e-05, "loss": 2.5276, "step": 2752000 }, { "epoch": 5.24, "learning_rate": 3.689407695180319e-05, "loss": 2.5227, "step": 2752500 }, { "epoch": 5.25, "learning_rate": 3.689170007316404e-05, "loss": 2.5368, "step": 2753000 }, { "epoch": 5.25, "learning_rate": 3.688931843124105e-05, "loss": 2.5418, "step": 2753500 }, { "epoch": 5.25, "learning_rate": 3.688693678931805e-05, "loss": 2.5314, "step": 2754000 }, { "epoch": 5.25, "learning_rate": 3.6884555147395055e-05, "loss": 2.5312, "step": 2754500 }, { "epoch": 5.25, "learning_rate": 3.6882173505472065e-05, "loss": 2.5335, "step": 2755000 }, { "epoch": 5.25, "learning_rate": 3.687979186354907e-05, "loss": 2.5287, "step": 2755500 }, { "epoch": 5.25, "learning_rate": 3.687741022162608e-05, "loss": 2.5256, "step": 2756000 }, { "epoch": 5.25, "learning_rate": 3.687502857970308e-05, "loss": 2.5322, "step": 2756500 }, { "epoch": 5.25, "learning_rate": 3.687264693778008e-05, "loss": 2.5325, "step": 2757000 }, { "epoch": 5.25, "learning_rate": 3.6870270059140934e-05, "loss": 2.5085, "step": 2757500 }, { "epoch": 5.25, "learning_rate": 3.686788841721794e-05, "loss": 2.5346, "step": 2758000 }, { "epoch": 5.26, "learning_rate": 3.6865506775294946e-05, "loss": 2.5369, "step": 2758500 }, { "epoch": 5.26, "learning_rate": 3.686312513337195e-05, "loss": 2.5224, "step": 2759000 }, { "epoch": 5.26, "learning_rate": 3.686074349144896e-05, "loss": 2.5424, "step": 2759500 }, { "epoch": 5.26, "learning_rate": 3.6858361849525954e-05, "loss": 2.5091, "step": 2760000 }, { "epoch": 5.26, "eval_accuracy": 0.5382872976623695, "eval_loss": 2.4352471828460693, "eval_runtime": 4205.7601, "eval_samples_per_second": 65.384, "eval_steps_per_second": 6.538, "step": 2760000 }, { "epoch": 5.26, "learning_rate": 3.6855980207602964e-05, "loss": 2.548, "step": 2760500 }, { "epoch": 5.26, "learning_rate": 3.685359856567997e-05, "loss": 2.5129, "step": 2761000 }, { "epoch": 5.26, "learning_rate": 3.685122645032467e-05, "loss": 2.5123, "step": 2761500 }, { "epoch": 5.26, "learning_rate": 3.684884480840167e-05, "loss": 2.5324, "step": 2762000 }, { "epoch": 5.26, "learning_rate": 3.684646316647868e-05, "loss": 2.5128, "step": 2762500 }, { "epoch": 5.26, "learning_rate": 3.684408152455568e-05, "loss": 2.5254, "step": 2763000 }, { "epoch": 5.27, "learning_rate": 3.6841699882632686e-05, "loss": 2.5545, "step": 2763500 }, { "epoch": 5.27, "learning_rate": 3.683931824070969e-05, "loss": 2.5518, "step": 2764000 }, { "epoch": 5.27, "learning_rate": 3.68369365987867e-05, "loss": 2.5476, "step": 2764500 }, { "epoch": 5.27, "learning_rate": 3.683455495686371e-05, "loss": 2.5166, "step": 2765000 }, { "epoch": 5.27, "learning_rate": 3.683217331494071e-05, "loss": 2.5382, "step": 2765500 }, { "epoch": 5.27, "learning_rate": 3.682979167301772e-05, "loss": 2.5226, "step": 2766000 }, { "epoch": 5.27, "learning_rate": 3.6827410031094715e-05, "loss": 2.5305, "step": 2766500 }, { "epoch": 5.27, "learning_rate": 3.682503315245557e-05, "loss": 2.5439, "step": 2767000 }, { "epoch": 5.27, "learning_rate": 3.6822651510532577e-05, "loss": 2.5332, "step": 2767500 }, { "epoch": 5.27, "learning_rate": 3.682026986860958e-05, "loss": 2.521, "step": 2768000 }, { "epoch": 5.27, "learning_rate": 3.681788822668659e-05, "loss": 2.5334, "step": 2768500 }, { "epoch": 5.28, "learning_rate": 3.681550658476359e-05, "loss": 2.5311, "step": 2769000 }, { "epoch": 5.28, "learning_rate": 3.6813124942840594e-05, "loss": 2.5276, "step": 2769500 }, { "epoch": 5.28, "learning_rate": 3.6810743300917596e-05, "loss": 2.528, "step": 2770000 }, { "epoch": 5.28, "learning_rate": 3.680836642227845e-05, "loss": 2.5201, "step": 2770500 }, { "epoch": 5.28, "learning_rate": 3.680598478035546e-05, "loss": 2.5353, "step": 2771000 }, { "epoch": 5.28, "learning_rate": 3.680360313843246e-05, "loss": 2.5148, "step": 2771500 }, { "epoch": 5.28, "learning_rate": 3.680122149650947e-05, "loss": 2.5408, "step": 2772000 }, { "epoch": 5.28, "learning_rate": 3.679884461787032e-05, "loss": 2.5521, "step": 2772500 }, { "epoch": 5.28, "learning_rate": 3.679646297594732e-05, "loss": 2.535, "step": 2773000 }, { "epoch": 5.28, "learning_rate": 3.679408133402433e-05, "loss": 2.5305, "step": 2773500 }, { "epoch": 5.29, "learning_rate": 3.679169969210133e-05, "loss": 2.5117, "step": 2774000 }, { "epoch": 5.29, "learning_rate": 3.678931805017834e-05, "loss": 2.5505, "step": 2774500 }, { "epoch": 5.29, "learning_rate": 3.678693640825535e-05, "loss": 2.5245, "step": 2775000 }, { "epoch": 5.29, "learning_rate": 3.678455476633235e-05, "loss": 2.543, "step": 2775500 }, { "epoch": 5.29, "learning_rate": 3.6782173124409354e-05, "loss": 2.5382, "step": 2776000 }, { "epoch": 5.29, "learning_rate": 3.677979624577021e-05, "loss": 2.5458, "step": 2776500 }, { "epoch": 5.29, "learning_rate": 3.677741460384721e-05, "loss": 2.5195, "step": 2777000 }, { "epoch": 5.29, "learning_rate": 3.677503296192422e-05, "loss": 2.5353, "step": 2777500 }, { "epoch": 5.29, "learning_rate": 3.677265132000122e-05, "loss": 2.5428, "step": 2778000 }, { "epoch": 5.29, "learning_rate": 3.6770274441362074e-05, "loss": 2.5184, "step": 2778500 }, { "epoch": 5.29, "learning_rate": 3.676789279943908e-05, "loss": 2.535, "step": 2779000 }, { "epoch": 5.3, "learning_rate": 3.676551115751608e-05, "loss": 2.5348, "step": 2779500 }, { "epoch": 5.3, "learning_rate": 3.676312951559309e-05, "loss": 2.5139, "step": 2780000 }, { "epoch": 5.3, "learning_rate": 3.676074787367009e-05, "loss": 2.5386, "step": 2780500 }, { "epoch": 5.3, "learning_rate": 3.675837099503094e-05, "loss": 2.5341, "step": 2781000 }, { "epoch": 5.3, "learning_rate": 3.675598935310795e-05, "loss": 2.5336, "step": 2781500 }, { "epoch": 5.3, "learning_rate": 3.6753607711184955e-05, "loss": 2.5239, "step": 2782000 }, { "epoch": 5.3, "learning_rate": 3.675122606926196e-05, "loss": 2.5272, "step": 2782500 }, { "epoch": 5.3, "learning_rate": 3.674884919062281e-05, "loss": 2.5392, "step": 2783000 }, { "epoch": 5.3, "learning_rate": 3.674646754869981e-05, "loss": 2.5189, "step": 2783500 }, { "epoch": 5.3, "learning_rate": 3.674408590677682e-05, "loss": 2.5259, "step": 2784000 }, { "epoch": 5.31, "learning_rate": 3.6741709028137674e-05, "loss": 2.532, "step": 2784500 }, { "epoch": 5.31, "learning_rate": 3.673932738621468e-05, "loss": 2.5247, "step": 2785000 }, { "epoch": 5.31, "learning_rate": 3.6736945744291686e-05, "loss": 2.5382, "step": 2785500 }, { "epoch": 5.31, "learning_rate": 3.673456410236868e-05, "loss": 2.53, "step": 2786000 }, { "epoch": 5.31, "learning_rate": 3.673218246044569e-05, "loss": 2.534, "step": 2786500 }, { "epoch": 5.31, "learning_rate": 3.6729800818522694e-05, "loss": 2.5374, "step": 2787000 }, { "epoch": 5.31, "learning_rate": 3.6727419176599704e-05, "loss": 2.5223, "step": 2787500 }, { "epoch": 5.31, "learning_rate": 3.6725037534676706e-05, "loss": 2.5373, "step": 2788000 }, { "epoch": 5.31, "learning_rate": 3.672266541932141e-05, "loss": 2.5403, "step": 2788500 }, { "epoch": 5.31, "learning_rate": 3.672028377739841e-05, "loss": 2.5333, "step": 2789000 }, { "epoch": 5.31, "learning_rate": 3.671790213547542e-05, "loss": 2.5234, "step": 2789500 }, { "epoch": 5.32, "learning_rate": 3.6715520493552416e-05, "loss": 2.523, "step": 2790000 }, { "epoch": 5.32, "eval_accuracy": 0.5382365676427775, "eval_loss": 2.4331858158111572, "eval_runtime": 4202.7775, "eval_samples_per_second": 65.43, "eval_steps_per_second": 6.543, "step": 2790000 }, { "epoch": 5.32, "learning_rate": 3.6713138851629426e-05, "loss": 2.5297, "step": 2790500 }, { "epoch": 5.32, "learning_rate": 3.671075720970643e-05, "loss": 2.5178, "step": 2791000 }, { "epoch": 5.32, "learning_rate": 3.670837556778344e-05, "loss": 2.5321, "step": 2791500 }, { "epoch": 5.32, "learning_rate": 3.670599392586044e-05, "loss": 2.5261, "step": 2792000 }, { "epoch": 5.32, "learning_rate": 3.670361704722129e-05, "loss": 2.5463, "step": 2792500 }, { "epoch": 5.32, "learning_rate": 3.6701235405298295e-05, "loss": 2.5299, "step": 2793000 }, { "epoch": 5.32, "learning_rate": 3.6698853763375305e-05, "loss": 2.5333, "step": 2793500 }, { "epoch": 5.32, "learning_rate": 3.669647212145231e-05, "loss": 2.5315, "step": 2794000 }, { "epoch": 5.32, "learning_rate": 3.6694090479529317e-05, "loss": 2.5246, "step": 2794500 }, { "epoch": 5.33, "learning_rate": 3.669171360089016e-05, "loss": 2.5491, "step": 2795000 }, { "epoch": 5.33, "learning_rate": 3.668933195896717e-05, "loss": 2.5303, "step": 2795500 }, { "epoch": 5.33, "learning_rate": 3.6686950317044174e-05, "loss": 2.5222, "step": 2796000 }, { "epoch": 5.33, "learning_rate": 3.668456867512118e-05, "loss": 2.5388, "step": 2796500 }, { "epoch": 5.33, "learning_rate": 3.668219179648203e-05, "loss": 2.524, "step": 2797000 }, { "epoch": 5.33, "learning_rate": 3.667981015455904e-05, "loss": 2.5357, "step": 2797500 }, { "epoch": 5.33, "learning_rate": 3.667742851263604e-05, "loss": 2.5336, "step": 2798000 }, { "epoch": 5.33, "learning_rate": 3.667504687071305e-05, "loss": 2.5218, "step": 2798500 }, { "epoch": 5.33, "learning_rate": 3.667266522879005e-05, "loss": 2.5525, "step": 2799000 }, { "epoch": 5.33, "learning_rate": 3.6670283586867056e-05, "loss": 2.5256, "step": 2799500 }, { "epoch": 5.33, "learning_rate": 3.666790194494406e-05, "loss": 2.5147, "step": 2800000 }, { "epoch": 5.34, "learning_rate": 3.666552030302107e-05, "loss": 2.5228, "step": 2800500 }, { "epoch": 5.34, "learning_rate": 3.666314342438192e-05, "loss": 2.548, "step": 2801000 }, { "epoch": 5.34, "learning_rate": 3.666076178245892e-05, "loss": 2.537, "step": 2801500 }, { "epoch": 5.34, "learning_rate": 3.665838014053593e-05, "loss": 2.5418, "step": 2802000 }, { "epoch": 5.34, "learning_rate": 3.6656003261896784e-05, "loss": 2.5201, "step": 2802500 }, { "epoch": 5.34, "learning_rate": 3.665362161997378e-05, "loss": 2.5228, "step": 2803000 }, { "epoch": 5.34, "learning_rate": 3.665123997805079e-05, "loss": 2.5185, "step": 2803500 }, { "epoch": 5.34, "learning_rate": 3.664885833612779e-05, "loss": 2.5295, "step": 2804000 }, { "epoch": 5.34, "learning_rate": 3.6646481457488645e-05, "loss": 2.5211, "step": 2804500 }, { "epoch": 5.34, "learning_rate": 3.6644099815565654e-05, "loss": 2.519, "step": 2805000 }, { "epoch": 5.35, "learning_rate": 3.6641718173642657e-05, "loss": 2.5176, "step": 2805500 }, { "epoch": 5.35, "learning_rate": 3.663933653171966e-05, "loss": 2.5296, "step": 2806000 }, { "epoch": 5.35, "learning_rate": 3.663695965308051e-05, "loss": 2.5437, "step": 2806500 }, { "epoch": 5.35, "learning_rate": 3.6634578011157514e-05, "loss": 2.5058, "step": 2807000 }, { "epoch": 5.35, "learning_rate": 3.6632196369234523e-05, "loss": 2.5133, "step": 2807500 }, { "epoch": 5.35, "learning_rate": 3.6629814727311526e-05, "loss": 2.5111, "step": 2808000 }, { "epoch": 5.35, "learning_rate": 3.6627433085388536e-05, "loss": 2.5289, "step": 2808500 }, { "epoch": 5.35, "learning_rate": 3.662505144346554e-05, "loss": 2.5619, "step": 2809000 }, { "epoch": 5.35, "learning_rate": 3.662266980154254e-05, "loss": 2.5257, "step": 2809500 }, { "epoch": 5.35, "learning_rate": 3.662029292290339e-05, "loss": 2.536, "step": 2810000 }, { "epoch": 5.35, "learning_rate": 3.66179112809804e-05, "loss": 2.5376, "step": 2810500 }, { "epoch": 5.36, "learning_rate": 3.6615529639057405e-05, "loss": 2.5549, "step": 2811000 }, { "epoch": 5.36, "learning_rate": 3.6613147997134415e-05, "loss": 2.5206, "step": 2811500 }, { "epoch": 5.36, "learning_rate": 3.661076635521142e-05, "loss": 2.537, "step": 2812000 }, { "epoch": 5.36, "learning_rate": 3.660838471328842e-05, "loss": 2.5315, "step": 2812500 }, { "epoch": 5.36, "learning_rate": 3.660600307136542e-05, "loss": 2.5369, "step": 2813000 }, { "epoch": 5.36, "learning_rate": 3.660362142944243e-05, "loss": 2.5288, "step": 2813500 }, { "epoch": 5.36, "learning_rate": 3.6601244550803284e-05, "loss": 2.5275, "step": 2814000 }, { "epoch": 5.36, "learning_rate": 3.659886290888029e-05, "loss": 2.5231, "step": 2814500 }, { "epoch": 5.36, "learning_rate": 3.6596481266957296e-05, "loss": 2.5206, "step": 2815000 }, { "epoch": 5.36, "learning_rate": 3.659409962503429e-05, "loss": 2.5289, "step": 2815500 }, { "epoch": 5.37, "learning_rate": 3.65917179831113e-05, "loss": 2.5448, "step": 2816000 }, { "epoch": 5.37, "learning_rate": 3.6589336341188304e-05, "loss": 2.5227, "step": 2816500 }, { "epoch": 5.37, "learning_rate": 3.658695469926531e-05, "loss": 2.5502, "step": 2817000 }, { "epoch": 5.37, "learning_rate": 3.658457305734232e-05, "loss": 2.5312, "step": 2817500 }, { "epoch": 5.37, "learning_rate": 3.658220094198702e-05, "loss": 2.5511, "step": 2818000 }, { "epoch": 5.37, "learning_rate": 3.657981930006402e-05, "loss": 2.5029, "step": 2818500 }, { "epoch": 5.37, "learning_rate": 3.657743765814102e-05, "loss": 2.5365, "step": 2819000 }, { "epoch": 5.37, "learning_rate": 3.6575056016218026e-05, "loss": 2.5286, "step": 2819500 }, { "epoch": 5.37, "learning_rate": 3.6572674374295035e-05, "loss": 2.5284, "step": 2820000 }, { "epoch": 5.37, "eval_accuracy": 0.5385160715304154, "eval_loss": 2.4326460361480713, "eval_runtime": 4209.9057, "eval_samples_per_second": 65.319, "eval_steps_per_second": 6.532, "step": 2820000 }, { "epoch": 5.37, "learning_rate": 3.657029273237204e-05, "loss": 2.52, "step": 2820500 }, { "epoch": 5.37, "learning_rate": 3.656791109044905e-05, "loss": 2.5424, "step": 2821000 }, { "epoch": 5.38, "learning_rate": 3.656552944852606e-05, "loss": 2.5171, "step": 2821500 }, { "epoch": 5.38, "learning_rate": 3.656314780660305e-05, "loss": 2.5369, "step": 2822000 }, { "epoch": 5.38, "learning_rate": 3.6560770927963905e-05, "loss": 2.521, "step": 2822500 }, { "epoch": 5.38, "learning_rate": 3.6558389286040914e-05, "loss": 2.5181, "step": 2823000 }, { "epoch": 5.38, "learning_rate": 3.655600764411792e-05, "loss": 2.5425, "step": 2823500 }, { "epoch": 5.38, "learning_rate": 3.6553626002194926e-05, "loss": 2.5375, "step": 2824000 }, { "epoch": 5.38, "learning_rate": 3.655124912355578e-05, "loss": 2.5401, "step": 2824500 }, { "epoch": 5.38, "learning_rate": 3.654886748163278e-05, "loss": 2.521, "step": 2825000 }, { "epoch": 5.38, "learning_rate": 3.6546485839709784e-05, "loss": 2.5221, "step": 2825500 }, { "epoch": 5.38, "learning_rate": 3.6544104197786786e-05, "loss": 2.5444, "step": 2826000 }, { "epoch": 5.39, "learning_rate": 3.654172731914764e-05, "loss": 2.5291, "step": 2826500 }, { "epoch": 5.39, "learning_rate": 3.653934567722465e-05, "loss": 2.5338, "step": 2827000 }, { "epoch": 5.39, "learning_rate": 3.653696403530165e-05, "loss": 2.529, "step": 2827500 }, { "epoch": 5.39, "learning_rate": 3.653458239337866e-05, "loss": 2.503, "step": 2828000 }, { "epoch": 5.39, "learning_rate": 3.653220551473951e-05, "loss": 2.5292, "step": 2828500 }, { "epoch": 5.39, "learning_rate": 3.652982387281651e-05, "loss": 2.5001, "step": 2829000 }, { "epoch": 5.39, "learning_rate": 3.652744223089352e-05, "loss": 2.5486, "step": 2829500 }, { "epoch": 5.39, "learning_rate": 3.652506058897052e-05, "loss": 2.5206, "step": 2830000 }, { "epoch": 5.39, "learning_rate": 3.652267894704753e-05, "loss": 2.5279, "step": 2830500 }, { "epoch": 5.39, "learning_rate": 3.652029730512453e-05, "loss": 2.5408, "step": 2831000 }, { "epoch": 5.39, "learning_rate": 3.651791566320154e-05, "loss": 2.533, "step": 2831500 }, { "epoch": 5.4, "learning_rate": 3.6515534021278544e-05, "loss": 2.5343, "step": 2832000 }, { "epoch": 5.4, "learning_rate": 3.651315714263939e-05, "loss": 2.5122, "step": 2832500 }, { "epoch": 5.4, "learning_rate": 3.651078026400024e-05, "loss": 2.5293, "step": 2833000 }, { "epoch": 5.4, "learning_rate": 3.650839862207725e-05, "loss": 2.5179, "step": 2833500 }, { "epoch": 5.4, "learning_rate": 3.6506016980154254e-05, "loss": 2.5411, "step": 2834000 }, { "epoch": 5.4, "learning_rate": 3.6503635338231264e-05, "loss": 2.55, "step": 2834500 }, { "epoch": 5.4, "learning_rate": 3.6501253696308266e-05, "loss": 2.5424, "step": 2835000 }, { "epoch": 5.4, "learning_rate": 3.649887205438527e-05, "loss": 2.5207, "step": 2835500 }, { "epoch": 5.4, "learning_rate": 3.649649517574612e-05, "loss": 2.529, "step": 2836000 }, { "epoch": 5.4, "learning_rate": 3.6494113533823124e-05, "loss": 2.5468, "step": 2836500 }, { "epoch": 5.41, "learning_rate": 3.649173189190013e-05, "loss": 2.5414, "step": 2837000 }, { "epoch": 5.41, "learning_rate": 3.6489350249977136e-05, "loss": 2.53, "step": 2837500 }, { "epoch": 5.41, "learning_rate": 3.6486968608054145e-05, "loss": 2.5392, "step": 2838000 }, { "epoch": 5.41, "learning_rate": 3.648458696613115e-05, "loss": 2.5318, "step": 2838500 }, { "epoch": 5.41, "learning_rate": 3.648220532420815e-05, "loss": 2.5337, "step": 2839000 }, { "epoch": 5.41, "learning_rate": 3.647982368228516e-05, "loss": 2.5332, "step": 2839500 }, { "epoch": 5.41, "learning_rate": 3.647744204036216e-05, "loss": 2.5333, "step": 2840000 }, { "epoch": 5.41, "learning_rate": 3.6475065161723015e-05, "loss": 2.5467, "step": 2840500 }, { "epoch": 5.41, "learning_rate": 3.647268828308387e-05, "loss": 2.5383, "step": 2841000 }, { "epoch": 5.41, "learning_rate": 3.647030664116087e-05, "loss": 2.5327, "step": 2841500 }, { "epoch": 5.41, "learning_rate": 3.646792499923788e-05, "loss": 2.5365, "step": 2842000 }, { "epoch": 5.42, "learning_rate": 3.646554335731488e-05, "loss": 2.5113, "step": 2842500 }, { "epoch": 5.42, "learning_rate": 3.6463161715391884e-05, "loss": 2.5292, "step": 2843000 }, { "epoch": 5.42, "learning_rate": 3.6460784836752737e-05, "loss": 2.5244, "step": 2843500 }, { "epoch": 5.42, "learning_rate": 3.6458403194829746e-05, "loss": 2.5243, "step": 2844000 }, { "epoch": 5.42, "learning_rate": 3.645602155290675e-05, "loss": 2.5247, "step": 2844500 }, { "epoch": 5.42, "learning_rate": 3.645363991098376e-05, "loss": 2.5327, "step": 2845000 }, { "epoch": 5.42, "learning_rate": 3.6451258269060754e-05, "loss": 2.5314, "step": 2845500 }, { "epoch": 5.42, "learning_rate": 3.6448881390421606e-05, "loss": 2.54, "step": 2846000 }, { "epoch": 5.42, "learning_rate": 3.6446499748498616e-05, "loss": 2.5302, "step": 2846500 }, { "epoch": 5.42, "learning_rate": 3.644411810657562e-05, "loss": 2.5178, "step": 2847000 }, { "epoch": 5.43, "learning_rate": 3.644173646465263e-05, "loss": 2.5368, "step": 2847500 }, { "epoch": 5.43, "learning_rate": 3.643935482272963e-05, "loss": 2.5259, "step": 2848000 }, { "epoch": 5.43, "learning_rate": 3.643697318080663e-05, "loss": 2.5195, "step": 2848500 }, { "epoch": 5.43, "learning_rate": 3.643459153888364e-05, "loss": 2.5087, "step": 2849000 }, { "epoch": 5.43, "learning_rate": 3.6432209896960645e-05, "loss": 2.5317, "step": 2849500 }, { "epoch": 5.43, "learning_rate": 3.64298330183215e-05, "loss": 2.5455, "step": 2850000 }, { "epoch": 5.43, "eval_accuracy": 0.5386967174652221, "eval_loss": 2.4316465854644775, "eval_runtime": 4203.2164, "eval_samples_per_second": 65.423, "eval_steps_per_second": 6.542, "step": 2850000 }, { "epoch": 5.43, "learning_rate": 3.64274513763985e-05, "loss": 2.5217, "step": 2850500 }, { "epoch": 5.43, "learning_rate": 3.642506973447551e-05, "loss": 2.5201, "step": 2851000 }, { "epoch": 5.43, "learning_rate": 3.642269285583636e-05, "loss": 2.5403, "step": 2851500 }, { "epoch": 5.43, "learning_rate": 3.642031121391336e-05, "loss": 2.5393, "step": 2852000 }, { "epoch": 5.43, "learning_rate": 3.641792957199037e-05, "loss": 2.5131, "step": 2852500 }, { "epoch": 5.44, "learning_rate": 3.6415547930067376e-05, "loss": 2.5252, "step": 2853000 }, { "epoch": 5.44, "learning_rate": 3.641316628814438e-05, "loss": 2.5395, "step": 2853500 }, { "epoch": 5.44, "learning_rate": 3.641078464622139e-05, "loss": 2.5409, "step": 2854000 }, { "epoch": 5.44, "learning_rate": 3.640840300429839e-05, "loss": 2.5393, "step": 2854500 }, { "epoch": 5.44, "learning_rate": 3.6406021362375393e-05, "loss": 2.5182, "step": 2855000 }, { "epoch": 5.44, "learning_rate": 3.6403644483736246e-05, "loss": 2.5164, "step": 2855500 }, { "epoch": 5.44, "learning_rate": 3.640126284181325e-05, "loss": 2.5274, "step": 2856000 }, { "epoch": 5.44, "learning_rate": 3.639888119989026e-05, "loss": 2.5318, "step": 2856500 }, { "epoch": 5.44, "learning_rate": 3.639649955796726e-05, "loss": 2.5264, "step": 2857000 }, { "epoch": 5.44, "learning_rate": 3.639412267932811e-05, "loss": 2.5169, "step": 2857500 }, { "epoch": 5.45, "learning_rate": 3.639174103740512e-05, "loss": 2.5518, "step": 2858000 }, { "epoch": 5.45, "learning_rate": 3.638936415876597e-05, "loss": 2.5185, "step": 2858500 }, { "epoch": 5.45, "learning_rate": 3.638698251684297e-05, "loss": 2.5207, "step": 2859000 }, { "epoch": 5.45, "learning_rate": 3.638460563820382e-05, "loss": 2.5046, "step": 2859500 }, { "epoch": 5.45, "learning_rate": 3.638222399628083e-05, "loss": 2.5388, "step": 2860000 }, { "epoch": 5.45, "learning_rate": 3.6379842354357834e-05, "loss": 2.5348, "step": 2860500 }, { "epoch": 5.45, "learning_rate": 3.6377460712434844e-05, "loss": 2.538, "step": 2861000 }, { "epoch": 5.45, "learning_rate": 3.6375079070511847e-05, "loss": 2.5377, "step": 2861500 }, { "epoch": 5.45, "learning_rate": 3.637269742858885e-05, "loss": 2.5212, "step": 2862000 }, { "epoch": 5.45, "learning_rate": 3.637031578666585e-05, "loss": 2.5262, "step": 2862500 }, { "epoch": 5.45, "learning_rate": 3.636793414474286e-05, "loss": 2.5352, "step": 2863000 }, { "epoch": 5.46, "learning_rate": 3.6365552502819864e-05, "loss": 2.5418, "step": 2863500 }, { "epoch": 5.46, "learning_rate": 3.636317086089687e-05, "loss": 2.5324, "step": 2864000 }, { "epoch": 5.46, "learning_rate": 3.6360793982257725e-05, "loss": 2.5363, "step": 2864500 }, { "epoch": 5.46, "learning_rate": 3.635841234033473e-05, "loss": 2.5195, "step": 2865000 }, { "epoch": 5.46, "learning_rate": 3.635603069841173e-05, "loss": 2.528, "step": 2865500 }, { "epoch": 5.46, "learning_rate": 3.635364905648873e-05, "loss": 2.5286, "step": 2866000 }, { "epoch": 5.46, "learning_rate": 3.635126741456574e-05, "loss": 2.5354, "step": 2866500 }, { "epoch": 5.46, "learning_rate": 3.634888577264275e-05, "loss": 2.5157, "step": 2867000 }, { "epoch": 5.46, "learning_rate": 3.6346504130719755e-05, "loss": 2.5269, "step": 2867500 }, { "epoch": 5.46, "learning_rate": 3.634412725208061e-05, "loss": 2.5435, "step": 2868000 }, { "epoch": 5.47, "learning_rate": 3.634174561015761e-05, "loss": 2.5149, "step": 2868500 }, { "epoch": 5.47, "learning_rate": 3.633936396823461e-05, "loss": 2.5429, "step": 2869000 }, { "epoch": 5.47, "learning_rate": 3.633698232631162e-05, "loss": 2.5272, "step": 2869500 }, { "epoch": 5.47, "learning_rate": 3.6334600684388624e-05, "loss": 2.5308, "step": 2870000 }, { "epoch": 5.47, "learning_rate": 3.6332219042465634e-05, "loss": 2.5104, "step": 2870500 }, { "epoch": 5.47, "learning_rate": 3.632983740054263e-05, "loss": 2.523, "step": 2871000 }, { "epoch": 5.47, "learning_rate": 3.632745575861964e-05, "loss": 2.5441, "step": 2871500 }, { "epoch": 5.47, "learning_rate": 3.632507411669664e-05, "loss": 2.5233, "step": 2872000 }, { "epoch": 5.47, "learning_rate": 3.6322697238057494e-05, "loss": 2.5273, "step": 2872500 }, { "epoch": 5.47, "learning_rate": 3.63203155961345e-05, "loss": 2.5471, "step": 2873000 }, { "epoch": 5.47, "learning_rate": 3.6317933954211506e-05, "loss": 2.5141, "step": 2873500 }, { "epoch": 5.48, "learning_rate": 3.6315552312288515e-05, "loss": 2.5348, "step": 2874000 }, { "epoch": 5.48, "learning_rate": 3.631317543364937e-05, "loss": 2.5388, "step": 2874500 }, { "epoch": 5.48, "learning_rate": 3.6310793791726364e-05, "loss": 2.5453, "step": 2875000 }, { "epoch": 5.48, "learning_rate": 3.630841214980337e-05, "loss": 2.5501, "step": 2875500 }, { "epoch": 5.48, "learning_rate": 3.6306030507880376e-05, "loss": 2.5285, "step": 2876000 }, { "epoch": 5.48, "learning_rate": 3.630365362924123e-05, "loss": 2.5367, "step": 2876500 }, { "epoch": 5.48, "learning_rate": 3.630127198731824e-05, "loss": 2.5415, "step": 2877000 }, { "epoch": 5.48, "learning_rate": 3.629889034539524e-05, "loss": 2.5249, "step": 2877500 }, { "epoch": 5.48, "learning_rate": 3.629650870347224e-05, "loss": 2.5449, "step": 2878000 }, { "epoch": 5.48, "learning_rate": 3.629412706154925e-05, "loss": 2.5348, "step": 2878500 }, { "epoch": 5.49, "learning_rate": 3.62917501829101e-05, "loss": 2.5335, "step": 2879000 }, { "epoch": 5.49, "learning_rate": 3.628936854098711e-05, "loss": 2.5178, "step": 2879500 }, { "epoch": 5.49, "learning_rate": 3.628698689906411e-05, "loss": 2.5233, "step": 2880000 }, { "epoch": 5.49, "eval_accuracy": 0.5385179271165851, "eval_loss": 2.430868625640869, "eval_runtime": 4203.1402, "eval_samples_per_second": 65.424, "eval_steps_per_second": 6.542, "step": 2880000 }, { "epoch": 5.49, "learning_rate": 3.628460525714112e-05, "loss": 2.5458, "step": 2880500 }, { "epoch": 5.49, "learning_rate": 3.628222837850197e-05, "loss": 2.5335, "step": 2881000 }, { "epoch": 5.49, "learning_rate": 3.6279846736578974e-05, "loss": 2.5196, "step": 2881500 }, { "epoch": 5.49, "learning_rate": 3.6277465094655976e-05, "loss": 2.5433, "step": 2882000 }, { "epoch": 5.49, "learning_rate": 3.627508821601683e-05, "loss": 2.5225, "step": 2882500 }, { "epoch": 5.49, "learning_rate": 3.627270657409383e-05, "loss": 2.5314, "step": 2883000 }, { "epoch": 5.49, "learning_rate": 3.627032493217084e-05, "loss": 2.5208, "step": 2883500 }, { "epoch": 5.49, "learning_rate": 3.626794329024784e-05, "loss": 2.5291, "step": 2884000 }, { "epoch": 5.5, "learning_rate": 3.626556164832485e-05, "loss": 2.5219, "step": 2884500 }, { "epoch": 5.5, "learning_rate": 3.6263180006401855e-05, "loss": 2.5696, "step": 2885000 }, { "epoch": 5.5, "learning_rate": 3.626080312776271e-05, "loss": 2.53, "step": 2885500 }, { "epoch": 5.5, "learning_rate": 3.625842148583971e-05, "loss": 2.5185, "step": 2886000 }, { "epoch": 5.5, "learning_rate": 3.625603984391672e-05, "loss": 2.5342, "step": 2886500 }, { "epoch": 5.5, "learning_rate": 3.625365820199372e-05, "loss": 2.5351, "step": 2887000 }, { "epoch": 5.5, "learning_rate": 3.625127656007073e-05, "loss": 2.5409, "step": 2887500 }, { "epoch": 5.5, "learning_rate": 3.6248899681431584e-05, "loss": 2.5434, "step": 2888000 }, { "epoch": 5.5, "learning_rate": 3.624651803950858e-05, "loss": 2.5455, "step": 2888500 }, { "epoch": 5.5, "learning_rate": 3.624413639758559e-05, "loss": 2.5411, "step": 2889000 }, { "epoch": 5.51, "learning_rate": 3.624175475566259e-05, "loss": 2.5324, "step": 2889500 }, { "epoch": 5.51, "learning_rate": 3.62393731137396e-05, "loss": 2.5177, "step": 2890000 }, { "epoch": 5.51, "learning_rate": 3.6236991471816604e-05, "loss": 2.5333, "step": 2890500 }, { "epoch": 5.51, "learning_rate": 3.6234614593177456e-05, "loss": 2.5143, "step": 2891000 }, { "epoch": 5.51, "learning_rate": 3.623223295125446e-05, "loss": 2.5063, "step": 2891500 }, { "epoch": 5.51, "learning_rate": 3.622985130933146e-05, "loss": 2.5156, "step": 2892000 }, { "epoch": 5.51, "learning_rate": 3.622746966740847e-05, "loss": 2.518, "step": 2892500 }, { "epoch": 5.51, "learning_rate": 3.6225088025485473e-05, "loss": 2.5309, "step": 2893000 }, { "epoch": 5.51, "learning_rate": 3.622270638356248e-05, "loss": 2.537, "step": 2893500 }, { "epoch": 5.51, "learning_rate": 3.6220329504923335e-05, "loss": 2.5364, "step": 2894000 }, { "epoch": 5.51, "learning_rate": 3.621794786300034e-05, "loss": 2.5432, "step": 2894500 }, { "epoch": 5.52, "learning_rate": 3.621556622107734e-05, "loss": 2.5408, "step": 2895000 }, { "epoch": 5.52, "learning_rate": 3.621318457915435e-05, "loss": 2.5254, "step": 2895500 }, { "epoch": 5.52, "learning_rate": 3.621080293723135e-05, "loss": 2.5278, "step": 2896000 }, { "epoch": 5.52, "learning_rate": 3.6208426058592205e-05, "loss": 2.5229, "step": 2896500 }, { "epoch": 5.52, "learning_rate": 3.620604441666921e-05, "loss": 2.5123, "step": 2897000 }, { "epoch": 5.52, "learning_rate": 3.620366277474622e-05, "loss": 2.5457, "step": 2897500 }, { "epoch": 5.52, "learning_rate": 3.620128113282322e-05, "loss": 2.5299, "step": 2898000 }, { "epoch": 5.52, "learning_rate": 3.619889949090022e-05, "loss": 2.5256, "step": 2898500 }, { "epoch": 5.52, "learning_rate": 3.6196522612261074e-05, "loss": 2.5298, "step": 2899000 }, { "epoch": 5.52, "learning_rate": 3.6194140970338084e-05, "loss": 2.5309, "step": 2899500 }, { "epoch": 5.53, "learning_rate": 3.6191759328415086e-05, "loss": 2.5298, "step": 2900000 }, { "epoch": 5.53, "learning_rate": 3.6189377686492096e-05, "loss": 2.5354, "step": 2900500 }, { "epoch": 5.53, "learning_rate": 3.618699604456909e-05, "loss": 2.5257, "step": 2901000 }, { "epoch": 5.53, "learning_rate": 3.61846144026461e-05, "loss": 2.5262, "step": 2901500 }, { "epoch": 5.53, "learning_rate": 3.618223752400695e-05, "loss": 2.5406, "step": 2902000 }, { "epoch": 5.53, "learning_rate": 3.6179855882083956e-05, "loss": 2.5188, "step": 2902500 }, { "epoch": 5.53, "learning_rate": 3.6177474240160965e-05, "loss": 2.5248, "step": 2903000 }, { "epoch": 5.53, "learning_rate": 3.617509259823797e-05, "loss": 2.5199, "step": 2903500 }, { "epoch": 5.53, "learning_rate": 3.617271095631497e-05, "loss": 2.5211, "step": 2904000 }, { "epoch": 5.53, "learning_rate": 3.617032931439197e-05, "loss": 2.5181, "step": 2904500 }, { "epoch": 5.53, "learning_rate": 3.616794767246898e-05, "loss": 2.5258, "step": 2905000 }, { "epoch": 5.54, "learning_rate": 3.616556603054599e-05, "loss": 2.5398, "step": 2905500 }, { "epoch": 5.54, "learning_rate": 3.6163184388622995e-05, "loss": 2.5205, "step": 2906000 }, { "epoch": 5.54, "learning_rate": 3.616080750998385e-05, "loss": 2.5212, "step": 2906500 }, { "epoch": 5.54, "learning_rate": 3.615842586806085e-05, "loss": 2.5229, "step": 2907000 }, { "epoch": 5.54, "learning_rate": 3.615604422613785e-05, "loss": 2.5183, "step": 2907500 }, { "epoch": 5.54, "learning_rate": 3.615366258421486e-05, "loss": 2.5054, "step": 2908000 }, { "epoch": 5.54, "learning_rate": 3.615128570557571e-05, "loss": 2.5413, "step": 2908500 }, { "epoch": 5.54, "learning_rate": 3.6148904063652716e-05, "loss": 2.5182, "step": 2909000 }, { "epoch": 5.54, "learning_rate": 3.6146522421729726e-05, "loss": 2.5404, "step": 2909500 }, { "epoch": 5.54, "learning_rate": 3.614414554309057e-05, "loss": 2.5265, "step": 2910000 }, { "epoch": 5.54, "eval_accuracy": 0.5388162072612116, "eval_loss": 2.429485321044922, "eval_runtime": 4205.8775, "eval_samples_per_second": 65.382, "eval_steps_per_second": 6.538, "step": 2910000 }, { "epoch": 5.55, "learning_rate": 3.614176390116758e-05, "loss": 2.5233, "step": 2910500 }, { "epoch": 5.55, "learning_rate": 3.613938225924458e-05, "loss": 2.5314, "step": 2911000 }, { "epoch": 5.55, "learning_rate": 3.6137000617321586e-05, "loss": 2.5264, "step": 2911500 }, { "epoch": 5.55, "learning_rate": 3.6134618975398595e-05, "loss": 2.5388, "step": 2912000 }, { "epoch": 5.55, "learning_rate": 3.61322373334756e-05, "loss": 2.5341, "step": 2912500 }, { "epoch": 5.55, "learning_rate": 3.612985569155261e-05, "loss": 2.5086, "step": 2913000 }, { "epoch": 5.55, "learning_rate": 3.61274740496296e-05, "loss": 2.5182, "step": 2913500 }, { "epoch": 5.55, "learning_rate": 3.6125097170990456e-05, "loss": 2.5337, "step": 2914000 }, { "epoch": 5.55, "learning_rate": 3.6122715529067465e-05, "loss": 2.5421, "step": 2914500 }, { "epoch": 5.55, "learning_rate": 3.612033388714447e-05, "loss": 2.5289, "step": 2915000 }, { "epoch": 5.55, "learning_rate": 3.611795224522148e-05, "loss": 2.5169, "step": 2915500 }, { "epoch": 5.56, "learning_rate": 3.611557536658233e-05, "loss": 2.5297, "step": 2916000 }, { "epoch": 5.56, "learning_rate": 3.611319372465933e-05, "loss": 2.5409, "step": 2916500 }, { "epoch": 5.56, "learning_rate": 3.611081208273634e-05, "loss": 2.5459, "step": 2917000 }, { "epoch": 5.56, "learning_rate": 3.610843044081334e-05, "loss": 2.5392, "step": 2917500 }, { "epoch": 5.56, "learning_rate": 3.6106048798890347e-05, "loss": 2.5293, "step": 2918000 }, { "epoch": 5.56, "learning_rate": 3.610366715696735e-05, "loss": 2.5293, "step": 2918500 }, { "epoch": 5.56, "learning_rate": 3.610128551504436e-05, "loss": 2.5246, "step": 2919000 }, { "epoch": 5.56, "learning_rate": 3.609890863640521e-05, "loss": 2.5476, "step": 2919500 }, { "epoch": 5.56, "learning_rate": 3.6096526994482214e-05, "loss": 2.4997, "step": 2920000 }, { "epoch": 5.56, "learning_rate": 3.6094145352559216e-05, "loss": 2.517, "step": 2920500 }, { "epoch": 5.57, "learning_rate": 3.609176847392007e-05, "loss": 2.5379, "step": 2921000 }, { "epoch": 5.57, "learning_rate": 3.608938683199707e-05, "loss": 2.5173, "step": 2921500 }, { "epoch": 5.57, "learning_rate": 3.608700519007408e-05, "loss": 2.5098, "step": 2922000 }, { "epoch": 5.57, "learning_rate": 3.608462354815108e-05, "loss": 2.5154, "step": 2922500 }, { "epoch": 5.57, "learning_rate": 3.608224190622809e-05, "loss": 2.5007, "step": 2923000 }, { "epoch": 5.57, "learning_rate": 3.6079865027588945e-05, "loss": 2.5245, "step": 2923500 }, { "epoch": 5.57, "learning_rate": 3.607748338566595e-05, "loss": 2.527, "step": 2924000 }, { "epoch": 5.57, "learning_rate": 3.607510174374295e-05, "loss": 2.5094, "step": 2924500 }, { "epoch": 5.57, "learning_rate": 3.607272010181996e-05, "loss": 2.5258, "step": 2925000 }, { "epoch": 5.57, "learning_rate": 3.607033845989696e-05, "loss": 2.5042, "step": 2925500 }, { "epoch": 5.57, "learning_rate": 3.6067961581257814e-05, "loss": 2.5233, "step": 2926000 }, { "epoch": 5.58, "learning_rate": 3.6065579939334824e-05, "loss": 2.5324, "step": 2926500 }, { "epoch": 5.58, "learning_rate": 3.6063198297411826e-05, "loss": 2.528, "step": 2927000 }, { "epoch": 5.58, "learning_rate": 3.606081665548883e-05, "loss": 2.5237, "step": 2927500 }, { "epoch": 5.58, "learning_rate": 3.605843501356583e-05, "loss": 2.5489, "step": 2928000 }, { "epoch": 5.58, "learning_rate": 3.605605337164284e-05, "loss": 2.5368, "step": 2928500 }, { "epoch": 5.58, "learning_rate": 3.6053671729719844e-05, "loss": 2.5434, "step": 2929000 }, { "epoch": 5.58, "learning_rate": 3.605129008779685e-05, "loss": 2.5395, "step": 2929500 }, { "epoch": 5.58, "learning_rate": 3.6048908445873856e-05, "loss": 2.5353, "step": 2930000 }, { "epoch": 5.58, "learning_rate": 3.604652680395086e-05, "loss": 2.5311, "step": 2930500 }, { "epoch": 5.58, "learning_rate": 3.604414992531171e-05, "loss": 2.5234, "step": 2931000 }, { "epoch": 5.59, "learning_rate": 3.604176828338871e-05, "loss": 2.5257, "step": 2931500 }, { "epoch": 5.59, "learning_rate": 3.603938664146572e-05, "loss": 2.5437, "step": 2932000 }, { "epoch": 5.59, "learning_rate": 3.6037004999542725e-05, "loss": 2.5297, "step": 2932500 }, { "epoch": 5.59, "learning_rate": 3.603462335761973e-05, "loss": 2.5294, "step": 2933000 }, { "epoch": 5.59, "learning_rate": 3.603224171569674e-05, "loss": 2.5201, "step": 2933500 }, { "epoch": 5.59, "learning_rate": 3.602986007377374e-05, "loss": 2.5404, "step": 2934000 }, { "epoch": 5.59, "learning_rate": 3.602747843185075e-05, "loss": 2.528, "step": 2934500 }, { "epoch": 5.59, "learning_rate": 3.60251015532116e-05, "loss": 2.5212, "step": 2935000 }, { "epoch": 5.59, "learning_rate": 3.6022719911288604e-05, "loss": 2.5454, "step": 2935500 }, { "epoch": 5.59, "learning_rate": 3.6020338269365614e-05, "loss": 2.5221, "step": 2936000 }, { "epoch": 5.59, "learning_rate": 3.601795662744261e-05, "loss": 2.5335, "step": 2936500 }, { "epoch": 5.6, "learning_rate": 3.601557974880346e-05, "loss": 2.5369, "step": 2937000 }, { "epoch": 5.6, "learning_rate": 3.601319810688047e-05, "loss": 2.5419, "step": 2937500 }, { "epoch": 5.6, "learning_rate": 3.6010816464957474e-05, "loss": 2.5543, "step": 2938000 }, { "epoch": 5.6, "learning_rate": 3.600843482303448e-05, "loss": 2.5461, "step": 2938500 }, { "epoch": 5.6, "learning_rate": 3.6006053181111486e-05, "loss": 2.5294, "step": 2939000 }, { "epoch": 5.6, "learning_rate": 3.600367630247234e-05, "loss": 2.5208, "step": 2939500 }, { "epoch": 5.6, "learning_rate": 3.600129466054934e-05, "loss": 2.5617, "step": 2940000 }, { "epoch": 5.6, "eval_accuracy": 0.5386772800223569, "eval_loss": 2.429373025894165, "eval_runtime": 4205.3075, "eval_samples_per_second": 65.391, "eval_steps_per_second": 6.539, "step": 2940000 }, { "epoch": 5.6, "learning_rate": 3.599891301862634e-05, "loss": 2.5181, "step": 2940500 }, { "epoch": 5.6, "learning_rate": 3.599653137670335e-05, "loss": 2.5183, "step": 2941000 }, { "epoch": 5.6, "learning_rate": 3.5994154498064205e-05, "loss": 2.5368, "step": 2941500 }, { "epoch": 5.61, "learning_rate": 3.599177285614121e-05, "loss": 2.5278, "step": 2942000 }, { "epoch": 5.61, "learning_rate": 3.598939121421822e-05, "loss": 2.5222, "step": 2942500 }, { "epoch": 5.61, "learning_rate": 3.598700957229521e-05, "loss": 2.5066, "step": 2943000 }, { "epoch": 5.61, "learning_rate": 3.598462793037222e-05, "loss": 2.5303, "step": 2943500 }, { "epoch": 5.61, "learning_rate": 3.598224628844923e-05, "loss": 2.5247, "step": 2944000 }, { "epoch": 5.61, "learning_rate": 3.5979864646526234e-05, "loss": 2.533, "step": 2944500 }, { "epoch": 5.61, "learning_rate": 3.5977483004603244e-05, "loss": 2.5006, "step": 2945000 }, { "epoch": 5.61, "learning_rate": 3.5975101362680246e-05, "loss": 2.5071, "step": 2945500 }, { "epoch": 5.61, "learning_rate": 3.59727244840411e-05, "loss": 2.5217, "step": 2946000 }, { "epoch": 5.61, "learning_rate": 3.59703428421181e-05, "loss": 2.5271, "step": 2946500 }, { "epoch": 5.61, "learning_rate": 3.5967961200195104e-05, "loss": 2.5132, "step": 2947000 }, { "epoch": 5.62, "learning_rate": 3.596557955827211e-05, "loss": 2.5298, "step": 2947500 }, { "epoch": 5.62, "learning_rate": 3.5963202679632966e-05, "loss": 2.527, "step": 2948000 }, { "epoch": 5.62, "learning_rate": 3.596082580099381e-05, "loss": 2.5321, "step": 2948500 }, { "epoch": 5.62, "learning_rate": 3.595844415907082e-05, "loss": 2.5377, "step": 2949000 }, { "epoch": 5.62, "learning_rate": 3.595606251714782e-05, "loss": 2.5324, "step": 2949500 }, { "epoch": 5.62, "learning_rate": 3.5953680875224826e-05, "loss": 2.5221, "step": 2950000 }, { "epoch": 5.62, "learning_rate": 3.5951299233301835e-05, "loss": 2.5097, "step": 2950500 }, { "epoch": 5.62, "learning_rate": 3.594891759137884e-05, "loss": 2.5215, "step": 2951000 }, { "epoch": 5.62, "learning_rate": 3.594653594945585e-05, "loss": 2.5349, "step": 2951500 }, { "epoch": 5.62, "learning_rate": 3.594415430753285e-05, "loss": 2.5557, "step": 2952000 }, { "epoch": 5.63, "learning_rate": 3.594177266560985e-05, "loss": 2.5074, "step": 2952500 }, { "epoch": 5.63, "learning_rate": 3.5939395786970705e-05, "loss": 2.5219, "step": 2953000 }, { "epoch": 5.63, "learning_rate": 3.593701414504771e-05, "loss": 2.5272, "step": 2953500 }, { "epoch": 5.63, "learning_rate": 3.593463250312472e-05, "loss": 2.5284, "step": 2954000 }, { "epoch": 5.63, "learning_rate": 3.593225562448557e-05, "loss": 2.5135, "step": 2954500 }, { "epoch": 5.63, "learning_rate": 3.592987398256257e-05, "loss": 2.5356, "step": 2955000 }, { "epoch": 5.63, "learning_rate": 3.592749234063958e-05, "loss": 2.5579, "step": 2955500 }, { "epoch": 5.63, "learning_rate": 3.592511069871658e-05, "loss": 2.5197, "step": 2956000 }, { "epoch": 5.63, "learning_rate": 3.5922729056793586e-05, "loss": 2.5317, "step": 2956500 }, { "epoch": 5.63, "learning_rate": 3.592035217815444e-05, "loss": 2.514, "step": 2957000 }, { "epoch": 5.63, "learning_rate": 3.591797053623144e-05, "loss": 2.5406, "step": 2957500 }, { "epoch": 5.64, "learning_rate": 3.591558889430845e-05, "loss": 2.5417, "step": 2958000 }, { "epoch": 5.64, "learning_rate": 3.591320725238545e-05, "loss": 2.5257, "step": 2958500 }, { "epoch": 5.64, "learning_rate": 3.591082561046246e-05, "loss": 2.5248, "step": 2959000 }, { "epoch": 5.64, "learning_rate": 3.5908443968539465e-05, "loss": 2.54, "step": 2959500 }, { "epoch": 5.64, "learning_rate": 3.590606232661647e-05, "loss": 2.5242, "step": 2960000 }, { "epoch": 5.64, "learning_rate": 3.590368068469348e-05, "loss": 2.5265, "step": 2960500 }, { "epoch": 5.64, "learning_rate": 3.590130380605432e-05, "loss": 2.5362, "step": 2961000 }, { "epoch": 5.64, "learning_rate": 3.589892216413133e-05, "loss": 2.5237, "step": 2961500 }, { "epoch": 5.64, "learning_rate": 3.589654052220834e-05, "loss": 2.5358, "step": 2962000 }, { "epoch": 5.64, "learning_rate": 3.589415888028534e-05, "loss": 2.5127, "step": 2962500 }, { "epoch": 5.65, "learning_rate": 3.589178200164619e-05, "loss": 2.5391, "step": 2963000 }, { "epoch": 5.65, "learning_rate": 3.58894003597232e-05, "loss": 2.5282, "step": 2963500 }, { "epoch": 5.65, "learning_rate": 3.58870187178002e-05, "loss": 2.5184, "step": 2964000 }, { "epoch": 5.65, "learning_rate": 3.588463707587721e-05, "loss": 2.5231, "step": 2964500 }, { "epoch": 5.65, "learning_rate": 3.588226019723806e-05, "loss": 2.5467, "step": 2965000 }, { "epoch": 5.65, "learning_rate": 3.5879878555315066e-05, "loss": 2.5336, "step": 2965500 }, { "epoch": 5.65, "learning_rate": 3.587749691339207e-05, "loss": 2.5475, "step": 2966000 }, { "epoch": 5.65, "learning_rate": 3.587511527146907e-05, "loss": 2.5288, "step": 2966500 }, { "epoch": 5.65, "learning_rate": 3.587273362954608e-05, "loss": 2.524, "step": 2967000 }, { "epoch": 5.65, "learning_rate": 3.5870351987623083e-05, "loss": 2.5399, "step": 2967500 }, { "epoch": 5.65, "learning_rate": 3.586797034570009e-05, "loss": 2.5266, "step": 2968000 }, { "epoch": 5.66, "learning_rate": 3.5865588703777095e-05, "loss": 2.5314, "step": 2968500 }, { "epoch": 5.66, "learning_rate": 3.586321182513795e-05, "loss": 2.5284, "step": 2969000 }, { "epoch": 5.66, "learning_rate": 3.586083018321495e-05, "loss": 2.5178, "step": 2969500 }, { "epoch": 5.66, "learning_rate": 3.585844854129195e-05, "loss": 2.5151, "step": 2970000 }, { "epoch": 5.66, "eval_accuracy": 0.5390215481330307, "eval_loss": 2.428499221801758, "eval_runtime": 4206.111, "eval_samples_per_second": 65.378, "eval_steps_per_second": 6.538, "step": 2970000 }, { "epoch": 5.66, "learning_rate": 3.585606689936896e-05, "loss": 2.5392, "step": 2970500 }, { "epoch": 5.66, "learning_rate": 3.5853685257445965e-05, "loss": 2.5153, "step": 2971000 }, { "epoch": 5.66, "learning_rate": 3.585130837880682e-05, "loss": 2.5124, "step": 2971500 }, { "epoch": 5.66, "learning_rate": 3.584892673688383e-05, "loss": 2.5199, "step": 2972000 }, { "epoch": 5.66, "learning_rate": 3.584654509496083e-05, "loss": 2.5262, "step": 2972500 }, { "epoch": 5.66, "learning_rate": 3.5844168216321675e-05, "loss": 2.5344, "step": 2973000 }, { "epoch": 5.67, "learning_rate": 3.5841786574398684e-05, "loss": 2.5295, "step": 2973500 }, { "epoch": 5.67, "learning_rate": 3.583940493247569e-05, "loss": 2.5293, "step": 2974000 }, { "epoch": 5.67, "learning_rate": 3.5837023290552696e-05, "loss": 2.5476, "step": 2974500 }, { "epoch": 5.67, "learning_rate": 3.58346416486297e-05, "loss": 2.5128, "step": 2975000 }, { "epoch": 5.67, "learning_rate": 3.58322600067067e-05, "loss": 2.5101, "step": 2975500 }, { "epoch": 5.67, "learning_rate": 3.582987836478371e-05, "loss": 2.5244, "step": 2976000 }, { "epoch": 5.67, "learning_rate": 3.5827496722860714e-05, "loss": 2.5358, "step": 2976500 }, { "epoch": 5.67, "learning_rate": 3.5825119844221566e-05, "loss": 2.53, "step": 2977000 }, { "epoch": 5.67, "learning_rate": 3.5822738202298575e-05, "loss": 2.5429, "step": 2977500 }, { "epoch": 5.67, "learning_rate": 3.582035656037558e-05, "loss": 2.5415, "step": 2978000 }, { "epoch": 5.67, "learning_rate": 3.581797491845259e-05, "loss": 2.5295, "step": 2978500 }, { "epoch": 5.68, "learning_rate": 3.581559327652958e-05, "loss": 2.5194, "step": 2979000 }, { "epoch": 5.68, "learning_rate": 3.5813216397890435e-05, "loss": 2.5204, "step": 2979500 }, { "epoch": 5.68, "learning_rate": 3.5810834755967445e-05, "loss": 2.5165, "step": 2980000 }, { "epoch": 5.68, "learning_rate": 3.580845311404445e-05, "loss": 2.5225, "step": 2980500 }, { "epoch": 5.68, "learning_rate": 3.580607147212146e-05, "loss": 2.5305, "step": 2981000 }, { "epoch": 5.68, "learning_rate": 3.580369459348231e-05, "loss": 2.5331, "step": 2981500 }, { "epoch": 5.68, "learning_rate": 3.580131295155931e-05, "loss": 2.5402, "step": 2982000 }, { "epoch": 5.68, "learning_rate": 3.5798936072920164e-05, "loss": 2.5272, "step": 2982500 }, { "epoch": 5.68, "learning_rate": 3.579655443099717e-05, "loss": 2.5304, "step": 2983000 }, { "epoch": 5.68, "learning_rate": 3.579417278907417e-05, "loss": 2.5335, "step": 2983500 }, { "epoch": 5.69, "learning_rate": 3.579179114715118e-05, "loss": 2.5212, "step": 2984000 }, { "epoch": 5.69, "learning_rate": 3.578940950522818e-05, "loss": 2.5423, "step": 2984500 }, { "epoch": 5.69, "learning_rate": 3.578702786330519e-05, "loss": 2.5372, "step": 2985000 }, { "epoch": 5.69, "learning_rate": 3.5784646221382187e-05, "loss": 2.5243, "step": 2985500 }, { "epoch": 5.69, "learning_rate": 3.578226934274304e-05, "loss": 2.5255, "step": 2986000 }, { "epoch": 5.69, "learning_rate": 3.577988770082005e-05, "loss": 2.5362, "step": 2986500 }, { "epoch": 5.69, "learning_rate": 3.577750605889705e-05, "loss": 2.5133, "step": 2987000 }, { "epoch": 5.69, "learning_rate": 3.577512441697406e-05, "loss": 2.5124, "step": 2987500 }, { "epoch": 5.69, "learning_rate": 3.577274277505106e-05, "loss": 2.5216, "step": 2988000 }, { "epoch": 5.69, "learning_rate": 3.5770365896411915e-05, "loss": 2.5272, "step": 2988500 }, { "epoch": 5.69, "learning_rate": 3.576798425448892e-05, "loss": 2.5154, "step": 2989000 }, { "epoch": 5.7, "learning_rate": 3.576560261256592e-05, "loss": 2.5203, "step": 2989500 }, { "epoch": 5.7, "learning_rate": 3.576322097064293e-05, "loss": 2.5006, "step": 2990000 }, { "epoch": 5.7, "learning_rate": 3.576083932871994e-05, "loss": 2.5259, "step": 2990500 }, { "epoch": 5.7, "learning_rate": 3.575845768679694e-05, "loss": 2.5343, "step": 2991000 }, { "epoch": 5.7, "learning_rate": 3.575607604487395e-05, "loss": 2.5138, "step": 2991500 }, { "epoch": 5.7, "learning_rate": 3.575369440295095e-05, "loss": 2.5409, "step": 2992000 }, { "epoch": 5.7, "learning_rate": 3.57513175243118e-05, "loss": 2.5181, "step": 2992500 }, { "epoch": 5.7, "learning_rate": 3.574893588238881e-05, "loss": 2.5206, "step": 2993000 }, { "epoch": 5.7, "learning_rate": 3.574655424046581e-05, "loss": 2.506, "step": 2993500 }, { "epoch": 5.7, "learning_rate": 3.574417259854282e-05, "loss": 2.5197, "step": 2994000 }, { "epoch": 5.71, "learning_rate": 3.5741790956619824e-05, "loss": 2.5428, "step": 2994500 }, { "epoch": 5.71, "learning_rate": 3.5739409314696826e-05, "loss": 2.4971, "step": 2995000 }, { "epoch": 5.71, "learning_rate": 3.573702767277383e-05, "loss": 2.5165, "step": 2995500 }, { "epoch": 5.71, "learning_rate": 3.573464603085084e-05, "loss": 2.5366, "step": 2996000 }, { "epoch": 5.71, "learning_rate": 3.573226915221169e-05, "loss": 2.5058, "step": 2996500 }, { "epoch": 5.71, "learning_rate": 3.572988751028869e-05, "loss": 2.5353, "step": 2997000 }, { "epoch": 5.71, "learning_rate": 3.5727510631649545e-05, "loss": 2.5423, "step": 2997500 }, { "epoch": 5.71, "learning_rate": 3.5725128989726555e-05, "loss": 2.5142, "step": 2998000 }, { "epoch": 5.71, "learning_rate": 3.572274734780356e-05, "loss": 2.5298, "step": 2998500 }, { "epoch": 5.71, "learning_rate": 3.572036570588056e-05, "loss": 2.528, "step": 2999000 }, { "epoch": 5.71, "learning_rate": 3.571798406395756e-05, "loss": 2.5245, "step": 2999500 }, { "epoch": 5.72, "learning_rate": 3.571560242203457e-05, "loss": 2.5296, "step": 3000000 }, { "epoch": 5.72, "eval_accuracy": 0.5391811605364853, "eval_loss": 2.4274587631225586, "eval_runtime": 4208.799, "eval_samples_per_second": 65.336, "eval_steps_per_second": 6.534, "step": 3000000 }, { "epoch": 5.72, "learning_rate": 3.571322078011158e-05, "loss": 2.5263, "step": 3000500 }, { "epoch": 5.72, "learning_rate": 3.5710839138188584e-05, "loss": 2.5203, "step": 3001000 }, { "epoch": 5.72, "learning_rate": 3.5708462259549436e-05, "loss": 2.5365, "step": 3001500 }, { "epoch": 5.72, "learning_rate": 3.570608538091029e-05, "loss": 2.5459, "step": 3002000 }, { "epoch": 5.72, "learning_rate": 3.5703703738987284e-05, "loss": 2.5354, "step": 3002500 }, { "epoch": 5.72, "learning_rate": 3.5701322097064294e-05, "loss": 2.508, "step": 3003000 }, { "epoch": 5.72, "learning_rate": 3.5698940455141297e-05, "loss": 2.5479, "step": 3003500 }, { "epoch": 5.72, "learning_rate": 3.5696558813218306e-05, "loss": 2.51, "step": 3004000 }, { "epoch": 5.72, "learning_rate": 3.5694177171295315e-05, "loss": 2.525, "step": 3004500 }, { "epoch": 5.73, "learning_rate": 3.569179552937231e-05, "loss": 2.5302, "step": 3005000 }, { "epoch": 5.73, "learning_rate": 3.568941388744932e-05, "loss": 2.5234, "step": 3005500 }, { "epoch": 5.73, "learning_rate": 3.568703700881017e-05, "loss": 2.5161, "step": 3006000 }, { "epoch": 5.73, "learning_rate": 3.5684655366887176e-05, "loss": 2.5229, "step": 3006500 }, { "epoch": 5.73, "learning_rate": 3.5682273724964185e-05, "loss": 2.5133, "step": 3007000 }, { "epoch": 5.73, "learning_rate": 3.567989208304119e-05, "loss": 2.5375, "step": 3007500 }, { "epoch": 5.73, "learning_rate": 3.567751520440204e-05, "loss": 2.5369, "step": 3008000 }, { "epoch": 5.73, "learning_rate": 3.567513356247904e-05, "loss": 2.5227, "step": 3008500 }, { "epoch": 5.73, "learning_rate": 3.5672751920556045e-05, "loss": 2.538, "step": 3009000 }, { "epoch": 5.73, "learning_rate": 3.5670370278633054e-05, "loss": 2.515, "step": 3009500 }, { "epoch": 5.73, "learning_rate": 3.566798863671006e-05, "loss": 2.5143, "step": 3010000 }, { "epoch": 5.74, "learning_rate": 3.566561175807091e-05, "loss": 2.5292, "step": 3010500 }, { "epoch": 5.74, "learning_rate": 3.566323011614792e-05, "loss": 2.545, "step": 3011000 }, { "epoch": 5.74, "learning_rate": 3.566084847422492e-05, "loss": 2.5479, "step": 3011500 }, { "epoch": 5.74, "learning_rate": 3.5658466832301924e-05, "loss": 2.5333, "step": 3012000 }, { "epoch": 5.74, "learning_rate": 3.5656089953662776e-05, "loss": 2.5241, "step": 3012500 }, { "epoch": 5.74, "learning_rate": 3.565370831173978e-05, "loss": 2.5224, "step": 3013000 }, { "epoch": 5.74, "learning_rate": 3.565132666981679e-05, "loss": 2.5456, "step": 3013500 }, { "epoch": 5.74, "learning_rate": 3.564894502789379e-05, "loss": 2.5381, "step": 3014000 }, { "epoch": 5.74, "learning_rate": 3.564656814925464e-05, "loss": 2.5264, "step": 3014500 }, { "epoch": 5.74, "learning_rate": 3.564418650733165e-05, "loss": 2.5257, "step": 3015000 }, { "epoch": 5.75, "learning_rate": 3.5641809628692505e-05, "loss": 2.52, "step": 3015500 }, { "epoch": 5.75, "learning_rate": 3.56394279867695e-05, "loss": 2.5362, "step": 3016000 }, { "epoch": 5.75, "learning_rate": 3.563704634484651e-05, "loss": 2.5227, "step": 3016500 }, { "epoch": 5.75, "learning_rate": 3.563466470292351e-05, "loss": 2.5097, "step": 3017000 }, { "epoch": 5.75, "learning_rate": 3.563228306100052e-05, "loss": 2.5354, "step": 3017500 }, { "epoch": 5.75, "learning_rate": 3.5629901419077525e-05, "loss": 2.5212, "step": 3018000 }, { "epoch": 5.75, "learning_rate": 3.562751977715453e-05, "loss": 2.5163, "step": 3018500 }, { "epoch": 5.75, "learning_rate": 3.562514289851538e-05, "loss": 2.5293, "step": 3019000 }, { "epoch": 5.75, "learning_rate": 3.562276125659238e-05, "loss": 2.5437, "step": 3019500 }, { "epoch": 5.75, "learning_rate": 3.562037961466939e-05, "loss": 2.5309, "step": 3020000 }, { "epoch": 5.75, "learning_rate": 3.5617997972746394e-05, "loss": 2.5322, "step": 3020500 }, { "epoch": 5.76, "learning_rate": 3.5615616330823404e-05, "loss": 2.5453, "step": 3021000 }, { "epoch": 5.76, "learning_rate": 3.5613234688900406e-05, "loss": 2.5153, "step": 3021500 }, { "epoch": 5.76, "learning_rate": 3.561085304697741e-05, "loss": 2.5378, "step": 3022000 }, { "epoch": 5.76, "learning_rate": 3.560847140505442e-05, "loss": 2.5046, "step": 3022500 }, { "epoch": 5.76, "learning_rate": 3.560608976313142e-05, "loss": 2.5324, "step": 3023000 }, { "epoch": 5.76, "learning_rate": 3.5603712884492273e-05, "loss": 2.5225, "step": 3023500 }, { "epoch": 5.76, "learning_rate": 3.560133124256928e-05, "loss": 2.5211, "step": 3024000 }, { "epoch": 5.76, "learning_rate": 3.559895436393013e-05, "loss": 2.5128, "step": 3024500 }, { "epoch": 5.76, "learning_rate": 3.559657272200714e-05, "loss": 2.5376, "step": 3025000 }, { "epoch": 5.76, "learning_rate": 3.559419108008414e-05, "loss": 2.5342, "step": 3025500 }, { "epoch": 5.77, "learning_rate": 3.559180943816114e-05, "loss": 2.5181, "step": 3026000 }, { "epoch": 5.77, "learning_rate": 3.558942779623815e-05, "loss": 2.5343, "step": 3026500 }, { "epoch": 5.77, "learning_rate": 3.5587046154315155e-05, "loss": 2.5184, "step": 3027000 }, { "epoch": 5.77, "learning_rate": 3.5584664512392164e-05, "loss": 2.5385, "step": 3027500 }, { "epoch": 5.77, "learning_rate": 3.558228287046916e-05, "loss": 2.5248, "step": 3028000 }, { "epoch": 5.77, "learning_rate": 3.557990599183001e-05, "loss": 2.5337, "step": 3028500 }, { "epoch": 5.77, "learning_rate": 3.557752434990702e-05, "loss": 2.5364, "step": 3029000 }, { "epoch": 5.77, "learning_rate": 3.5575142707984025e-05, "loss": 2.5098, "step": 3029500 }, { "epoch": 5.77, "learning_rate": 3.5572761066061034e-05, "loss": 2.519, "step": 3030000 }, { "epoch": 5.77, "eval_accuracy": 0.5391513645148877, "eval_loss": 2.427370071411133, "eval_runtime": 4209.1643, "eval_samples_per_second": 65.331, "eval_steps_per_second": 6.533, "step": 3030000 }, { "epoch": 5.77, "learning_rate": 3.5570384187421886e-05, "loss": 2.541, "step": 3030500 }, { "epoch": 5.78, "learning_rate": 3.556800254549889e-05, "loss": 2.5192, "step": 3031000 }, { "epoch": 5.78, "learning_rate": 3.556562090357589e-05, "loss": 2.5205, "step": 3031500 }, { "epoch": 5.78, "learning_rate": 3.55632392616529e-05, "loss": 2.5067, "step": 3032000 }, { "epoch": 5.78, "learning_rate": 3.5560857619729904e-05, "loss": 2.5258, "step": 3032500 }, { "epoch": 5.78, "learning_rate": 3.555847597780691e-05, "loss": 2.5025, "step": 3033000 }, { "epoch": 5.78, "learning_rate": 3.5556094335883916e-05, "loss": 2.5454, "step": 3033500 }, { "epoch": 5.78, "learning_rate": 3.5553712693960925e-05, "loss": 2.5367, "step": 3034000 }, { "epoch": 5.78, "learning_rate": 3.555133581532177e-05, "loss": 2.5473, "step": 3034500 }, { "epoch": 5.78, "learning_rate": 3.554895417339877e-05, "loss": 2.5215, "step": 3035000 }, { "epoch": 5.78, "learning_rate": 3.554657253147578e-05, "loss": 2.5211, "step": 3035500 }, { "epoch": 5.78, "learning_rate": 3.5544190889552785e-05, "loss": 2.5134, "step": 3036000 }, { "epoch": 5.79, "learning_rate": 3.5541809247629795e-05, "loss": 2.527, "step": 3036500 }, { "epoch": 5.79, "learning_rate": 3.55394276057068e-05, "loss": 2.534, "step": 3037000 }, { "epoch": 5.79, "learning_rate": 3.55370459637838e-05, "loss": 2.5453, "step": 3037500 }, { "epoch": 5.79, "learning_rate": 3.55346643218608e-05, "loss": 2.533, "step": 3038000 }, { "epoch": 5.79, "learning_rate": 3.5532287443221655e-05, "loss": 2.5329, "step": 3038500 }, { "epoch": 5.79, "learning_rate": 3.552991056458251e-05, "loss": 2.5447, "step": 3039000 }, { "epoch": 5.79, "learning_rate": 3.5527528922659516e-05, "loss": 2.5262, "step": 3039500 }, { "epoch": 5.79, "learning_rate": 3.552514728073652e-05, "loss": 2.5221, "step": 3040000 }, { "epoch": 5.79, "learning_rate": 3.552276563881353e-05, "loss": 2.541, "step": 3040500 }, { "epoch": 5.79, "learning_rate": 3.552038399689053e-05, "loss": 2.502, "step": 3041000 }, { "epoch": 5.8, "learning_rate": 3.5518002354967534e-05, "loss": 2.5092, "step": 3041500 }, { "epoch": 5.8, "learning_rate": 3.5515620713044536e-05, "loss": 2.524, "step": 3042000 }, { "epoch": 5.8, "learning_rate": 3.5513239071121546e-05, "loss": 2.5423, "step": 3042500 }, { "epoch": 5.8, "learning_rate": 3.55108621924824e-05, "loss": 2.5223, "step": 3043000 }, { "epoch": 5.8, "learning_rate": 3.550848531384325e-05, "loss": 2.5177, "step": 3043500 }, { "epoch": 5.8, "learning_rate": 3.550610367192025e-05, "loss": 2.4914, "step": 3044000 }, { "epoch": 5.8, "learning_rate": 3.550372202999726e-05, "loss": 2.5148, "step": 3044500 }, { "epoch": 5.8, "learning_rate": 3.550134038807426e-05, "loss": 2.5223, "step": 3045000 }, { "epoch": 5.8, "learning_rate": 3.549895874615127e-05, "loss": 2.5192, "step": 3045500 }, { "epoch": 5.8, "learning_rate": 3.549657710422827e-05, "loss": 2.5361, "step": 3046000 }, { "epoch": 5.8, "learning_rate": 3.549419546230528e-05, "loss": 2.5294, "step": 3046500 }, { "epoch": 5.81, "learning_rate": 3.549181382038229e-05, "loss": 2.5371, "step": 3047000 }, { "epoch": 5.81, "learning_rate": 3.5489436941743135e-05, "loss": 2.5383, "step": 3047500 }, { "epoch": 5.81, "learning_rate": 3.548705529982014e-05, "loss": 2.5295, "step": 3048000 }, { "epoch": 5.81, "learning_rate": 3.5484673657897147e-05, "loss": 2.5288, "step": 3048500 }, { "epoch": 5.81, "learning_rate": 3.548229677925799e-05, "loss": 2.5295, "step": 3049000 }, { "epoch": 5.81, "learning_rate": 3.5479915137335e-05, "loss": 2.5284, "step": 3049500 }, { "epoch": 5.81, "learning_rate": 3.547753349541201e-05, "loss": 2.536, "step": 3050000 }, { "epoch": 5.81, "learning_rate": 3.5475151853489013e-05, "loss": 2.5403, "step": 3050500 }, { "epoch": 5.81, "learning_rate": 3.5472770211566016e-05, "loss": 2.536, "step": 3051000 }, { "epoch": 5.81, "learning_rate": 3.547038856964302e-05, "loss": 2.5178, "step": 3051500 }, { "epoch": 5.82, "learning_rate": 3.546800692772003e-05, "loss": 2.5244, "step": 3052000 }, { "epoch": 5.82, "learning_rate": 3.546563004908088e-05, "loss": 2.5002, "step": 3052500 }, { "epoch": 5.82, "learning_rate": 3.546324840715788e-05, "loss": 2.5121, "step": 3053000 }, { "epoch": 5.82, "learning_rate": 3.546086676523489e-05, "loss": 2.5236, "step": 3053500 }, { "epoch": 5.82, "learning_rate": 3.5458485123311895e-05, "loss": 2.5195, "step": 3054000 }, { "epoch": 5.82, "learning_rate": 3.54561034813889e-05, "loss": 2.5141, "step": 3054500 }, { "epoch": 5.82, "learning_rate": 3.54537218394659e-05, "loss": 2.5263, "step": 3055000 }, { "epoch": 5.82, "learning_rate": 3.545134019754291e-05, "loss": 2.5185, "step": 3055500 }, { "epoch": 5.82, "learning_rate": 3.544896331890376e-05, "loss": 2.5241, "step": 3056000 }, { "epoch": 5.82, "learning_rate": 3.5446581676980765e-05, "loss": 2.5219, "step": 3056500 }, { "epoch": 5.82, "learning_rate": 3.5444200035057774e-05, "loss": 2.536, "step": 3057000 }, { "epoch": 5.83, "learning_rate": 3.544181839313478e-05, "loss": 2.5095, "step": 3057500 }, { "epoch": 5.83, "learning_rate": 3.543943675121178e-05, "loss": 2.5049, "step": 3058000 }, { "epoch": 5.83, "learning_rate": 3.543705510928879e-05, "loss": 2.5136, "step": 3058500 }, { "epoch": 5.83, "learning_rate": 3.543467346736579e-05, "loss": 2.5279, "step": 3059000 }, { "epoch": 5.83, "learning_rate": 3.54322918254428e-05, "loss": 2.516, "step": 3059500 }, { "epoch": 5.83, "learning_rate": 3.54299101835198e-05, "loss": 2.5391, "step": 3060000 }, { "epoch": 5.83, "eval_accuracy": 0.539417321201582, "eval_loss": 2.427011489868164, "eval_runtime": 4210.7514, "eval_samples_per_second": 65.306, "eval_steps_per_second": 6.531, "step": 3060000 }, { "epoch": 5.83, "learning_rate": 3.542753330488065e-05, "loss": 2.5304, "step": 3060500 }, { "epoch": 5.83, "learning_rate": 3.542515166295766e-05, "loss": 2.5471, "step": 3061000 }, { "epoch": 5.83, "learning_rate": 3.542277002103466e-05, "loss": 2.5232, "step": 3061500 }, { "epoch": 5.83, "learning_rate": 3.542039314239551e-05, "loss": 2.5258, "step": 3062000 }, { "epoch": 5.84, "learning_rate": 3.541801150047252e-05, "loss": 2.5071, "step": 3062500 }, { "epoch": 5.84, "learning_rate": 3.5415629858549525e-05, "loss": 2.5311, "step": 3063000 }, { "epoch": 5.84, "learning_rate": 3.5413248216626535e-05, "loss": 2.5123, "step": 3063500 }, { "epoch": 5.84, "learning_rate": 3.541087610127123e-05, "loss": 2.5083, "step": 3064000 }, { "epoch": 5.84, "learning_rate": 3.540849445934823e-05, "loss": 2.5209, "step": 3064500 }, { "epoch": 5.84, "learning_rate": 3.5406112817425235e-05, "loss": 2.518, "step": 3065000 }, { "epoch": 5.84, "learning_rate": 3.5403731175502244e-05, "loss": 2.5154, "step": 3065500 }, { "epoch": 5.84, "learning_rate": 3.540134953357925e-05, "loss": 2.51, "step": 3066000 }, { "epoch": 5.84, "learning_rate": 3.5398967891656256e-05, "loss": 2.5345, "step": 3066500 }, { "epoch": 5.84, "learning_rate": 3.539658624973326e-05, "loss": 2.5186, "step": 3067000 }, { "epoch": 5.84, "learning_rate": 3.539420460781026e-05, "loss": 2.5383, "step": 3067500 }, { "epoch": 5.85, "learning_rate": 3.5391822965887264e-05, "loss": 2.5228, "step": 3068000 }, { "epoch": 5.85, "learning_rate": 3.5389441323964274e-05, "loss": 2.535, "step": 3068500 }, { "epoch": 5.85, "learning_rate": 3.5387064445325126e-05, "loss": 2.5215, "step": 3069000 }, { "epoch": 5.85, "learning_rate": 3.538468280340213e-05, "loss": 2.5318, "step": 3069500 }, { "epoch": 5.85, "learning_rate": 3.538230116147914e-05, "loss": 2.5096, "step": 3070000 }, { "epoch": 5.85, "learning_rate": 3.5379919519556134e-05, "loss": 2.5212, "step": 3070500 }, { "epoch": 5.85, "learning_rate": 3.537753787763314e-05, "loss": 2.5249, "step": 3071000 }, { "epoch": 5.85, "learning_rate": 3.537515623571015e-05, "loss": 2.5152, "step": 3071500 }, { "epoch": 5.85, "learning_rate": 3.5372774593787155e-05, "loss": 2.5168, "step": 3072000 }, { "epoch": 5.85, "learning_rate": 3.5370392951864165e-05, "loss": 2.5055, "step": 3072500 }, { "epoch": 5.86, "learning_rate": 3.536801607322501e-05, "loss": 2.5255, "step": 3073000 }, { "epoch": 5.86, "learning_rate": 3.536563443130202e-05, "loss": 2.5228, "step": 3073500 }, { "epoch": 5.86, "learning_rate": 3.536325278937902e-05, "loss": 2.5168, "step": 3074000 }, { "epoch": 5.86, "learning_rate": 3.5360871147456025e-05, "loss": 2.5204, "step": 3074500 }, { "epoch": 5.86, "learning_rate": 3.535849426881688e-05, "loss": 2.519, "step": 3075000 }, { "epoch": 5.86, "learning_rate": 3.5356112626893887e-05, "loss": 2.5386, "step": 3075500 }, { "epoch": 5.86, "learning_rate": 3.535373098497089e-05, "loss": 2.5086, "step": 3076000 }, { "epoch": 5.86, "learning_rate": 3.53513493430479e-05, "loss": 2.5302, "step": 3076500 }, { "epoch": 5.86, "learning_rate": 3.5348967701124894e-05, "loss": 2.5308, "step": 3077000 }, { "epoch": 5.86, "learning_rate": 3.5346586059201904e-05, "loss": 2.5132, "step": 3077500 }, { "epoch": 5.86, "learning_rate": 3.5344204417278907e-05, "loss": 2.5317, "step": 3078000 }, { "epoch": 5.87, "learning_rate": 3.5341822775355916e-05, "loss": 2.5172, "step": 3078500 }, { "epoch": 5.87, "learning_rate": 3.533944589671677e-05, "loss": 2.5118, "step": 3079000 }, { "epoch": 5.87, "learning_rate": 3.533706425479377e-05, "loss": 2.5104, "step": 3079500 }, { "epoch": 5.87, "learning_rate": 3.5334682612870773e-05, "loss": 2.5282, "step": 3080000 }, { "epoch": 5.87, "learning_rate": 3.5332300970947776e-05, "loss": 2.529, "step": 3080500 }, { "epoch": 5.87, "learning_rate": 3.5329919329024785e-05, "loss": 2.5145, "step": 3081000 }, { "epoch": 5.87, "learning_rate": 3.5327537687101795e-05, "loss": 2.5272, "step": 3081500 }, { "epoch": 5.87, "learning_rate": 3.53251560451788e-05, "loss": 2.5312, "step": 3082000 }, { "epoch": 5.87, "learning_rate": 3.532277440325581e-05, "loss": 2.519, "step": 3082500 }, { "epoch": 5.87, "learning_rate": 3.532039752461665e-05, "loss": 2.5208, "step": 3083000 }, { "epoch": 5.88, "learning_rate": 3.5318015882693655e-05, "loss": 2.5272, "step": 3083500 }, { "epoch": 5.88, "learning_rate": 3.5315634240770664e-05, "loss": 2.5202, "step": 3084000 }, { "epoch": 5.88, "learning_rate": 3.531325259884767e-05, "loss": 2.5276, "step": 3084500 }, { "epoch": 5.88, "learning_rate": 3.531087572020852e-05, "loss": 2.5162, "step": 3085000 }, { "epoch": 5.88, "learning_rate": 3.530849407828553e-05, "loss": 2.5252, "step": 3085500 }, { "epoch": 5.88, "learning_rate": 3.530611243636253e-05, "loss": 2.5118, "step": 3086000 }, { "epoch": 5.88, "learning_rate": 3.5303730794439534e-05, "loss": 2.5161, "step": 3086500 }, { "epoch": 5.88, "learning_rate": 3.5301353915800386e-05, "loss": 2.5341, "step": 3087000 }, { "epoch": 5.88, "learning_rate": 3.529897227387739e-05, "loss": 2.5183, "step": 3087500 }, { "epoch": 5.88, "learning_rate": 3.52965906319544e-05, "loss": 2.535, "step": 3088000 }, { "epoch": 5.88, "learning_rate": 3.52942089900314e-05, "loss": 2.5114, "step": 3088500 }, { "epoch": 5.89, "learning_rate": 3.529182734810841e-05, "loss": 2.5202, "step": 3089000 }, { "epoch": 5.89, "learning_rate": 3.5289455232753106e-05, "loss": 2.5138, "step": 3089500 }, { "epoch": 5.89, "learning_rate": 3.528707359083011e-05, "loss": 2.5396, "step": 3090000 }, { "epoch": 5.89, "eval_accuracy": 0.5393954060890612, "eval_loss": 2.425180673599243, "eval_runtime": 4202.4833, "eval_samples_per_second": 65.435, "eval_steps_per_second": 6.544, "step": 3090000 }, { "epoch": 5.89, "learning_rate": 3.528469194890711e-05, "loss": 2.5157, "step": 3090500 }, { "epoch": 5.89, "learning_rate": 3.528231030698412e-05, "loss": 2.5197, "step": 3091000 }, { "epoch": 5.89, "learning_rate": 3.527992866506112e-05, "loss": 2.5027, "step": 3091500 }, { "epoch": 5.89, "learning_rate": 3.527754702313813e-05, "loss": 2.5325, "step": 3092000 }, { "epoch": 5.89, "learning_rate": 3.5275170144498985e-05, "loss": 2.5161, "step": 3092500 }, { "epoch": 5.89, "learning_rate": 3.527278850257599e-05, "loss": 2.5352, "step": 3093000 }, { "epoch": 5.89, "learning_rate": 3.527040686065299e-05, "loss": 2.5191, "step": 3093500 }, { "epoch": 5.9, "learning_rate": 3.526802521872999e-05, "loss": 2.5379, "step": 3094000 }, { "epoch": 5.9, "learning_rate": 3.5265643576807e-05, "loss": 2.5348, "step": 3094500 }, { "epoch": 5.9, "learning_rate": 3.5263261934884004e-05, "loss": 2.5289, "step": 3095000 }, { "epoch": 5.9, "learning_rate": 3.5260880292961014e-05, "loss": 2.5086, "step": 3095500 }, { "epoch": 5.9, "learning_rate": 3.5258498651038016e-05, "loss": 2.5193, "step": 3096000 }, { "epoch": 5.9, "learning_rate": 3.525612177239887e-05, "loss": 2.5276, "step": 3096500 }, { "epoch": 5.9, "learning_rate": 3.525374489375972e-05, "loss": 2.5244, "step": 3097000 }, { "epoch": 5.9, "learning_rate": 3.5251363251836724e-05, "loss": 2.5164, "step": 3097500 }, { "epoch": 5.9, "learning_rate": 3.5248981609913726e-05, "loss": 2.5351, "step": 3098000 }, { "epoch": 5.9, "learning_rate": 3.5246599967990736e-05, "loss": 2.5255, "step": 3098500 }, { "epoch": 5.9, "learning_rate": 3.524422308935159e-05, "loss": 2.5081, "step": 3099000 }, { "epoch": 5.91, "learning_rate": 3.524184144742859e-05, "loss": 2.5367, "step": 3099500 }, { "epoch": 5.91, "learning_rate": 3.52394598055056e-05, "loss": 2.5391, "step": 3100000 }, { "epoch": 5.91, "learning_rate": 3.5237078163582596e-05, "loss": 2.523, "step": 3100500 }, { "epoch": 5.91, "learning_rate": 3.5234696521659605e-05, "loss": 2.5411, "step": 3101000 }, { "epoch": 5.91, "learning_rate": 3.523231487973661e-05, "loss": 2.5452, "step": 3101500 }, { "epoch": 5.91, "learning_rate": 3.522993323781362e-05, "loss": 2.5355, "step": 3102000 }, { "epoch": 5.91, "learning_rate": 3.522755635917447e-05, "loss": 2.535, "step": 3102500 }, { "epoch": 5.91, "learning_rate": 3.522517471725147e-05, "loss": 2.5149, "step": 3103000 }, { "epoch": 5.91, "learning_rate": 3.5222793075328475e-05, "loss": 2.5163, "step": 3103500 }, { "epoch": 5.91, "learning_rate": 3.5220411433405484e-05, "loss": 2.5137, "step": 3104000 }, { "epoch": 5.92, "learning_rate": 3.521802979148249e-05, "loss": 2.5223, "step": 3104500 }, { "epoch": 5.92, "learning_rate": 3.5215648149559496e-05, "loss": 2.5437, "step": 3105000 }, { "epoch": 5.92, "learning_rate": 3.521327127092034e-05, "loss": 2.5404, "step": 3105500 }, { "epoch": 5.92, "learning_rate": 3.521088962899735e-05, "loss": 2.5206, "step": 3106000 }, { "epoch": 5.92, "learning_rate": 3.520850798707436e-05, "loss": 2.553, "step": 3106500 }, { "epoch": 5.92, "learning_rate": 3.5206126345151356e-05, "loss": 2.5111, "step": 3107000 }, { "epoch": 5.92, "learning_rate": 3.5203744703228366e-05, "loss": 2.522, "step": 3107500 }, { "epoch": 5.92, "learning_rate": 3.520136306130537e-05, "loss": 2.5098, "step": 3108000 }, { "epoch": 5.92, "learning_rate": 3.519898141938238e-05, "loss": 2.5305, "step": 3108500 }, { "epoch": 5.92, "learning_rate": 3.519660454074323e-05, "loss": 2.5162, "step": 3109000 }, { "epoch": 5.92, "learning_rate": 3.519422289882023e-05, "loss": 2.548, "step": 3109500 }, { "epoch": 5.93, "learning_rate": 3.5191841256897235e-05, "loss": 2.5275, "step": 3110000 }, { "epoch": 5.93, "learning_rate": 3.518945961497424e-05, "loss": 2.5364, "step": 3110500 }, { "epoch": 5.93, "learning_rate": 3.518707797305125e-05, "loss": 2.5587, "step": 3111000 }, { "epoch": 5.93, "learning_rate": 3.518469633112825e-05, "loss": 2.5023, "step": 3111500 }, { "epoch": 5.93, "learning_rate": 3.518231468920526e-05, "loss": 2.5094, "step": 3112000 }, { "epoch": 5.93, "learning_rate": 3.517993304728226e-05, "loss": 2.5082, "step": 3112500 }, { "epoch": 5.93, "learning_rate": 3.5177556168643114e-05, "loss": 2.5131, "step": 3113000 }, { "epoch": 5.93, "learning_rate": 3.517517452672012e-05, "loss": 2.5374, "step": 3113500 }, { "epoch": 5.93, "learning_rate": 3.517279764808097e-05, "loss": 2.5126, "step": 3114000 }, { "epoch": 5.93, "learning_rate": 3.517041600615797e-05, "loss": 2.5246, "step": 3114500 }, { "epoch": 5.94, "learning_rate": 3.516803436423498e-05, "loss": 2.528, "step": 3115000 }, { "epoch": 5.94, "learning_rate": 3.5165652722311984e-05, "loss": 2.5301, "step": 3115500 }, { "epoch": 5.94, "learning_rate": 3.516327108038899e-05, "loss": 2.5229, "step": 3116000 }, { "epoch": 5.94, "learning_rate": 3.5160889438465996e-05, "loss": 2.5289, "step": 3116500 }, { "epoch": 5.94, "learning_rate": 3.5158507796543e-05, "loss": 2.519, "step": 3117000 }, { "epoch": 5.94, "learning_rate": 3.515612615462001e-05, "loss": 2.5198, "step": 3117500 }, { "epoch": 5.94, "learning_rate": 3.51537540392647e-05, "loss": 2.5265, "step": 3118000 }, { "epoch": 5.94, "learning_rate": 3.5151372397341706e-05, "loss": 2.5535, "step": 3118500 }, { "epoch": 5.94, "learning_rate": 3.5148990755418715e-05, "loss": 2.5252, "step": 3119000 }, { "epoch": 5.94, "learning_rate": 3.514660911349572e-05, "loss": 2.519, "step": 3119500 }, { "epoch": 5.94, "learning_rate": 3.514422747157272e-05, "loss": 2.5341, "step": 3120000 }, { "epoch": 5.94, "eval_accuracy": 0.5390873752301372, "eval_loss": 2.426441192626953, "eval_runtime": 4198.7429, "eval_samples_per_second": 65.493, "eval_steps_per_second": 6.549, "step": 3120000 }, { "epoch": 5.95, "learning_rate": 3.514184582964973e-05, "loss": 2.5386, "step": 3120500 }, { "epoch": 5.95, "learning_rate": 3.513946418772673e-05, "loss": 2.536, "step": 3121000 }, { "epoch": 5.95, "learning_rate": 3.513708254580374e-05, "loss": 2.5259, "step": 3121500 }, { "epoch": 5.95, "learning_rate": 3.5134705667164594e-05, "loss": 2.5246, "step": 3122000 }, { "epoch": 5.95, "learning_rate": 3.51323240252416e-05, "loss": 2.5314, "step": 3122500 }, { "epoch": 5.95, "learning_rate": 3.51299423833186e-05, "loss": 2.5286, "step": 3123000 }, { "epoch": 5.95, "learning_rate": 3.51275607413956e-05, "loss": 2.5427, "step": 3123500 }, { "epoch": 5.95, "learning_rate": 3.512517909947261e-05, "loss": 2.5318, "step": 3124000 }, { "epoch": 5.95, "learning_rate": 3.5122797457549614e-05, "loss": 2.5269, "step": 3124500 }, { "epoch": 5.95, "learning_rate": 3.5120415815626623e-05, "loss": 2.5406, "step": 3125000 }, { "epoch": 5.96, "learning_rate": 3.5118034173703626e-05, "loss": 2.5426, "step": 3125500 }, { "epoch": 5.96, "learning_rate": 3.511565729506448e-05, "loss": 2.5224, "step": 3126000 }, { "epoch": 5.96, "learning_rate": 3.5113280416425324e-05, "loss": 2.5138, "step": 3126500 }, { "epoch": 5.96, "learning_rate": 3.511089877450233e-05, "loss": 2.5255, "step": 3127000 }, { "epoch": 5.96, "learning_rate": 3.5108521895863186e-05, "loss": 2.522, "step": 3127500 }, { "epoch": 5.96, "learning_rate": 3.510614025394019e-05, "loss": 2.509, "step": 3128000 }, { "epoch": 5.96, "learning_rate": 3.51037586120172e-05, "loss": 2.5301, "step": 3128500 }, { "epoch": 5.96, "learning_rate": 3.51013769700942e-05, "loss": 2.5284, "step": 3129000 }, { "epoch": 5.96, "learning_rate": 3.509899532817121e-05, "loss": 2.5188, "step": 3129500 }, { "epoch": 5.96, "learning_rate": 3.5096613686248205e-05, "loss": 2.5286, "step": 3130000 }, { "epoch": 5.96, "learning_rate": 3.5094232044325215e-05, "loss": 2.5356, "step": 3130500 }, { "epoch": 5.97, "learning_rate": 3.5091850402402224e-05, "loss": 2.509, "step": 3131000 }, { "epoch": 5.97, "learning_rate": 3.508946876047923e-05, "loss": 2.5324, "step": 3131500 }, { "epoch": 5.97, "learning_rate": 3.508709188184008e-05, "loss": 2.5206, "step": 3132000 }, { "epoch": 5.97, "learning_rate": 3.508471023991708e-05, "loss": 2.5236, "step": 3132500 }, { "epoch": 5.97, "learning_rate": 3.5082328597994084e-05, "loss": 2.5362, "step": 3133000 }, { "epoch": 5.97, "learning_rate": 3.5079946956071094e-05, "loss": 2.5239, "step": 3133500 }, { "epoch": 5.97, "learning_rate": 3.5077565314148096e-05, "loss": 2.5359, "step": 3134000 }, { "epoch": 5.97, "learning_rate": 3.5075183672225106e-05, "loss": 2.5195, "step": 3134500 }, { "epoch": 5.97, "learning_rate": 3.507280203030211e-05, "loss": 2.5195, "step": 3135000 }, { "epoch": 5.97, "learning_rate": 3.507042038837912e-05, "loss": 2.5312, "step": 3135500 }, { "epoch": 5.98, "learning_rate": 3.5068043509739963e-05, "loss": 2.5145, "step": 3136000 }, { "epoch": 5.98, "learning_rate": 3.5065661867816966e-05, "loss": 2.5287, "step": 3136500 }, { "epoch": 5.98, "learning_rate": 3.5063280225893975e-05, "loss": 2.5215, "step": 3137000 }, { "epoch": 5.98, "learning_rate": 3.506090334725483e-05, "loss": 2.5339, "step": 3137500 }, { "epoch": 5.98, "learning_rate": 3.505852170533183e-05, "loss": 2.5211, "step": 3138000 }, { "epoch": 5.98, "learning_rate": 3.505614006340884e-05, "loss": 2.5281, "step": 3138500 }, { "epoch": 5.98, "learning_rate": 3.505375842148584e-05, "loss": 2.5327, "step": 3139000 }, { "epoch": 5.98, "learning_rate": 3.5051376779562845e-05, "loss": 2.5264, "step": 3139500 }, { "epoch": 5.98, "learning_rate": 3.504899513763985e-05, "loss": 2.5091, "step": 3140000 }, { "epoch": 5.98, "learning_rate": 3.504661349571686e-05, "loss": 2.524, "step": 3140500 }, { "epoch": 5.98, "learning_rate": 3.504423185379386e-05, "loss": 2.5263, "step": 3141000 }, { "epoch": 5.99, "learning_rate": 3.504185497515471e-05, "loss": 2.5252, "step": 3141500 }, { "epoch": 5.99, "learning_rate": 3.503947333323172e-05, "loss": 2.5357, "step": 3142000 }, { "epoch": 5.99, "learning_rate": 3.5037096454592574e-05, "loss": 2.5279, "step": 3142500 }, { "epoch": 5.99, "learning_rate": 3.503471481266957e-05, "loss": 2.5209, "step": 3143000 }, { "epoch": 5.99, "learning_rate": 3.503233317074658e-05, "loss": 2.5147, "step": 3143500 }, { "epoch": 5.99, "learning_rate": 3.502995152882358e-05, "loss": 2.536, "step": 3144000 }, { "epoch": 5.99, "learning_rate": 3.502756988690059e-05, "loss": 2.5325, "step": 3144500 }, { "epoch": 5.99, "learning_rate": 3.50251882449776e-05, "loss": 2.5316, "step": 3145000 }, { "epoch": 5.99, "learning_rate": 3.5022811366338446e-05, "loss": 2.5212, "step": 3145500 }, { "epoch": 5.99, "learning_rate": 3.502042972441545e-05, "loss": 2.5182, "step": 3146000 }, { "epoch": 6.0, "learning_rate": 3.501804808249246e-05, "loss": 2.5246, "step": 3146500 }, { "epoch": 6.0, "learning_rate": 3.501566644056946e-05, "loss": 2.5257, "step": 3147000 }, { "epoch": 6.0, "learning_rate": 3.501328479864647e-05, "loss": 2.49, "step": 3147500 }, { "epoch": 6.0, "learning_rate": 3.5010907920007315e-05, "loss": 2.525, "step": 3148000 }, { "epoch": 6.0, "learning_rate": 3.5008526278084325e-05, "loss": 2.5165, "step": 3148500 }, { "epoch": 6.0, "learning_rate": 3.5006144636161334e-05, "loss": 2.5163, "step": 3149000 }, { "epoch": 6.0, "learning_rate": 3.500376299423833e-05, "loss": 2.52, "step": 3149500 }, { "epoch": 6.0, "learning_rate": 3.500138135231534e-05, "loss": 2.5255, "step": 3150000 }, { "epoch": 6.0, "eval_accuracy": 0.5396257404979662, "eval_loss": 2.424121379852295, "eval_runtime": 4215.4768, "eval_samples_per_second": 65.233, "eval_steps_per_second": 6.523, "step": 3150000 }, { "epoch": 6.0, "learning_rate": 3.499900447367619e-05, "loss": 2.4907, "step": 3150500 }, { "epoch": 6.0, "learning_rate": 3.4996622831753194e-05, "loss": 2.5023, "step": 3151000 }, { "epoch": 6.0, "learning_rate": 3.4994241189830204e-05, "loss": 2.4943, "step": 3151500 }, { "epoch": 6.01, "learning_rate": 3.4991859547907206e-05, "loss": 2.5179, "step": 3152000 }, { "epoch": 6.01, "learning_rate": 3.498947790598421e-05, "loss": 2.524, "step": 3152500 }, { "epoch": 6.01, "learning_rate": 3.498710102734506e-05, "loss": 2.5427, "step": 3153000 }, { "epoch": 6.01, "learning_rate": 3.4984719385422064e-05, "loss": 2.5305, "step": 3153500 }, { "epoch": 6.01, "learning_rate": 3.498233774349907e-05, "loss": 2.5221, "step": 3154000 }, { "epoch": 6.01, "learning_rate": 3.4979956101576076e-05, "loss": 2.5238, "step": 3154500 }, { "epoch": 6.01, "learning_rate": 3.497757922293693e-05, "loss": 2.5054, "step": 3155000 }, { "epoch": 6.01, "learning_rate": 3.497519758101394e-05, "loss": 2.5342, "step": 3155500 }, { "epoch": 6.01, "learning_rate": 3.4972815939090934e-05, "loss": 2.5277, "step": 3156000 }, { "epoch": 6.01, "learning_rate": 3.497043429716794e-05, "loss": 2.528, "step": 3156500 }, { "epoch": 6.02, "learning_rate": 3.4968052655244946e-05, "loss": 2.5067, "step": 3157000 }, { "epoch": 6.02, "learning_rate": 3.4965671013321955e-05, "loss": 2.5048, "step": 3157500 }, { "epoch": 6.02, "learning_rate": 3.496328937139896e-05, "loss": 2.5048, "step": 3158000 }, { "epoch": 6.02, "learning_rate": 3.496090772947597e-05, "loss": 2.5268, "step": 3158500 }, { "epoch": 6.02, "learning_rate": 3.495853085083682e-05, "loss": 2.5152, "step": 3159000 }, { "epoch": 6.02, "learning_rate": 3.495614920891382e-05, "loss": 2.5127, "step": 3159500 }, { "epoch": 6.02, "learning_rate": 3.4953767566990825e-05, "loss": 2.5142, "step": 3160000 }, { "epoch": 6.02, "learning_rate": 3.4951385925067834e-05, "loss": 2.5355, "step": 3160500 }, { "epoch": 6.02, "learning_rate": 3.494900904642868e-05, "loss": 2.5362, "step": 3161000 }, { "epoch": 6.02, "learning_rate": 3.494662740450569e-05, "loss": 2.5025, "step": 3161500 }, { "epoch": 6.02, "learning_rate": 3.494424576258269e-05, "loss": 2.4996, "step": 3162000 }, { "epoch": 6.03, "learning_rate": 3.4941864120659694e-05, "loss": 2.5276, "step": 3162500 }, { "epoch": 6.03, "learning_rate": 3.4939487242020546e-05, "loss": 2.5089, "step": 3163000 }, { "epoch": 6.03, "learning_rate": 3.4937105600097556e-05, "loss": 2.5126, "step": 3163500 }, { "epoch": 6.03, "learning_rate": 3.493472395817456e-05, "loss": 2.5057, "step": 3164000 }, { "epoch": 6.03, "learning_rate": 3.493234231625157e-05, "loss": 2.5064, "step": 3164500 }, { "epoch": 6.03, "learning_rate": 3.492996067432857e-05, "loss": 2.5091, "step": 3165000 }, { "epoch": 6.03, "learning_rate": 3.492757903240557e-05, "loss": 2.5172, "step": 3165500 }, { "epoch": 6.03, "learning_rate": 3.4925197390482576e-05, "loss": 2.5245, "step": 3166000 }, { "epoch": 6.03, "learning_rate": 3.4922815748559585e-05, "loss": 2.5127, "step": 3166500 }, { "epoch": 6.03, "learning_rate": 3.492043886992044e-05, "loss": 2.5186, "step": 3167000 }, { "epoch": 6.04, "learning_rate": 3.491805722799744e-05, "loss": 2.53, "step": 3167500 }, { "epoch": 6.04, "learning_rate": 3.491567558607445e-05, "loss": 2.489, "step": 3168000 }, { "epoch": 6.04, "learning_rate": 3.491329394415145e-05, "loss": 2.5188, "step": 3168500 }, { "epoch": 6.04, "learning_rate": 3.4910912302228455e-05, "loss": 2.5293, "step": 3169000 }, { "epoch": 6.04, "learning_rate": 3.490853542358931e-05, "loss": 2.5223, "step": 3169500 }, { "epoch": 6.04, "learning_rate": 3.490615378166631e-05, "loss": 2.514, "step": 3170000 }, { "epoch": 6.04, "learning_rate": 3.490377213974332e-05, "loss": 2.5301, "step": 3170500 }, { "epoch": 6.04, "learning_rate": 3.490139049782032e-05, "loss": 2.5148, "step": 3171000 }, { "epoch": 6.04, "learning_rate": 3.4899013619181174e-05, "loss": 2.5258, "step": 3171500 }, { "epoch": 6.04, "learning_rate": 3.489663197725818e-05, "loss": 2.4939, "step": 3172000 }, { "epoch": 6.04, "learning_rate": 3.489425033533518e-05, "loss": 2.5015, "step": 3172500 }, { "epoch": 6.05, "learning_rate": 3.489186869341219e-05, "loss": 2.4942, "step": 3173000 }, { "epoch": 6.05, "learning_rate": 3.48894870514892e-05, "loss": 2.5236, "step": 3173500 }, { "epoch": 6.05, "learning_rate": 3.4887110172850043e-05, "loss": 2.4893, "step": 3174000 }, { "epoch": 6.05, "learning_rate": 3.488472853092705e-05, "loss": 2.5251, "step": 3174500 }, { "epoch": 6.05, "learning_rate": 3.4882346889004055e-05, "loss": 2.5063, "step": 3175000 }, { "epoch": 6.05, "learning_rate": 3.487997001036491e-05, "loss": 2.4995, "step": 3175500 }, { "epoch": 6.05, "learning_rate": 3.487758836844191e-05, "loss": 2.5121, "step": 3176000 }, { "epoch": 6.05, "learning_rate": 3.487520672651891e-05, "loss": 2.493, "step": 3176500 }, { "epoch": 6.05, "learning_rate": 3.487282508459592e-05, "loss": 2.5196, "step": 3177000 }, { "epoch": 6.05, "learning_rate": 3.487044344267293e-05, "loss": 2.4992, "step": 3177500 }, { "epoch": 6.06, "learning_rate": 3.4868061800749934e-05, "loss": 2.5107, "step": 3178000 }, { "epoch": 6.06, "learning_rate": 3.486568492211079e-05, "loss": 2.5044, "step": 3178500 }, { "epoch": 6.06, "learning_rate": 3.486330328018779e-05, "loss": 2.5185, "step": 3179000 }, { "epoch": 6.06, "learning_rate": 3.486092163826479e-05, "loss": 2.5203, "step": 3179500 }, { "epoch": 6.06, "learning_rate": 3.48585399963418e-05, "loss": 2.5242, "step": 3180000 }, { "epoch": 6.06, "eval_accuracy": 0.5398359478400273, "eval_loss": 2.4234790802001953, "eval_runtime": 4206.2412, "eval_samples_per_second": 65.376, "eval_steps_per_second": 6.538, "step": 3180000 }, { "epoch": 6.06, "learning_rate": 3.4856158354418804e-05, "loss": 2.498, "step": 3180500 }, { "epoch": 6.06, "learning_rate": 3.4853776712495813e-05, "loss": 2.5267, "step": 3181000 }, { "epoch": 6.06, "learning_rate": 3.4851395070572816e-05, "loss": 2.5057, "step": 3181500 }, { "epoch": 6.06, "learning_rate": 3.484901342864982e-05, "loss": 2.5008, "step": 3182000 }, { "epoch": 6.06, "learning_rate": 3.484663178672682e-05, "loss": 2.512, "step": 3182500 }, { "epoch": 6.06, "learning_rate": 3.4844254908087674e-05, "loss": 2.5142, "step": 3183000 }, { "epoch": 6.07, "learning_rate": 3.4841878029448526e-05, "loss": 2.5237, "step": 3183500 }, { "epoch": 6.07, "learning_rate": 3.4839496387525535e-05, "loss": 2.5124, "step": 3184000 }, { "epoch": 6.07, "learning_rate": 3.483711474560254e-05, "loss": 2.5317, "step": 3184500 }, { "epoch": 6.07, "learning_rate": 3.483473310367955e-05, "loss": 2.5144, "step": 3185000 }, { "epoch": 6.07, "learning_rate": 3.483235146175654e-05, "loss": 2.5134, "step": 3185500 }, { "epoch": 6.07, "learning_rate": 3.482996981983355e-05, "loss": 2.5026, "step": 3186000 }, { "epoch": 6.07, "learning_rate": 3.4827588177910555e-05, "loss": 2.5161, "step": 3186500 }, { "epoch": 6.07, "learning_rate": 3.4825206535987565e-05, "loss": 2.5215, "step": 3187000 }, { "epoch": 6.07, "learning_rate": 3.482282965734842e-05, "loss": 2.5315, "step": 3187500 }, { "epoch": 6.07, "learning_rate": 3.482044801542542e-05, "loss": 2.5149, "step": 3188000 }, { "epoch": 6.08, "learning_rate": 3.481806637350242e-05, "loss": 2.5193, "step": 3188500 }, { "epoch": 6.08, "learning_rate": 3.4815689494863274e-05, "loss": 2.5163, "step": 3189000 }, { "epoch": 6.08, "learning_rate": 3.481330785294028e-05, "loss": 2.513, "step": 3189500 }, { "epoch": 6.08, "learning_rate": 3.4810926211017286e-05, "loss": 2.5279, "step": 3190000 }, { "epoch": 6.08, "learning_rate": 3.480854456909429e-05, "loss": 2.5182, "step": 3190500 }, { "epoch": 6.08, "learning_rate": 3.48061629271713e-05, "loss": 2.5341, "step": 3191000 }, { "epoch": 6.08, "learning_rate": 3.480378604853215e-05, "loss": 2.5272, "step": 3191500 }, { "epoch": 6.08, "learning_rate": 3.480140440660915e-05, "loss": 2.5013, "step": 3192000 }, { "epoch": 6.08, "learning_rate": 3.4799022764686156e-05, "loss": 2.5153, "step": 3192500 }, { "epoch": 6.08, "learning_rate": 3.4796641122763165e-05, "loss": 2.5322, "step": 3193000 }, { "epoch": 6.08, "learning_rate": 3.479425948084017e-05, "loss": 2.4886, "step": 3193500 }, { "epoch": 6.09, "learning_rate": 3.479187783891718e-05, "loss": 2.5149, "step": 3194000 }, { "epoch": 6.09, "learning_rate": 3.478949619699418e-05, "loss": 2.532, "step": 3194500 }, { "epoch": 6.09, "learning_rate": 3.478711455507118e-05, "loss": 2.5208, "step": 3195000 }, { "epoch": 6.09, "learning_rate": 3.4784737676432035e-05, "loss": 2.5387, "step": 3195500 }, { "epoch": 6.09, "learning_rate": 3.478236079779289e-05, "loss": 2.5071, "step": 3196000 }, { "epoch": 6.09, "learning_rate": 3.477997915586989e-05, "loss": 2.5204, "step": 3196500 }, { "epoch": 6.09, "learning_rate": 3.47775975139469e-05, "loss": 2.5067, "step": 3197000 }, { "epoch": 6.09, "learning_rate": 3.47752158720239e-05, "loss": 2.5433, "step": 3197500 }, { "epoch": 6.09, "learning_rate": 3.477283423010091e-05, "loss": 2.5225, "step": 3198000 }, { "epoch": 6.09, "learning_rate": 3.477045258817791e-05, "loss": 2.5254, "step": 3198500 }, { "epoch": 6.1, "learning_rate": 3.4768070946254917e-05, "loss": 2.5061, "step": 3199000 }, { "epoch": 6.1, "learning_rate": 3.476568930433192e-05, "loss": 2.52, "step": 3199500 }, { "epoch": 6.1, "learning_rate": 3.476331242569277e-05, "loss": 2.4904, "step": 3200000 }, { "epoch": 6.1, "learning_rate": 3.476093078376978e-05, "loss": 2.5161, "step": 3200500 }, { "epoch": 6.1, "learning_rate": 3.4758549141846784e-05, "loss": 2.4938, "step": 3201000 }, { "epoch": 6.1, "learning_rate": 3.475616749992379e-05, "loss": 2.5052, "step": 3201500 }, { "epoch": 6.1, "learning_rate": 3.475379062128464e-05, "loss": 2.5167, "step": 3202000 }, { "epoch": 6.1, "learning_rate": 3.475140897936164e-05, "loss": 2.5205, "step": 3202500 }, { "epoch": 6.1, "learning_rate": 3.474902733743865e-05, "loss": 2.5209, "step": 3203000 }, { "epoch": 6.1, "learning_rate": 3.47466504587995e-05, "loss": 2.5179, "step": 3203500 }, { "epoch": 6.1, "learning_rate": 3.4744268816876505e-05, "loss": 2.5157, "step": 3204000 }, { "epoch": 6.11, "learning_rate": 3.4741887174953515e-05, "loss": 2.5167, "step": 3204500 }, { "epoch": 6.11, "learning_rate": 3.473950553303052e-05, "loss": 2.5183, "step": 3205000 }, { "epoch": 6.11, "learning_rate": 3.473712389110752e-05, "loss": 2.5168, "step": 3205500 }, { "epoch": 6.11, "learning_rate": 3.473474224918453e-05, "loss": 2.5206, "step": 3206000 }, { "epoch": 6.11, "learning_rate": 3.473236060726153e-05, "loss": 2.491, "step": 3206500 }, { "epoch": 6.11, "learning_rate": 3.4729983728622384e-05, "loss": 2.5138, "step": 3207000 }, { "epoch": 6.11, "learning_rate": 3.472760208669939e-05, "loss": 2.5054, "step": 3207500 }, { "epoch": 6.11, "learning_rate": 3.4725220444776396e-05, "loss": 2.5276, "step": 3208000 }, { "epoch": 6.11, "learning_rate": 3.47228388028534e-05, "loss": 2.5131, "step": 3208500 }, { "epoch": 6.11, "learning_rate": 3.47204571609304e-05, "loss": 2.5306, "step": 3209000 }, { "epoch": 6.12, "learning_rate": 3.471807551900741e-05, "loss": 2.51, "step": 3209500 }, { "epoch": 6.12, "learning_rate": 3.4715693877084414e-05, "loss": 2.5338, "step": 3210000 }, { "epoch": 6.12, "eval_accuracy": 0.5399058167024112, "eval_loss": 2.4237232208251953, "eval_runtime": 4203.0472, "eval_samples_per_second": 65.426, "eval_steps_per_second": 6.543, "step": 3210000 }, { "epoch": 6.12, "learning_rate": 3.471331223516142e-05, "loss": 2.5221, "step": 3210500 }, { "epoch": 6.12, "learning_rate": 3.4710935356522275e-05, "loss": 2.5008, "step": 3211000 }, { "epoch": 6.12, "learning_rate": 3.470855371459927e-05, "loss": 2.5101, "step": 3211500 }, { "epoch": 6.12, "learning_rate": 3.470617207267628e-05, "loss": 2.5093, "step": 3212000 }, { "epoch": 6.12, "learning_rate": 3.470379043075328e-05, "loss": 2.529, "step": 3212500 }, { "epoch": 6.12, "learning_rate": 3.4701413552114136e-05, "loss": 2.5208, "step": 3213000 }, { "epoch": 6.12, "learning_rate": 3.4699031910191145e-05, "loss": 2.5133, "step": 3213500 }, { "epoch": 6.12, "learning_rate": 3.469665026826815e-05, "loss": 2.5074, "step": 3214000 }, { "epoch": 6.12, "learning_rate": 3.469426862634516e-05, "loss": 2.518, "step": 3214500 }, { "epoch": 6.13, "learning_rate": 3.469188698442215e-05, "loss": 2.5252, "step": 3215000 }, { "epoch": 6.13, "learning_rate": 3.468950534249916e-05, "loss": 2.5191, "step": 3215500 }, { "epoch": 6.13, "learning_rate": 3.468712370057617e-05, "loss": 2.524, "step": 3216000 }, { "epoch": 6.13, "learning_rate": 3.4684742058653174e-05, "loss": 2.5099, "step": 3216500 }, { "epoch": 6.13, "learning_rate": 3.4682365180014027e-05, "loss": 2.5161, "step": 3217000 }, { "epoch": 6.13, "learning_rate": 3.467998353809103e-05, "loss": 2.5098, "step": 3217500 }, { "epoch": 6.13, "learning_rate": 3.467760189616803e-05, "loss": 2.5091, "step": 3218000 }, { "epoch": 6.13, "learning_rate": 3.4675225017528884e-05, "loss": 2.5203, "step": 3218500 }, { "epoch": 6.13, "learning_rate": 3.4672843375605893e-05, "loss": 2.5168, "step": 3219000 }, { "epoch": 6.13, "learning_rate": 3.4670461733682896e-05, "loss": 2.5283, "step": 3219500 }, { "epoch": 6.14, "learning_rate": 3.4668080091759905e-05, "loss": 2.5157, "step": 3220000 }, { "epoch": 6.14, "learning_rate": 3.466569844983691e-05, "loss": 2.5107, "step": 3220500 }, { "epoch": 6.14, "learning_rate": 3.466331680791391e-05, "loss": 2.5001, "step": 3221000 }, { "epoch": 6.14, "learning_rate": 3.466093516599091e-05, "loss": 2.524, "step": 3221500 }, { "epoch": 6.14, "learning_rate": 3.465855352406792e-05, "loss": 2.5303, "step": 3222000 }, { "epoch": 6.14, "learning_rate": 3.4656176645428775e-05, "loss": 2.5418, "step": 3222500 }, { "epoch": 6.14, "learning_rate": 3.465379976678963e-05, "loss": 2.5045, "step": 3223000 }, { "epoch": 6.14, "learning_rate": 3.465142288815047e-05, "loss": 2.5205, "step": 3223500 }, { "epoch": 6.14, "learning_rate": 3.464904124622748e-05, "loss": 2.5308, "step": 3224000 }, { "epoch": 6.14, "learning_rate": 3.4646659604304485e-05, "loss": 2.5271, "step": 3224500 }, { "epoch": 6.14, "learning_rate": 3.4644277962381494e-05, "loss": 2.5242, "step": 3225000 }, { "epoch": 6.15, "learning_rate": 3.46418963204585e-05, "loss": 2.5312, "step": 3225500 }, { "epoch": 6.15, "learning_rate": 3.46395146785355e-05, "loss": 2.5089, "step": 3226000 }, { "epoch": 6.15, "learning_rate": 3.463713303661251e-05, "loss": 2.5064, "step": 3226500 }, { "epoch": 6.15, "learning_rate": 3.463475139468951e-05, "loss": 2.5246, "step": 3227000 }, { "epoch": 6.15, "learning_rate": 3.4632374516050364e-05, "loss": 2.533, "step": 3227500 }, { "epoch": 6.15, "learning_rate": 3.462999287412737e-05, "loss": 2.5118, "step": 3228000 }, { "epoch": 6.15, "learning_rate": 3.462761123220437e-05, "loss": 2.4979, "step": 3228500 }, { "epoch": 6.15, "learning_rate": 3.462522959028138e-05, "loss": 2.5137, "step": 3229000 }, { "epoch": 6.15, "learning_rate": 3.462284794835838e-05, "loss": 2.5276, "step": 3229500 }, { "epoch": 6.15, "learning_rate": 3.462046630643539e-05, "loss": 2.5272, "step": 3230000 }, { "epoch": 6.16, "learning_rate": 3.461808466451239e-05, "loss": 2.5048, "step": 3230500 }, { "epoch": 6.16, "learning_rate": 3.4615703022589396e-05, "loss": 2.5241, "step": 3231000 }, { "epoch": 6.16, "learning_rate": 3.461332614395025e-05, "loss": 2.5257, "step": 3231500 }, { "epoch": 6.16, "learning_rate": 3.461094450202725e-05, "loss": 2.5227, "step": 3232000 }, { "epoch": 6.16, "learning_rate": 3.460856286010426e-05, "loss": 2.5254, "step": 3232500 }, { "epoch": 6.16, "learning_rate": 3.460618121818127e-05, "loss": 2.5366, "step": 3233000 }, { "epoch": 6.16, "learning_rate": 3.4603804339542115e-05, "loss": 2.5268, "step": 3233500 }, { "epoch": 6.16, "learning_rate": 3.4601422697619124e-05, "loss": 2.5233, "step": 3234000 }, { "epoch": 6.16, "learning_rate": 3.459904105569613e-05, "loss": 2.5139, "step": 3234500 }, { "epoch": 6.16, "learning_rate": 3.459665941377313e-05, "loss": 2.5209, "step": 3235000 }, { "epoch": 6.16, "learning_rate": 3.459427777185014e-05, "loss": 2.5179, "step": 3235500 }, { "epoch": 6.17, "learning_rate": 3.4591900893210985e-05, "loss": 2.5289, "step": 3236000 }, { "epoch": 6.17, "learning_rate": 3.4589519251287994e-05, "loss": 2.4991, "step": 3236500 }, { "epoch": 6.17, "learning_rate": 3.4587137609365003e-05, "loss": 2.5186, "step": 3237000 }, { "epoch": 6.17, "learning_rate": 3.458476073072585e-05, "loss": 2.5219, "step": 3237500 }, { "epoch": 6.17, "learning_rate": 3.458237908880286e-05, "loss": 2.5185, "step": 3238000 }, { "epoch": 6.17, "learning_rate": 3.457999744687986e-05, "loss": 2.5071, "step": 3238500 }, { "epoch": 6.17, "learning_rate": 3.4577615804956864e-05, "loss": 2.5269, "step": 3239000 }, { "epoch": 6.17, "learning_rate": 3.457523416303387e-05, "loss": 2.5206, "step": 3239500 }, { "epoch": 6.17, "learning_rate": 3.4572852521110876e-05, "loss": 2.518, "step": 3240000 }, { "epoch": 6.17, "eval_accuracy": 0.5399985675727913, "eval_loss": 2.4218645095825195, "eval_runtime": 4210.0945, "eval_samples_per_second": 65.316, "eval_steps_per_second": 6.532, "step": 3240000 }, { "epoch": 6.17, "learning_rate": 3.4570470879187885e-05, "loss": 2.4988, "step": 3240500 }, { "epoch": 6.18, "learning_rate": 3.456808923726488e-05, "loss": 2.5301, "step": 3241000 }, { "epoch": 6.18, "learning_rate": 3.456571235862573e-05, "loss": 2.5391, "step": 3241500 }, { "epoch": 6.18, "learning_rate": 3.456333071670274e-05, "loss": 2.5031, "step": 3242000 }, { "epoch": 6.18, "learning_rate": 3.4560953838063595e-05, "loss": 2.5237, "step": 3242500 }, { "epoch": 6.18, "learning_rate": 3.45585721961406e-05, "loss": 2.5353, "step": 3243000 }, { "epoch": 6.18, "learning_rate": 3.455619055421761e-05, "loss": 2.5212, "step": 3243500 }, { "epoch": 6.18, "learning_rate": 3.455380891229461e-05, "loss": 2.524, "step": 3244000 }, { "epoch": 6.18, "learning_rate": 3.455143203365546e-05, "loss": 2.5337, "step": 3244500 }, { "epoch": 6.18, "learning_rate": 3.4549050391732464e-05, "loss": 2.5002, "step": 3245000 }, { "epoch": 6.18, "learning_rate": 3.454666874980947e-05, "loss": 2.5305, "step": 3245500 }, { "epoch": 6.18, "learning_rate": 3.4544287107886476e-05, "loss": 2.5248, "step": 3246000 }, { "epoch": 6.19, "learning_rate": 3.454190546596348e-05, "loss": 2.5324, "step": 3246500 }, { "epoch": 6.19, "learning_rate": 3.453952382404049e-05, "loss": 2.5111, "step": 3247000 }, { "epoch": 6.19, "learning_rate": 3.453714218211749e-05, "loss": 2.5138, "step": 3247500 }, { "epoch": 6.19, "learning_rate": 3.4534760540194494e-05, "loss": 2.5134, "step": 3248000 }, { "epoch": 6.19, "learning_rate": 3.45323788982715e-05, "loss": 2.5224, "step": 3248500 }, { "epoch": 6.19, "learning_rate": 3.453000201963235e-05, "loss": 2.5112, "step": 3249000 }, { "epoch": 6.19, "learning_rate": 3.452762037770936e-05, "loss": 2.5132, "step": 3249500 }, { "epoch": 6.19, "learning_rate": 3.452523873578636e-05, "loss": 2.5156, "step": 3250000 }, { "epoch": 6.19, "learning_rate": 3.452285709386337e-05, "loss": 2.5185, "step": 3250500 }, { "epoch": 6.19, "learning_rate": 3.452048021522422e-05, "loss": 2.4988, "step": 3251000 }, { "epoch": 6.2, "learning_rate": 3.4518103336585075e-05, "loss": 2.5022, "step": 3251500 }, { "epoch": 6.2, "learning_rate": 3.451572169466207e-05, "loss": 2.5493, "step": 3252000 }, { "epoch": 6.2, "learning_rate": 3.451334005273908e-05, "loss": 2.4872, "step": 3252500 }, { "epoch": 6.2, "learning_rate": 3.451095841081608e-05, "loss": 2.5139, "step": 3253000 }, { "epoch": 6.2, "learning_rate": 3.450857676889309e-05, "loss": 2.4983, "step": 3253500 }, { "epoch": 6.2, "learning_rate": 3.4506195126970095e-05, "loss": 2.5114, "step": 3254000 }, { "epoch": 6.2, "learning_rate": 3.45038134850471e-05, "loss": 2.4946, "step": 3254500 }, { "epoch": 6.2, "learning_rate": 3.4501431843124107e-05, "loss": 2.5253, "step": 3255000 }, { "epoch": 6.2, "learning_rate": 3.449905496448496e-05, "loss": 2.5195, "step": 3255500 }, { "epoch": 6.2, "learning_rate": 3.449667332256196e-05, "loss": 2.5252, "step": 3256000 }, { "epoch": 6.2, "learning_rate": 3.449429168063897e-05, "loss": 2.5236, "step": 3256500 }, { "epoch": 6.21, "learning_rate": 3.4491910038715973e-05, "loss": 2.5054, "step": 3257000 }, { "epoch": 6.21, "learning_rate": 3.4489533160076826e-05, "loss": 2.5304, "step": 3257500 }, { "epoch": 6.21, "learning_rate": 3.448715151815383e-05, "loss": 2.5368, "step": 3258000 }, { "epoch": 6.21, "learning_rate": 3.448476987623083e-05, "loss": 2.5307, "step": 3258500 }, { "epoch": 6.21, "learning_rate": 3.448238823430784e-05, "loss": 2.5174, "step": 3259000 }, { "epoch": 6.21, "learning_rate": 3.448000659238484e-05, "loss": 2.5042, "step": 3259500 }, { "epoch": 6.21, "learning_rate": 3.4477629713745695e-05, "loss": 2.5145, "step": 3260000 }, { "epoch": 6.21, "learning_rate": 3.4475248071822705e-05, "loss": 2.4973, "step": 3260500 }, { "epoch": 6.21, "learning_rate": 3.447286642989971e-05, "loss": 2.499, "step": 3261000 }, { "epoch": 6.21, "learning_rate": 3.447048478797671e-05, "loss": 2.5209, "step": 3261500 }, { "epoch": 6.22, "learning_rate": 3.446810790933756e-05, "loss": 2.488, "step": 3262000 }, { "epoch": 6.22, "learning_rate": 3.4465731030698415e-05, "loss": 2.5016, "step": 3262500 }, { "epoch": 6.22, "learning_rate": 3.446334938877542e-05, "loss": 2.5125, "step": 3263000 }, { "epoch": 6.22, "learning_rate": 3.4460967746852427e-05, "loss": 2.5207, "step": 3263500 }, { "epoch": 6.22, "learning_rate": 3.445858610492943e-05, "loss": 2.5334, "step": 3264000 }, { "epoch": 6.22, "learning_rate": 3.445620922629028e-05, "loss": 2.497, "step": 3264500 }, { "epoch": 6.22, "learning_rate": 3.445382758436729e-05, "loss": 2.5228, "step": 3265000 }, { "epoch": 6.22, "learning_rate": 3.445144594244429e-05, "loss": 2.5169, "step": 3265500 }, { "epoch": 6.22, "learning_rate": 3.4449064300521296e-05, "loss": 2.5037, "step": 3266000 }, { "epoch": 6.22, "learning_rate": 3.444668742188215e-05, "loss": 2.5189, "step": 3266500 }, { "epoch": 6.22, "learning_rate": 3.444430577995915e-05, "loss": 2.5171, "step": 3267000 }, { "epoch": 6.23, "learning_rate": 3.444192413803616e-05, "loss": 2.5072, "step": 3267500 }, { "epoch": 6.23, "learning_rate": 3.443954249611316e-05, "loss": 2.5272, "step": 3268000 }, { "epoch": 6.23, "learning_rate": 3.4437160854190166e-05, "loss": 2.5338, "step": 3268500 }, { "epoch": 6.23, "learning_rate": 3.443477921226717e-05, "loss": 2.4954, "step": 3269000 }, { "epoch": 6.23, "learning_rate": 3.443239757034418e-05, "loss": 2.5278, "step": 3269500 }, { "epoch": 6.23, "learning_rate": 3.443001592842118e-05, "loss": 2.5226, "step": 3270000 }, { "epoch": 6.23, "eval_accuracy": 0.5399358935502683, "eval_loss": 2.4205732345581055, "eval_runtime": 4203.6381, "eval_samples_per_second": 65.417, "eval_steps_per_second": 6.542, "step": 3270000 }, { "epoch": 6.23, "learning_rate": 3.442763904978203e-05, "loss": 2.4834, "step": 3270500 }, { "epoch": 6.23, "learning_rate": 3.442525740785904e-05, "loss": 2.5166, "step": 3271000 }, { "epoch": 6.23, "learning_rate": 3.4422875765936045e-05, "loss": 2.5222, "step": 3271500 }, { "epoch": 6.23, "learning_rate": 3.442049412401305e-05, "loss": 2.5157, "step": 3272000 }, { "epoch": 6.24, "learning_rate": 3.44181172453739e-05, "loss": 2.522, "step": 3272500 }, { "epoch": 6.24, "learning_rate": 3.44157356034509e-05, "loss": 2.517, "step": 3273000 }, { "epoch": 6.24, "learning_rate": 3.4413358724811755e-05, "loss": 2.5058, "step": 3273500 }, { "epoch": 6.24, "learning_rate": 3.4410977082888764e-05, "loss": 2.5077, "step": 3274000 }, { "epoch": 6.24, "learning_rate": 3.4408595440965767e-05, "loss": 2.4916, "step": 3274500 }, { "epoch": 6.24, "learning_rate": 3.4406213799042776e-05, "loss": 2.5198, "step": 3275000 }, { "epoch": 6.24, "learning_rate": 3.440383215711978e-05, "loss": 2.5206, "step": 3275500 }, { "epoch": 6.24, "learning_rate": 3.440145051519678e-05, "loss": 2.521, "step": 3276000 }, { "epoch": 6.24, "learning_rate": 3.4399073636557633e-05, "loss": 2.5178, "step": 3276500 }, { "epoch": 6.24, "learning_rate": 3.4396691994634636e-05, "loss": 2.5139, "step": 3277000 }, { "epoch": 6.24, "learning_rate": 3.439431511599549e-05, "loss": 2.5142, "step": 3277500 }, { "epoch": 6.25, "learning_rate": 3.43919334740725e-05, "loss": 2.512, "step": 3278000 }, { "epoch": 6.25, "learning_rate": 3.43895518321495e-05, "loss": 2.5141, "step": 3278500 }, { "epoch": 6.25, "learning_rate": 3.43871701902265e-05, "loss": 2.5263, "step": 3279000 }, { "epoch": 6.25, "learning_rate": 3.438478854830351e-05, "loss": 2.5251, "step": 3279500 }, { "epoch": 6.25, "learning_rate": 3.4382406906380515e-05, "loss": 2.5071, "step": 3280000 }, { "epoch": 6.25, "learning_rate": 3.4380025264457524e-05, "loss": 2.5048, "step": 3280500 }, { "epoch": 6.25, "learning_rate": 3.437764362253453e-05, "loss": 2.5202, "step": 3281000 }, { "epoch": 6.25, "learning_rate": 3.437526674389538e-05, "loss": 2.5091, "step": 3281500 }, { "epoch": 6.25, "learning_rate": 3.437288510197238e-05, "loss": 2.5396, "step": 3282000 }, { "epoch": 6.25, "learning_rate": 3.4370503460049385e-05, "loss": 2.5234, "step": 3282500 }, { "epoch": 6.26, "learning_rate": 3.4368121818126394e-05, "loss": 2.5195, "step": 3283000 }, { "epoch": 6.26, "learning_rate": 3.43657401762034e-05, "loss": 2.5277, "step": 3283500 }, { "epoch": 6.26, "learning_rate": 3.4363358534280406e-05, "loss": 2.5196, "step": 3284000 }, { "epoch": 6.26, "learning_rate": 3.436097689235741e-05, "loss": 2.5022, "step": 3284500 }, { "epoch": 6.26, "learning_rate": 3.435859525043441e-05, "loss": 2.5149, "step": 3285000 }, { "epoch": 6.26, "learning_rate": 3.4356213608511414e-05, "loss": 2.5309, "step": 3285500 }, { "epoch": 6.26, "learning_rate": 3.4353841493156116e-05, "loss": 2.4963, "step": 3286000 }, { "epoch": 6.26, "learning_rate": 3.435145985123312e-05, "loss": 2.5204, "step": 3286500 }, { "epoch": 6.26, "learning_rate": 3.434907820931013e-05, "loss": 2.522, "step": 3287000 }, { "epoch": 6.26, "learning_rate": 3.434669656738713e-05, "loss": 2.5027, "step": 3287500 }, { "epoch": 6.26, "learning_rate": 3.434431492546414e-05, "loss": 2.5263, "step": 3288000 }, { "epoch": 6.27, "learning_rate": 3.4341933283541136e-05, "loss": 2.5377, "step": 3288500 }, { "epoch": 6.27, "learning_rate": 3.4339551641618145e-05, "loss": 2.5425, "step": 3289000 }, { "epoch": 6.27, "learning_rate": 3.4337169999695155e-05, "loss": 2.5041, "step": 3289500 }, { "epoch": 6.27, "learning_rate": 3.433478835777216e-05, "loss": 2.5131, "step": 3290000 }, { "epoch": 6.27, "learning_rate": 3.433241147913301e-05, "loss": 2.5187, "step": 3290500 }, { "epoch": 6.27, "learning_rate": 3.433002983721001e-05, "loss": 2.4963, "step": 3291000 }, { "epoch": 6.27, "learning_rate": 3.4327648195287015e-05, "loss": 2.5181, "step": 3291500 }, { "epoch": 6.27, "learning_rate": 3.4325266553364024e-05, "loss": 2.5125, "step": 3292000 }, { "epoch": 6.27, "learning_rate": 3.432288967472487e-05, "loss": 2.5114, "step": 3292500 }, { "epoch": 6.27, "learning_rate": 3.432050803280188e-05, "loss": 2.5181, "step": 3293000 }, { "epoch": 6.28, "learning_rate": 3.431812639087889e-05, "loss": 2.5087, "step": 3293500 }, { "epoch": 6.28, "learning_rate": 3.431574474895589e-05, "loss": 2.5124, "step": 3294000 }, { "epoch": 6.28, "learning_rate": 3.4313367870316743e-05, "loss": 2.505, "step": 3294500 }, { "epoch": 6.28, "learning_rate": 3.4310986228393746e-05, "loss": 2.54, "step": 3295000 }, { "epoch": 6.28, "learning_rate": 3.430860934975459e-05, "loss": 2.4953, "step": 3295500 }, { "epoch": 6.28, "learning_rate": 3.43062277078316e-05, "loss": 2.5292, "step": 3296000 }, { "epoch": 6.28, "learning_rate": 3.4303846065908604e-05, "loss": 2.5178, "step": 3296500 }, { "epoch": 6.28, "learning_rate": 3.430146442398561e-05, "loss": 2.5025, "step": 3297000 }, { "epoch": 6.28, "learning_rate": 3.429908278206262e-05, "loss": 2.5176, "step": 3297500 }, { "epoch": 6.28, "learning_rate": 3.4296701140139625e-05, "loss": 2.5066, "step": 3298000 }, { "epoch": 6.28, "learning_rate": 3.429431949821663e-05, "loss": 2.5058, "step": 3298500 }, { "epoch": 6.29, "learning_rate": 3.429193785629363e-05, "loss": 2.5375, "step": 3299000 }, { "epoch": 6.29, "learning_rate": 3.428955621437064e-05, "loss": 2.504, "step": 3299500 }, { "epoch": 6.29, "learning_rate": 3.428717933573149e-05, "loss": 2.5097, "step": 3300000 }, { "epoch": 6.29, "eval_accuracy": 0.540253259216245, "eval_loss": 2.4206204414367676, "eval_runtime": 4206.1594, "eval_samples_per_second": 65.377, "eval_steps_per_second": 6.538, "step": 3300000 }, { "epoch": 6.29, "learning_rate": 3.4284797693808495e-05, "loss": 2.5069, "step": 3300500 }, { "epoch": 6.29, "learning_rate": 3.428242081516935e-05, "loss": 2.5059, "step": 3301000 }, { "epoch": 6.29, "learning_rate": 3.4280039173246356e-05, "loss": 2.5276, "step": 3301500 }, { "epoch": 6.29, "learning_rate": 3.427765753132335e-05, "loss": 2.5284, "step": 3302000 }, { "epoch": 6.29, "learning_rate": 3.427527588940036e-05, "loss": 2.5072, "step": 3302500 }, { "epoch": 6.29, "learning_rate": 3.4272894247477364e-05, "loss": 2.5067, "step": 3303000 }, { "epoch": 6.29, "learning_rate": 3.4270512605554374e-05, "loss": 2.506, "step": 3303500 }, { "epoch": 6.3, "learning_rate": 3.4268130963631376e-05, "loss": 2.4947, "step": 3304000 }, { "epoch": 6.3, "learning_rate": 3.4265749321708386e-05, "loss": 2.5171, "step": 3304500 }, { "epoch": 6.3, "learning_rate": 3.426337244306923e-05, "loss": 2.5113, "step": 3305000 }, { "epoch": 6.3, "learning_rate": 3.4260990801146234e-05, "loss": 2.5071, "step": 3305500 }, { "epoch": 6.3, "learning_rate": 3.425860915922324e-05, "loss": 2.5174, "step": 3306000 }, { "epoch": 6.3, "learning_rate": 3.4256227517300246e-05, "loss": 2.51, "step": 3306500 }, { "epoch": 6.3, "learning_rate": 3.4253845875377255e-05, "loss": 2.5166, "step": 3307000 }, { "epoch": 6.3, "learning_rate": 3.425146899673811e-05, "loss": 2.5043, "step": 3307500 }, { "epoch": 6.3, "learning_rate": 3.424908735481511e-05, "loss": 2.5039, "step": 3308000 }, { "epoch": 6.3, "learning_rate": 3.424670571289211e-05, "loss": 2.5167, "step": 3308500 }, { "epoch": 6.3, "learning_rate": 3.4244328834252965e-05, "loss": 2.5285, "step": 3309000 }, { "epoch": 6.31, "learning_rate": 3.424194719232997e-05, "loss": 2.5096, "step": 3309500 }, { "epoch": 6.31, "learning_rate": 3.423956555040698e-05, "loss": 2.5221, "step": 3310000 }, { "epoch": 6.31, "learning_rate": 3.423718390848398e-05, "loss": 2.5062, "step": 3310500 }, { "epoch": 6.31, "learning_rate": 3.423480226656099e-05, "loss": 2.5236, "step": 3311000 }, { "epoch": 6.31, "learning_rate": 3.423242062463799e-05, "loss": 2.5208, "step": 3311500 }, { "epoch": 6.31, "learning_rate": 3.4230038982714994e-05, "loss": 2.5041, "step": 3312000 }, { "epoch": 6.31, "learning_rate": 3.4227657340792004e-05, "loss": 2.5163, "step": 3312500 }, { "epoch": 6.31, "learning_rate": 3.42252852254367e-05, "loss": 2.5429, "step": 3313000 }, { "epoch": 6.31, "learning_rate": 3.42229035835137e-05, "loss": 2.5039, "step": 3313500 }, { "epoch": 6.31, "learning_rate": 3.422052194159071e-05, "loss": 2.5214, "step": 3314000 }, { "epoch": 6.32, "learning_rate": 3.421814029966772e-05, "loss": 2.5136, "step": 3314500 }, { "epoch": 6.32, "learning_rate": 3.4215758657744716e-05, "loss": 2.5178, "step": 3315000 }, { "epoch": 6.32, "learning_rate": 3.4213377015821726e-05, "loss": 2.5384, "step": 3315500 }, { "epoch": 6.32, "learning_rate": 3.421099537389873e-05, "loss": 2.5266, "step": 3316000 }, { "epoch": 6.32, "learning_rate": 3.420861373197574e-05, "loss": 2.51, "step": 3316500 }, { "epoch": 6.32, "learning_rate": 3.420623209005274e-05, "loss": 2.5244, "step": 3317000 }, { "epoch": 6.32, "learning_rate": 3.420385044812975e-05, "loss": 2.5279, "step": 3317500 }, { "epoch": 6.32, "learning_rate": 3.420146880620675e-05, "loss": 2.5185, "step": 3318000 }, { "epoch": 6.32, "learning_rate": 3.4199087164283755e-05, "loss": 2.4973, "step": 3318500 }, { "epoch": 6.32, "learning_rate": 3.419671504892845e-05, "loss": 2.5265, "step": 3319000 }, { "epoch": 6.32, "learning_rate": 3.419433340700546e-05, "loss": 2.5116, "step": 3319500 }, { "epoch": 6.33, "learning_rate": 3.419195176508246e-05, "loss": 2.5202, "step": 3320000 }, { "epoch": 6.33, "learning_rate": 3.418957012315947e-05, "loss": 2.5248, "step": 3320500 }, { "epoch": 6.33, "learning_rate": 3.4187188481236474e-05, "loss": 2.5004, "step": 3321000 }, { "epoch": 6.33, "learning_rate": 3.4184811602597326e-05, "loss": 2.5063, "step": 3321500 }, { "epoch": 6.33, "learning_rate": 3.418242996067433e-05, "loss": 2.5045, "step": 3322000 }, { "epoch": 6.33, "learning_rate": 3.418004831875133e-05, "loss": 2.5099, "step": 3322500 }, { "epoch": 6.33, "learning_rate": 3.417766667682834e-05, "loss": 2.5196, "step": 3323000 }, { "epoch": 6.33, "learning_rate": 3.4175285034905344e-05, "loss": 2.5264, "step": 3323500 }, { "epoch": 6.33, "learning_rate": 3.417290339298235e-05, "loss": 2.5166, "step": 3324000 }, { "epoch": 6.33, "learning_rate": 3.4170521751059356e-05, "loss": 2.5302, "step": 3324500 }, { "epoch": 6.34, "learning_rate": 3.416814010913636e-05, "loss": 2.5137, "step": 3325000 }, { "epoch": 6.34, "learning_rate": 3.416576323049721e-05, "loss": 2.5309, "step": 3325500 }, { "epoch": 6.34, "learning_rate": 3.416338158857422e-05, "loss": 2.5143, "step": 3326000 }, { "epoch": 6.34, "learning_rate": 3.4161004709935066e-05, "loss": 2.5156, "step": 3326500 }, { "epoch": 6.34, "learning_rate": 3.4158623068012075e-05, "loss": 2.5286, "step": 3327000 }, { "epoch": 6.34, "learning_rate": 3.415624142608908e-05, "loss": 2.5167, "step": 3327500 }, { "epoch": 6.34, "learning_rate": 3.415385978416609e-05, "loss": 2.5169, "step": 3328000 }, { "epoch": 6.34, "learning_rate": 3.415147814224309e-05, "loss": 2.5361, "step": 3328500 }, { "epoch": 6.34, "learning_rate": 3.414909650032009e-05, "loss": 2.5138, "step": 3329000 }, { "epoch": 6.34, "learning_rate": 3.41467148583971e-05, "loss": 2.5187, "step": 3329500 }, { "epoch": 6.34, "learning_rate": 3.4144333216474104e-05, "loss": 2.5075, "step": 3330000 }, { "epoch": 6.34, "eval_accuracy": 0.5403008503803048, "eval_loss": 2.419070243835449, "eval_runtime": 4198.2919, "eval_samples_per_second": 65.5, "eval_steps_per_second": 6.55, "step": 3330000 }, { "epoch": 6.35, "learning_rate": 3.4141956337834957e-05, "loss": 2.5253, "step": 3330500 }, { "epoch": 6.35, "learning_rate": 3.4139574695911966e-05, "loss": 2.4942, "step": 3331000 }, { "epoch": 6.35, "learning_rate": 3.413719305398896e-05, "loss": 2.504, "step": 3331500 }, { "epoch": 6.35, "learning_rate": 3.4134816175349814e-05, "loss": 2.532, "step": 3332000 }, { "epoch": 6.35, "learning_rate": 3.4132434533426823e-05, "loss": 2.5248, "step": 3332500 }, { "epoch": 6.35, "learning_rate": 3.4130052891503826e-05, "loss": 2.5325, "step": 3333000 }, { "epoch": 6.35, "learning_rate": 3.4127671249580835e-05, "loss": 2.517, "step": 3333500 }, { "epoch": 6.35, "learning_rate": 3.412528960765784e-05, "loss": 2.5319, "step": 3334000 }, { "epoch": 6.35, "learning_rate": 3.412290796573484e-05, "loss": 2.5082, "step": 3334500 }, { "epoch": 6.35, "learning_rate": 3.412052632381184e-05, "loss": 2.5274, "step": 3335000 }, { "epoch": 6.36, "learning_rate": 3.411814468188885e-05, "loss": 2.5125, "step": 3335500 }, { "epoch": 6.36, "learning_rate": 3.411576303996586e-05, "loss": 2.5157, "step": 3336000 }, { "epoch": 6.36, "learning_rate": 3.411338616132671e-05, "loss": 2.5249, "step": 3336500 }, { "epoch": 6.36, "learning_rate": 3.411100451940372e-05, "loss": 2.5114, "step": 3337000 }, { "epoch": 6.36, "learning_rate": 3.410862287748072e-05, "loss": 2.5266, "step": 3337500 }, { "epoch": 6.36, "learning_rate": 3.410624123555772e-05, "loss": 2.5145, "step": 3338000 }, { "epoch": 6.36, "learning_rate": 3.4103864356918575e-05, "loss": 2.5306, "step": 3338500 }, { "epoch": 6.36, "learning_rate": 3.4101482714995584e-05, "loss": 2.5172, "step": 3339000 }, { "epoch": 6.36, "learning_rate": 3.409910107307259e-05, "loss": 2.5152, "step": 3339500 }, { "epoch": 6.36, "learning_rate": 3.4096719431149596e-05, "loss": 2.5257, "step": 3340000 }, { "epoch": 6.36, "learning_rate": 3.409434255251044e-05, "loss": 2.5141, "step": 3340500 }, { "epoch": 6.37, "learning_rate": 3.4091965673871294e-05, "loss": 2.5387, "step": 3341000 }, { "epoch": 6.37, "learning_rate": 3.40895840319483e-05, "loss": 2.5359, "step": 3341500 }, { "epoch": 6.37, "learning_rate": 3.40872023900253e-05, "loss": 2.5121, "step": 3342000 }, { "epoch": 6.37, "learning_rate": 3.408482074810231e-05, "loss": 2.5308, "step": 3342500 }, { "epoch": 6.37, "learning_rate": 3.408244386946316e-05, "loss": 2.5299, "step": 3343000 }, { "epoch": 6.37, "learning_rate": 3.4080062227540163e-05, "loss": 2.5153, "step": 3343500 }, { "epoch": 6.37, "learning_rate": 3.407768058561717e-05, "loss": 2.5127, "step": 3344000 }, { "epoch": 6.37, "learning_rate": 3.4075298943694175e-05, "loss": 2.5176, "step": 3344500 }, { "epoch": 6.37, "learning_rate": 3.407291730177118e-05, "loss": 2.5301, "step": 3345000 }, { "epoch": 6.37, "learning_rate": 3.407053565984819e-05, "loss": 2.5273, "step": 3345500 }, { "epoch": 6.38, "learning_rate": 3.406815401792519e-05, "loss": 2.5093, "step": 3346000 }, { "epoch": 6.38, "learning_rate": 3.40657723760022e-05, "loss": 2.508, "step": 3346500 }, { "epoch": 6.38, "learning_rate": 3.406339549736305e-05, "loss": 2.5257, "step": 3347000 }, { "epoch": 6.38, "learning_rate": 3.4061013855440054e-05, "loss": 2.5225, "step": 3347500 }, { "epoch": 6.38, "learning_rate": 3.405863221351706e-05, "loss": 2.5392, "step": 3348000 }, { "epoch": 6.38, "learning_rate": 3.405625057159406e-05, "loss": 2.5117, "step": 3348500 }, { "epoch": 6.38, "learning_rate": 3.405387369295491e-05, "loss": 2.5092, "step": 3349000 }, { "epoch": 6.38, "learning_rate": 3.405149205103192e-05, "loss": 2.5309, "step": 3349500 }, { "epoch": 6.38, "learning_rate": 3.4049110409108924e-05, "loss": 2.4923, "step": 3350000 }, { "epoch": 6.38, "learning_rate": 3.404672876718593e-05, "loss": 2.5028, "step": 3350500 }, { "epoch": 6.38, "learning_rate": 3.4044347125262936e-05, "loss": 2.5249, "step": 3351000 }, { "epoch": 6.39, "learning_rate": 3.404197024662378e-05, "loss": 2.4986, "step": 3351500 }, { "epoch": 6.39, "learning_rate": 3.403958860470079e-05, "loss": 2.5195, "step": 3352000 }, { "epoch": 6.39, "learning_rate": 3.4037206962777794e-05, "loss": 2.513, "step": 3352500 }, { "epoch": 6.39, "learning_rate": 3.40348253208548e-05, "loss": 2.4996, "step": 3353000 }, { "epoch": 6.39, "learning_rate": 3.4032448442215655e-05, "loss": 2.5282, "step": 3353500 }, { "epoch": 6.39, "learning_rate": 3.403006680029266e-05, "loss": 2.5147, "step": 3354000 }, { "epoch": 6.39, "learning_rate": 3.402768515836967e-05, "loss": 2.5243, "step": 3354500 }, { "epoch": 6.39, "learning_rate": 3.402530351644666e-05, "loss": 2.5339, "step": 3355000 }, { "epoch": 6.39, "learning_rate": 3.4022931401091365e-05, "loss": 2.5015, "step": 3355500 }, { "epoch": 6.39, "learning_rate": 3.402054975916837e-05, "loss": 2.5194, "step": 3356000 }, { "epoch": 6.4, "learning_rate": 3.401816811724538e-05, "loss": 2.4994, "step": 3356500 }, { "epoch": 6.4, "learning_rate": 3.401578647532238e-05, "loss": 2.5316, "step": 3357000 }, { "epoch": 6.4, "learning_rate": 3.401340959668323e-05, "loss": 2.5255, "step": 3357500 }, { "epoch": 6.4, "learning_rate": 3.401102795476024e-05, "loss": 2.5228, "step": 3358000 }, { "epoch": 6.4, "learning_rate": 3.4008646312837244e-05, "loss": 2.511, "step": 3358500 }, { "epoch": 6.4, "learning_rate": 3.400626467091425e-05, "loss": 2.529, "step": 3359000 }, { "epoch": 6.4, "learning_rate": 3.40038877922751e-05, "loss": 2.5104, "step": 3359500 }, { "epoch": 6.4, "learning_rate": 3.40015061503521e-05, "loss": 2.5061, "step": 3360000 }, { "epoch": 6.4, "eval_accuracy": 0.5403307637090742, "eval_loss": 2.4190633296966553, "eval_runtime": 4199.3289, "eval_samples_per_second": 65.484, "eval_steps_per_second": 6.548, "step": 3360000 }, { "epoch": 6.4, "learning_rate": 3.399912450842911e-05, "loss": 2.5323, "step": 3360500 }, { "epoch": 6.4, "learning_rate": 3.3996742866506114e-05, "loss": 2.5053, "step": 3361000 }, { "epoch": 6.4, "learning_rate": 3.399436122458312e-05, "loss": 2.5147, "step": 3361500 }, { "epoch": 6.41, "learning_rate": 3.399197958266012e-05, "loss": 2.5239, "step": 3362000 }, { "epoch": 6.41, "learning_rate": 3.398959794073713e-05, "loss": 2.4904, "step": 3362500 }, { "epoch": 6.41, "learning_rate": 3.398721629881413e-05, "loss": 2.5156, "step": 3363000 }, { "epoch": 6.41, "learning_rate": 3.398483465689114e-05, "loss": 2.5204, "step": 3363500 }, { "epoch": 6.41, "learning_rate": 3.398245301496815e-05, "loss": 2.5117, "step": 3364000 }, { "epoch": 6.41, "learning_rate": 3.3980076136328995e-05, "loss": 2.536, "step": 3364500 }, { "epoch": 6.41, "learning_rate": 3.3977694494406005e-05, "loss": 2.5207, "step": 3365000 }, { "epoch": 6.41, "learning_rate": 3.397531285248301e-05, "loss": 2.5198, "step": 3365500 }, { "epoch": 6.41, "learning_rate": 3.397293121056001e-05, "loss": 2.5177, "step": 3366000 }, { "epoch": 6.41, "learning_rate": 3.397054956863702e-05, "loss": 2.5256, "step": 3366500 }, { "epoch": 6.42, "learning_rate": 3.3968172689997865e-05, "loss": 2.529, "step": 3367000 }, { "epoch": 6.42, "learning_rate": 3.3965791048074874e-05, "loss": 2.5159, "step": 3367500 }, { "epoch": 6.42, "learning_rate": 3.3963409406151884e-05, "loss": 2.5123, "step": 3368000 }, { "epoch": 6.42, "learning_rate": 3.396102776422888e-05, "loss": 2.5307, "step": 3368500 }, { "epoch": 6.42, "learning_rate": 3.395864612230589e-05, "loss": 2.5188, "step": 3369000 }, { "epoch": 6.42, "learning_rate": 3.395626448038289e-05, "loss": 2.5154, "step": 3369500 }, { "epoch": 6.42, "learning_rate": 3.39538828384599e-05, "loss": 2.5022, "step": 3370000 }, { "epoch": 6.42, "learning_rate": 3.3951501196536903e-05, "loss": 2.51, "step": 3370500 }, { "epoch": 6.42, "learning_rate": 3.3949124317897756e-05, "loss": 2.5265, "step": 3371000 }, { "epoch": 6.42, "learning_rate": 3.394674267597476e-05, "loss": 2.5116, "step": 3371500 }, { "epoch": 6.42, "learning_rate": 3.394436103405176e-05, "loss": 2.5292, "step": 3372000 }, { "epoch": 6.43, "learning_rate": 3.394198415541261e-05, "loss": 2.5078, "step": 3372500 }, { "epoch": 6.43, "learning_rate": 3.393960251348962e-05, "loss": 2.5123, "step": 3373000 }, { "epoch": 6.43, "learning_rate": 3.3937220871566625e-05, "loss": 2.5261, "step": 3373500 }, { "epoch": 6.43, "learning_rate": 3.3934839229643635e-05, "loss": 2.525, "step": 3374000 }, { "epoch": 6.43, "learning_rate": 3.393245758772064e-05, "loss": 2.5216, "step": 3374500 }, { "epoch": 6.43, "learning_rate": 3.393007594579764e-05, "loss": 2.528, "step": 3375000 }, { "epoch": 6.43, "learning_rate": 3.392769430387465e-05, "loss": 2.5371, "step": 3375500 }, { "epoch": 6.43, "learning_rate": 3.3925317425235495e-05, "loss": 2.5108, "step": 3376000 }, { "epoch": 6.43, "learning_rate": 3.3922935783312504e-05, "loss": 2.5388, "step": 3376500 }, { "epoch": 6.43, "learning_rate": 3.3920558904673357e-05, "loss": 2.5296, "step": 3377000 }, { "epoch": 6.44, "learning_rate": 3.391817726275036e-05, "loss": 2.5165, "step": 3377500 }, { "epoch": 6.44, "learning_rate": 3.391579562082737e-05, "loss": 2.5205, "step": 3378000 }, { "epoch": 6.44, "learning_rate": 3.391341397890437e-05, "loss": 2.5103, "step": 3378500 }, { "epoch": 6.44, "learning_rate": 3.3911032336981374e-05, "loss": 2.509, "step": 3379000 }, { "epoch": 6.44, "learning_rate": 3.390865069505838e-05, "loss": 2.5094, "step": 3379500 }, { "epoch": 6.44, "learning_rate": 3.3906269053135386e-05, "loss": 2.5163, "step": 3380000 }, { "epoch": 6.44, "learning_rate": 3.3903887411212395e-05, "loss": 2.5149, "step": 3380500 }, { "epoch": 6.44, "learning_rate": 3.390150576928939e-05, "loss": 2.5301, "step": 3381000 }, { "epoch": 6.44, "learning_rate": 3.38991241273664e-05, "loss": 2.5066, "step": 3381500 }, { "epoch": 6.44, "learning_rate": 3.38967424854434e-05, "loss": 2.5246, "step": 3382000 }, { "epoch": 6.44, "learning_rate": 3.389436084352041e-05, "loss": 2.5269, "step": 3382500 }, { "epoch": 6.45, "learning_rate": 3.3891983964881265e-05, "loss": 2.5212, "step": 3383000 }, { "epoch": 6.45, "learning_rate": 3.388960232295827e-05, "loss": 2.5161, "step": 3383500 }, { "epoch": 6.45, "learning_rate": 3.388722068103528e-05, "loss": 2.5041, "step": 3384000 }, { "epoch": 6.45, "learning_rate": 3.388483903911227e-05, "loss": 2.5149, "step": 3384500 }, { "epoch": 6.45, "learning_rate": 3.3882462160473125e-05, "loss": 2.5035, "step": 3385000 }, { "epoch": 6.45, "learning_rate": 3.3880080518550134e-05, "loss": 2.5212, "step": 3385500 }, { "epoch": 6.45, "learning_rate": 3.387769887662714e-05, "loss": 2.5101, "step": 3386000 }, { "epoch": 6.45, "learning_rate": 3.3875317234704146e-05, "loss": 2.5223, "step": 3386500 }, { "epoch": 6.45, "learning_rate": 3.3872940356065e-05, "loss": 2.5161, "step": 3387000 }, { "epoch": 6.45, "learning_rate": 3.387056347742585e-05, "loss": 2.5244, "step": 3387500 }, { "epoch": 6.46, "learning_rate": 3.3868181835502854e-05, "loss": 2.5046, "step": 3388000 }, { "epoch": 6.46, "learning_rate": 3.3865800193579856e-05, "loss": 2.5287, "step": 3388500 }, { "epoch": 6.46, "learning_rate": 3.386341855165686e-05, "loss": 2.5114, "step": 3389000 }, { "epoch": 6.46, "learning_rate": 3.386104167301771e-05, "loss": 2.5107, "step": 3389500 }, { "epoch": 6.46, "learning_rate": 3.385866003109472e-05, "loss": 2.503, "step": 3390000 }, { "epoch": 6.46, "eval_accuracy": 0.540383839872099, "eval_loss": 2.4181902408599854, "eval_runtime": 4195.3883, "eval_samples_per_second": 65.545, "eval_steps_per_second": 6.555, "step": 3390000 }, { "epoch": 6.46, "learning_rate": 3.385627838917172e-05, "loss": 2.5177, "step": 3390500 }, { "epoch": 6.46, "learning_rate": 3.385389674724873e-05, "loss": 2.5173, "step": 3391000 }, { "epoch": 6.46, "learning_rate": 3.385151510532573e-05, "loss": 2.5152, "step": 3391500 }, { "epoch": 6.46, "learning_rate": 3.384913346340274e-05, "loss": 2.5106, "step": 3392000 }, { "epoch": 6.46, "learning_rate": 3.384675182147975e-05, "loss": 2.5321, "step": 3392500 }, { "epoch": 6.46, "learning_rate": 3.384437494284059e-05, "loss": 2.5084, "step": 3393000 }, { "epoch": 6.47, "learning_rate": 3.38419933009176e-05, "loss": 2.5098, "step": 3393500 }, { "epoch": 6.47, "learning_rate": 3.3839611658994605e-05, "loss": 2.5244, "step": 3394000 }, { "epoch": 6.47, "learning_rate": 3.383723001707161e-05, "loss": 2.5172, "step": 3394500 }, { "epoch": 6.47, "learning_rate": 3.383484837514862e-05, "loss": 2.5147, "step": 3395000 }, { "epoch": 6.47, "learning_rate": 3.383246673322562e-05, "loss": 2.5291, "step": 3395500 }, { "epoch": 6.47, "learning_rate": 3.383008509130263e-05, "loss": 2.5404, "step": 3396000 }, { "epoch": 6.47, "learning_rate": 3.382770344937963e-05, "loss": 2.5218, "step": 3396500 }, { "epoch": 6.47, "learning_rate": 3.3825326570740484e-05, "loss": 2.4849, "step": 3397000 }, { "epoch": 6.47, "learning_rate": 3.382294492881749e-05, "loss": 2.5279, "step": 3397500 }, { "epoch": 6.47, "learning_rate": 3.382056328689449e-05, "loss": 2.5216, "step": 3398000 }, { "epoch": 6.48, "learning_rate": 3.38181816449715e-05, "loss": 2.5178, "step": 3398500 }, { "epoch": 6.48, "learning_rate": 3.38158000030485e-05, "loss": 2.506, "step": 3399000 }, { "epoch": 6.48, "learning_rate": 3.381342312440935e-05, "loss": 2.5211, "step": 3399500 }, { "epoch": 6.48, "learning_rate": 3.381104148248636e-05, "loss": 2.517, "step": 3400000 }, { "epoch": 6.48, "learning_rate": 3.3808659840563365e-05, "loss": 2.521, "step": 3400500 }, { "epoch": 6.48, "learning_rate": 3.380627819864037e-05, "loss": 2.4947, "step": 3401000 }, { "epoch": 6.48, "learning_rate": 3.380390132000122e-05, "loss": 2.5163, "step": 3401500 }, { "epoch": 6.48, "learning_rate": 3.380151967807822e-05, "loss": 2.5117, "step": 3402000 }, { "epoch": 6.48, "learning_rate": 3.3799142799439075e-05, "loss": 2.5085, "step": 3402500 }, { "epoch": 6.48, "learning_rate": 3.3796761157516085e-05, "loss": 2.524, "step": 3403000 }, { "epoch": 6.48, "learning_rate": 3.379437951559309e-05, "loss": 2.5136, "step": 3403500 }, { "epoch": 6.49, "learning_rate": 3.37919978736701e-05, "loss": 2.4812, "step": 3404000 }, { "epoch": 6.49, "learning_rate": 3.378961623174709e-05, "loss": 2.5205, "step": 3404500 }, { "epoch": 6.49, "learning_rate": 3.37872345898241e-05, "loss": 2.5081, "step": 3405000 }, { "epoch": 6.49, "learning_rate": 3.3784857711184954e-05, "loss": 2.5301, "step": 3405500 }, { "epoch": 6.49, "learning_rate": 3.378247606926196e-05, "loss": 2.5143, "step": 3406000 }, { "epoch": 6.49, "learning_rate": 3.3780094427338966e-05, "loss": 2.5244, "step": 3406500 }, { "epoch": 6.49, "learning_rate": 3.377771278541597e-05, "loss": 2.4984, "step": 3407000 }, { "epoch": 6.49, "learning_rate": 3.377533114349298e-05, "loss": 2.5174, "step": 3407500 }, { "epoch": 6.49, "learning_rate": 3.377294950156998e-05, "loss": 2.5277, "step": 3408000 }, { "epoch": 6.49, "learning_rate": 3.3770567859646984e-05, "loss": 2.5091, "step": 3408500 }, { "epoch": 6.5, "learning_rate": 3.376818621772399e-05, "loss": 2.5187, "step": 3409000 }, { "epoch": 6.5, "learning_rate": 3.376580933908484e-05, "loss": 2.5334, "step": 3409500 }, { "epoch": 6.5, "learning_rate": 3.376342769716185e-05, "loss": 2.5216, "step": 3410000 }, { "epoch": 6.5, "learning_rate": 3.376104605523886e-05, "loss": 2.5179, "step": 3410500 }, { "epoch": 6.5, "learning_rate": 3.375866441331585e-05, "loss": 2.5196, "step": 3411000 }, { "epoch": 6.5, "learning_rate": 3.3756287534676705e-05, "loss": 2.5281, "step": 3411500 }, { "epoch": 6.5, "learning_rate": 3.3753905892753715e-05, "loss": 2.5217, "step": 3412000 }, { "epoch": 6.5, "learning_rate": 3.375152425083072e-05, "loss": 2.5418, "step": 3412500 }, { "epoch": 6.5, "learning_rate": 3.374914260890773e-05, "loss": 2.51, "step": 3413000 }, { "epoch": 6.5, "learning_rate": 3.374676573026858e-05, "loss": 2.4981, "step": 3413500 }, { "epoch": 6.5, "learning_rate": 3.3744388851629425e-05, "loss": 2.52, "step": 3414000 }, { "epoch": 6.51, "learning_rate": 3.3742007209706434e-05, "loss": 2.5098, "step": 3414500 }, { "epoch": 6.51, "learning_rate": 3.373962556778344e-05, "loss": 2.5144, "step": 3415000 }, { "epoch": 6.51, "learning_rate": 3.373724392586044e-05, "loss": 2.5179, "step": 3415500 }, { "epoch": 6.51, "learning_rate": 3.373486228393745e-05, "loss": 2.5252, "step": 3416000 }, { "epoch": 6.51, "learning_rate": 3.373248064201445e-05, "loss": 2.5315, "step": 3416500 }, { "epoch": 6.51, "learning_rate": 3.373009900009146e-05, "loss": 2.5118, "step": 3417000 }, { "epoch": 6.51, "learning_rate": 3.3727717358168457e-05, "loss": 2.5208, "step": 3417500 }, { "epoch": 6.51, "learning_rate": 3.372534047952931e-05, "loss": 2.5132, "step": 3418000 }, { "epoch": 6.51, "learning_rate": 3.372295883760632e-05, "loss": 2.5272, "step": 3418500 }, { "epoch": 6.51, "learning_rate": 3.372057719568332e-05, "loss": 2.5264, "step": 3419000 }, { "epoch": 6.52, "learning_rate": 3.371820031704417e-05, "loss": 2.514, "step": 3419500 }, { "epoch": 6.52, "learning_rate": 3.371581867512118e-05, "loss": 2.5155, "step": 3420000 }, { "epoch": 6.52, "eval_accuracy": 0.5407120120169182, "eval_loss": 2.4169094562530518, "eval_runtime": 4195.4872, "eval_samples_per_second": 65.544, "eval_steps_per_second": 6.554, "step": 3420000 }, { "epoch": 6.52, "learning_rate": 3.3713437033198185e-05, "loss": 2.5095, "step": 3420500 }, { "epoch": 6.52, "learning_rate": 3.3711055391275195e-05, "loss": 2.5251, "step": 3421000 }, { "epoch": 6.52, "learning_rate": 3.370867374935219e-05, "loss": 2.504, "step": 3421500 }, { "epoch": 6.52, "learning_rate": 3.370629687071304e-05, "loss": 2.5032, "step": 3422000 }, { "epoch": 6.52, "learning_rate": 3.370391522879005e-05, "loss": 2.5252, "step": 3422500 }, { "epoch": 6.52, "learning_rate": 3.3701533586867055e-05, "loss": 2.5311, "step": 3423000 }, { "epoch": 6.52, "learning_rate": 3.3699151944944064e-05, "loss": 2.5082, "step": 3423500 }, { "epoch": 6.52, "learning_rate": 3.369677030302107e-05, "loss": 2.5299, "step": 3424000 }, { "epoch": 6.52, "learning_rate": 3.369438866109807e-05, "loss": 2.5024, "step": 3424500 }, { "epoch": 6.53, "learning_rate": 3.369200701917508e-05, "loss": 2.5093, "step": 3425000 }, { "epoch": 6.53, "learning_rate": 3.3689630140535924e-05, "loss": 2.5121, "step": 3425500 }, { "epoch": 6.53, "learning_rate": 3.3687248498612934e-05, "loss": 2.516, "step": 3426000 }, { "epoch": 6.53, "learning_rate": 3.3684866856689936e-05, "loss": 2.5113, "step": 3426500 }, { "epoch": 6.53, "learning_rate": 3.3682485214766946e-05, "loss": 2.514, "step": 3427000 }, { "epoch": 6.53, "learning_rate": 3.368010357284395e-05, "loss": 2.5271, "step": 3427500 }, { "epoch": 6.53, "learning_rate": 3.367772193092095e-05, "loss": 2.5124, "step": 3428000 }, { "epoch": 6.53, "learning_rate": 3.36753450522818e-05, "loss": 2.494, "step": 3428500 }, { "epoch": 6.53, "learning_rate": 3.367296341035881e-05, "loss": 2.5197, "step": 3429000 }, { "epoch": 6.53, "learning_rate": 3.3670581768435815e-05, "loss": 2.5283, "step": 3429500 }, { "epoch": 6.54, "learning_rate": 3.3668200126512825e-05, "loss": 2.5309, "step": 3430000 }, { "epoch": 6.54, "learning_rate": 3.366581848458983e-05, "loss": 2.5008, "step": 3430500 }, { "epoch": 6.54, "learning_rate": 3.366343684266683e-05, "loss": 2.5151, "step": 3431000 }, { "epoch": 6.54, "learning_rate": 3.366105520074383e-05, "loss": 2.5243, "step": 3431500 }, { "epoch": 6.54, "learning_rate": 3.3658678322104685e-05, "loss": 2.5198, "step": 3432000 }, { "epoch": 6.54, "learning_rate": 3.3656296680181694e-05, "loss": 2.5117, "step": 3432500 }, { "epoch": 6.54, "learning_rate": 3.36539150382587e-05, "loss": 2.5227, "step": 3433000 }, { "epoch": 6.54, "learning_rate": 3.3651533396335706e-05, "loss": 2.5293, "step": 3433500 }, { "epoch": 6.54, "learning_rate": 3.36491517544127e-05, "loss": 2.5194, "step": 3434000 }, { "epoch": 6.54, "learning_rate": 3.364677011248971e-05, "loss": 2.5112, "step": 3434500 }, { "epoch": 6.54, "learning_rate": 3.364438847056672e-05, "loss": 2.5087, "step": 3435000 }, { "epoch": 6.55, "learning_rate": 3.3642006828643724e-05, "loss": 2.5232, "step": 3435500 }, { "epoch": 6.55, "learning_rate": 3.3639629950004576e-05, "loss": 2.5045, "step": 3436000 }, { "epoch": 6.55, "learning_rate": 3.363724830808158e-05, "loss": 2.532, "step": 3436500 }, { "epoch": 6.55, "learning_rate": 3.363486666615858e-05, "loss": 2.5047, "step": 3437000 }, { "epoch": 6.55, "learning_rate": 3.363248502423559e-05, "loss": 2.5262, "step": 3437500 }, { "epoch": 6.55, "learning_rate": 3.363010338231259e-05, "loss": 2.5213, "step": 3438000 }, { "epoch": 6.55, "learning_rate": 3.3627726503673445e-05, "loss": 2.5119, "step": 3438500 }, { "epoch": 6.55, "learning_rate": 3.3625344861750455e-05, "loss": 2.5163, "step": 3439000 }, { "epoch": 6.55, "learning_rate": 3.362296321982746e-05, "loss": 2.5253, "step": 3439500 }, { "epoch": 6.55, "learning_rate": 3.362058157790447e-05, "loss": 2.5189, "step": 3440000 }, { "epoch": 6.56, "learning_rate": 3.361819993598146e-05, "loss": 2.505, "step": 3440500 }, { "epoch": 6.56, "learning_rate": 3.361581829405847e-05, "loss": 2.5286, "step": 3441000 }, { "epoch": 6.56, "learning_rate": 3.3613436652135475e-05, "loss": 2.5383, "step": 3441500 }, { "epoch": 6.56, "learning_rate": 3.3611055010212484e-05, "loss": 2.5156, "step": 3442000 }, { "epoch": 6.56, "learning_rate": 3.3608678131573336e-05, "loss": 2.529, "step": 3442500 }, { "epoch": 6.56, "learning_rate": 3.360629648965034e-05, "loss": 2.5245, "step": 3443000 }, { "epoch": 6.56, "learning_rate": 3.360391484772734e-05, "loss": 2.5, "step": 3443500 }, { "epoch": 6.56, "learning_rate": 3.3601533205804344e-05, "loss": 2.4979, "step": 3444000 }, { "epoch": 6.56, "learning_rate": 3.3599161090449046e-05, "loss": 2.5252, "step": 3444500 }, { "epoch": 6.56, "learning_rate": 3.359677944852605e-05, "loss": 2.5147, "step": 3445000 }, { "epoch": 6.56, "learning_rate": 3.359439780660306e-05, "loss": 2.5465, "step": 3445500 }, { "epoch": 6.57, "learning_rate": 3.359201616468006e-05, "loss": 2.5092, "step": 3446000 }, { "epoch": 6.57, "learning_rate": 3.358963452275707e-05, "loss": 2.5173, "step": 3446500 }, { "epoch": 6.57, "learning_rate": 3.358725764411792e-05, "loss": 2.5144, "step": 3447000 }, { "epoch": 6.57, "learning_rate": 3.358487600219492e-05, "loss": 2.5164, "step": 3447500 }, { "epoch": 6.57, "learning_rate": 3.358249436027193e-05, "loss": 2.5059, "step": 3448000 }, { "epoch": 6.57, "learning_rate": 3.358011271834893e-05, "loss": 2.4955, "step": 3448500 }, { "epoch": 6.57, "learning_rate": 3.357773107642594e-05, "loss": 2.5112, "step": 3449000 }, { "epoch": 6.57, "learning_rate": 3.357534943450294e-05, "loss": 2.5043, "step": 3449500 }, { "epoch": 6.57, "learning_rate": 3.3572972555863795e-05, "loss": 2.529, "step": 3450000 }, { "epoch": 6.57, "eval_accuracy": 0.5405826186518318, "eval_loss": 2.417257070541382, "eval_runtime": 4199.4919, "eval_samples_per_second": 65.481, "eval_steps_per_second": 6.548, "step": 3450000 }, { "epoch": 6.57, "learning_rate": 3.35705909139408e-05, "loss": 2.4895, "step": 3450500 }, { "epoch": 6.58, "learning_rate": 3.35682092720178e-05, "loss": 2.5357, "step": 3451000 }, { "epoch": 6.58, "learning_rate": 3.356582763009481e-05, "loss": 2.4889, "step": 3451500 }, { "epoch": 6.58, "learning_rate": 3.356344598817182e-05, "loss": 2.5128, "step": 3452000 }, { "epoch": 6.58, "learning_rate": 3.356106434624882e-05, "loss": 2.5107, "step": 3452500 }, { "epoch": 6.58, "learning_rate": 3.355868270432583e-05, "loss": 2.5222, "step": 3453000 }, { "epoch": 6.58, "learning_rate": 3.3556305825686676e-05, "loss": 2.5192, "step": 3453500 }, { "epoch": 6.58, "learning_rate": 3.355392418376368e-05, "loss": 2.52, "step": 3454000 }, { "epoch": 6.58, "learning_rate": 3.355154254184069e-05, "loss": 2.5066, "step": 3454500 }, { "epoch": 6.58, "learning_rate": 3.354916089991769e-05, "loss": 2.518, "step": 3455000 }, { "epoch": 6.58, "learning_rate": 3.35467792579947e-05, "loss": 2.5155, "step": 3455500 }, { "epoch": 6.58, "learning_rate": 3.35443976160717e-05, "loss": 2.5083, "step": 3456000 }, { "epoch": 6.59, "learning_rate": 3.3542015974148706e-05, "loss": 2.5068, "step": 3456500 }, { "epoch": 6.59, "learning_rate": 3.353963433222571e-05, "loss": 2.5207, "step": 3457000 }, { "epoch": 6.59, "learning_rate": 3.353725745358656e-05, "loss": 2.525, "step": 3457500 }, { "epoch": 6.59, "learning_rate": 3.353488057494741e-05, "loss": 2.5253, "step": 3458000 }, { "epoch": 6.59, "learning_rate": 3.353249893302442e-05, "loss": 2.4996, "step": 3458500 }, { "epoch": 6.59, "learning_rate": 3.3530117291101425e-05, "loss": 2.5117, "step": 3459000 }, { "epoch": 6.59, "learning_rate": 3.3527735649178434e-05, "loss": 2.5223, "step": 3459500 }, { "epoch": 6.59, "learning_rate": 3.352535400725543e-05, "loss": 2.5238, "step": 3460000 }, { "epoch": 6.59, "learning_rate": 3.352297236533244e-05, "loss": 2.5149, "step": 3460500 }, { "epoch": 6.59, "learning_rate": 3.352059072340944e-05, "loss": 2.4934, "step": 3461000 }, { "epoch": 6.6, "learning_rate": 3.351820908148645e-05, "loss": 2.4923, "step": 3461500 }, { "epoch": 6.6, "learning_rate": 3.3515832202847304e-05, "loss": 2.5166, "step": 3462000 }, { "epoch": 6.6, "learning_rate": 3.3513455324208156e-05, "loss": 2.5028, "step": 3462500 }, { "epoch": 6.6, "learning_rate": 3.351107368228516e-05, "loss": 2.548, "step": 3463000 }, { "epoch": 6.6, "learning_rate": 3.350869204036217e-05, "loss": 2.5122, "step": 3463500 }, { "epoch": 6.6, "learning_rate": 3.3506310398439164e-05, "loss": 2.5154, "step": 3464000 }, { "epoch": 6.6, "learning_rate": 3.3503928756516173e-05, "loss": 2.5174, "step": 3464500 }, { "epoch": 6.6, "learning_rate": 3.3501547114593176e-05, "loss": 2.52, "step": 3465000 }, { "epoch": 6.6, "learning_rate": 3.3499165472670186e-05, "loss": 2.5269, "step": 3465500 }, { "epoch": 6.6, "learning_rate": 3.349678383074719e-05, "loss": 2.5126, "step": 3466000 }, { "epoch": 6.6, "learning_rate": 3.349440695210804e-05, "loss": 2.5203, "step": 3466500 }, { "epoch": 6.61, "learning_rate": 3.349202531018504e-05, "loss": 2.5259, "step": 3467000 }, { "epoch": 6.61, "learning_rate": 3.348964366826205e-05, "loss": 2.5102, "step": 3467500 }, { "epoch": 6.61, "learning_rate": 3.3487262026339055e-05, "loss": 2.5213, "step": 3468000 }, { "epoch": 6.61, "learning_rate": 3.3484880384416064e-05, "loss": 2.514, "step": 3468500 }, { "epoch": 6.61, "learning_rate": 3.348249874249307e-05, "loss": 2.513, "step": 3469000 }, { "epoch": 6.61, "learning_rate": 3.348011710057007e-05, "loss": 2.5159, "step": 3469500 }, { "epoch": 6.61, "learning_rate": 3.347773545864707e-05, "loss": 2.5272, "step": 3470000 }, { "epoch": 6.61, "learning_rate": 3.347535381672408e-05, "loss": 2.5297, "step": 3470500 }, { "epoch": 6.61, "learning_rate": 3.3472972174801084e-05, "loss": 2.5024, "step": 3471000 }, { "epoch": 6.61, "learning_rate": 3.3470590532878094e-05, "loss": 2.5109, "step": 3471500 }, { "epoch": 6.62, "learning_rate": 3.3468208890955096e-05, "loss": 2.5028, "step": 3472000 }, { "epoch": 6.62, "learning_rate": 3.346583201231595e-05, "loss": 2.5178, "step": 3472500 }, { "epoch": 6.62, "learning_rate": 3.346345037039295e-05, "loss": 2.4998, "step": 3473000 }, { "epoch": 6.62, "learning_rate": 3.346106872846996e-05, "loss": 2.5048, "step": 3473500 }, { "epoch": 6.62, "learning_rate": 3.3458691849830806e-05, "loss": 2.5002, "step": 3474000 }, { "epoch": 6.62, "learning_rate": 3.3456310207907816e-05, "loss": 2.4928, "step": 3474500 }, { "epoch": 6.62, "learning_rate": 3.345392856598482e-05, "loss": 2.5399, "step": 3475000 }, { "epoch": 6.62, "learning_rate": 3.345154692406183e-05, "loss": 2.5244, "step": 3475500 }, { "epoch": 6.62, "learning_rate": 3.344916528213883e-05, "loss": 2.5225, "step": 3476000 }, { "epoch": 6.62, "learning_rate": 3.344678364021583e-05, "loss": 2.5102, "step": 3476500 }, { "epoch": 6.62, "learning_rate": 3.344440199829284e-05, "loss": 2.5217, "step": 3477000 }, { "epoch": 6.63, "learning_rate": 3.3442020356369845e-05, "loss": 2.5134, "step": 3477500 }, { "epoch": 6.63, "learning_rate": 3.34396434777307e-05, "loss": 2.5234, "step": 3478000 }, { "epoch": 6.63, "learning_rate": 3.343726183580771e-05, "loss": 2.5264, "step": 3478500 }, { "epoch": 6.63, "learning_rate": 3.34348801938847e-05, "loss": 2.5249, "step": 3479000 }, { "epoch": 6.63, "learning_rate": 3.343249855196171e-05, "loss": 2.53, "step": 3479500 }, { "epoch": 6.63, "learning_rate": 3.343012643660641e-05, "loss": 2.5116, "step": 3480000 }, { "epoch": 6.63, "eval_accuracy": 0.5406201676110463, "eval_loss": 2.416478157043457, "eval_runtime": 4195.4798, "eval_samples_per_second": 65.544, "eval_steps_per_second": 6.554, "step": 3480000 }, { "epoch": 6.63, "learning_rate": 3.3427744794683416e-05, "loss": 2.5162, "step": 3480500 }, { "epoch": 6.63, "learning_rate": 3.342536315276042e-05, "loss": 2.536, "step": 3481000 }, { "epoch": 6.63, "learning_rate": 3.342298151083743e-05, "loss": 2.5274, "step": 3481500 }, { "epoch": 6.63, "learning_rate": 3.3420604632198274e-05, "loss": 2.5105, "step": 3482000 }, { "epoch": 6.64, "learning_rate": 3.3418222990275283e-05, "loss": 2.5199, "step": 3482500 }, { "epoch": 6.64, "learning_rate": 3.3415841348352286e-05, "loss": 2.5277, "step": 3483000 }, { "epoch": 6.64, "learning_rate": 3.341345970642929e-05, "loss": 2.519, "step": 3483500 }, { "epoch": 6.64, "learning_rate": 3.34110780645063e-05, "loss": 2.5265, "step": 3484000 }, { "epoch": 6.64, "learning_rate": 3.340870118586715e-05, "loss": 2.5107, "step": 3484500 }, { "epoch": 6.64, "learning_rate": 3.340631954394415e-05, "loss": 2.5198, "step": 3485000 }, { "epoch": 6.64, "learning_rate": 3.340393790202116e-05, "loss": 2.5138, "step": 3485500 }, { "epoch": 6.64, "learning_rate": 3.3401556260098165e-05, "loss": 2.5149, "step": 3486000 }, { "epoch": 6.64, "learning_rate": 3.339917461817517e-05, "loss": 2.5195, "step": 3486500 }, { "epoch": 6.64, "learning_rate": 3.339679297625217e-05, "loss": 2.5187, "step": 3487000 }, { "epoch": 6.64, "learning_rate": 3.339441133432918e-05, "loss": 2.5047, "step": 3487500 }, { "epoch": 6.65, "learning_rate": 3.339202969240618e-05, "loss": 2.5043, "step": 3488000 }, { "epoch": 6.65, "learning_rate": 3.3389652813767035e-05, "loss": 2.5203, "step": 3488500 }, { "epoch": 6.65, "learning_rate": 3.3387271171844044e-05, "loss": 2.4942, "step": 3489000 }, { "epoch": 6.65, "learning_rate": 3.3384894293204896e-05, "loss": 2.512, "step": 3489500 }, { "epoch": 6.65, "learning_rate": 3.338251265128189e-05, "loss": 2.5277, "step": 3490000 }, { "epoch": 6.65, "learning_rate": 3.33801310093589e-05, "loss": 2.5335, "step": 3490500 }, { "epoch": 6.65, "learning_rate": 3.3377749367435904e-05, "loss": 2.5117, "step": 3491000 }, { "epoch": 6.65, "learning_rate": 3.3375367725512914e-05, "loss": 2.5175, "step": 3491500 }, { "epoch": 6.65, "learning_rate": 3.3372986083589916e-05, "loss": 2.5164, "step": 3492000 }, { "epoch": 6.65, "learning_rate": 3.3370604441666926e-05, "loss": 2.5134, "step": 3492500 }, { "epoch": 6.66, "learning_rate": 3.336822279974393e-05, "loss": 2.516, "step": 3493000 }, { "epoch": 6.66, "learning_rate": 3.3365845921104774e-05, "loss": 2.5356, "step": 3493500 }, { "epoch": 6.66, "learning_rate": 3.3363469042465626e-05, "loss": 2.502, "step": 3494000 }, { "epoch": 6.66, "learning_rate": 3.3361087400542635e-05, "loss": 2.5007, "step": 3494500 }, { "epoch": 6.66, "learning_rate": 3.335870575861964e-05, "loss": 2.5088, "step": 3495000 }, { "epoch": 6.66, "learning_rate": 3.335632411669665e-05, "loss": 2.5289, "step": 3495500 }, { "epoch": 6.66, "learning_rate": 3.335394247477365e-05, "loss": 2.5074, "step": 3496000 }, { "epoch": 6.66, "learning_rate": 3.335156083285065e-05, "loss": 2.5181, "step": 3496500 }, { "epoch": 6.66, "learning_rate": 3.334917919092766e-05, "loss": 2.5066, "step": 3497000 }, { "epoch": 6.66, "learning_rate": 3.3346797549004665e-05, "loss": 2.5276, "step": 3497500 }, { "epoch": 6.66, "learning_rate": 3.334442067036552e-05, "loss": 2.5124, "step": 3498000 }, { "epoch": 6.67, "learning_rate": 3.334204379172637e-05, "loss": 2.5141, "step": 3498500 }, { "epoch": 6.67, "learning_rate": 3.333966214980337e-05, "loss": 2.5043, "step": 3499000 }, { "epoch": 6.67, "learning_rate": 3.333728050788038e-05, "loss": 2.531, "step": 3499500 }, { "epoch": 6.67, "learning_rate": 3.3334898865957384e-05, "loss": 2.5096, "step": 3500000 }, { "epoch": 6.67, "learning_rate": 3.3332517224034387e-05, "loss": 2.5182, "step": 3500500 }, { "epoch": 6.67, "learning_rate": 3.333014034539524e-05, "loss": 2.5123, "step": 3501000 }, { "epoch": 6.67, "learning_rate": 3.332775870347225e-05, "loss": 2.5103, "step": 3501500 }, { "epoch": 6.67, "learning_rate": 3.332537706154925e-05, "loss": 2.4988, "step": 3502000 }, { "epoch": 6.67, "learning_rate": 3.332299541962626e-05, "loss": 2.5144, "step": 3502500 }, { "epoch": 6.67, "learning_rate": 3.3320613777703256e-05, "loss": 2.529, "step": 3503000 }, { "epoch": 6.68, "learning_rate": 3.331823689906411e-05, "loss": 2.5015, "step": 3503500 }, { "epoch": 6.68, "learning_rate": 3.331585525714112e-05, "loss": 2.5109, "step": 3504000 }, { "epoch": 6.68, "learning_rate": 3.331347361521812e-05, "loss": 2.5202, "step": 3504500 }, { "epoch": 6.68, "learning_rate": 3.331109197329513e-05, "loss": 2.4886, "step": 3505000 }, { "epoch": 6.68, "learning_rate": 3.330871033137213e-05, "loss": 2.5169, "step": 3505500 }, { "epoch": 6.68, "learning_rate": 3.330632868944914e-05, "loss": 2.5267, "step": 3506000 }, { "epoch": 6.68, "learning_rate": 3.330394704752614e-05, "loss": 2.507, "step": 3506500 }, { "epoch": 6.68, "learning_rate": 3.330157016888699e-05, "loss": 2.5256, "step": 3507000 }, { "epoch": 6.68, "learning_rate": 3.3299188526964e-05, "loss": 2.5229, "step": 3507500 }, { "epoch": 6.68, "learning_rate": 3.3296806885041e-05, "loss": 2.4987, "step": 3508000 }, { "epoch": 6.68, "learning_rate": 3.329442524311801e-05, "loss": 2.5277, "step": 3508500 }, { "epoch": 6.69, "learning_rate": 3.3292043601195014e-05, "loss": 2.511, "step": 3509000 }, { "epoch": 6.69, "learning_rate": 3.328966195927202e-05, "loss": 2.509, "step": 3509500 }, { "epoch": 6.69, "learning_rate": 3.3287280317349026e-05, "loss": 2.5147, "step": 3510000 }, { "epoch": 6.69, "eval_accuracy": 0.5409083451198603, "eval_loss": 2.4149062633514404, "eval_runtime": 4205.8133, "eval_samples_per_second": 65.383, "eval_steps_per_second": 6.538, "step": 3510000 }, { "epoch": 6.69, "learning_rate": 3.328489867542603e-05, "loss": 2.5189, "step": 3510500 }, { "epoch": 6.69, "learning_rate": 3.328252179678688e-05, "loss": 2.5284, "step": 3511000 }, { "epoch": 6.69, "learning_rate": 3.3280140154863884e-05, "loss": 2.5106, "step": 3511500 }, { "epoch": 6.69, "learning_rate": 3.327775851294089e-05, "loss": 2.4961, "step": 3512000 }, { "epoch": 6.69, "learning_rate": 3.3275376871017896e-05, "loss": 2.5122, "step": 3512500 }, { "epoch": 6.69, "learning_rate": 3.32729952290949e-05, "loss": 2.4959, "step": 3513000 }, { "epoch": 6.69, "learning_rate": 3.327061358717191e-05, "loss": 2.535, "step": 3513500 }, { "epoch": 6.7, "learning_rate": 3.326823194524891e-05, "loss": 2.5085, "step": 3514000 }, { "epoch": 6.7, "learning_rate": 3.326585030332592e-05, "loss": 2.4986, "step": 3514500 }, { "epoch": 6.7, "learning_rate": 3.326347342468677e-05, "loss": 2.5342, "step": 3515000 }, { "epoch": 6.7, "learning_rate": 3.3261091782763775e-05, "loss": 2.5096, "step": 3515500 }, { "epoch": 6.7, "learning_rate": 3.325871014084078e-05, "loss": 2.5007, "step": 3516000 }, { "epoch": 6.7, "learning_rate": 3.325632849891778e-05, "loss": 2.5136, "step": 3516500 }, { "epoch": 6.7, "learning_rate": 3.325395162027863e-05, "loss": 2.5154, "step": 3517000 }, { "epoch": 6.7, "learning_rate": 3.325156997835564e-05, "loss": 2.5209, "step": 3517500 }, { "epoch": 6.7, "learning_rate": 3.3249188336432644e-05, "loss": 2.4936, "step": 3518000 }, { "epoch": 6.7, "learning_rate": 3.3246806694509654e-05, "loss": 2.5232, "step": 3518500 }, { "epoch": 6.7, "learning_rate": 3.3244429815870506e-05, "loss": 2.5235, "step": 3519000 }, { "epoch": 6.71, "learning_rate": 3.32420481739475e-05, "loss": 2.5047, "step": 3519500 }, { "epoch": 6.71, "learning_rate": 3.323966653202451e-05, "loss": 2.4919, "step": 3520000 }, { "epoch": 6.71, "learning_rate": 3.3237284890101514e-05, "loss": 2.5208, "step": 3520500 }, { "epoch": 6.71, "learning_rate": 3.3234908011462366e-05, "loss": 2.5306, "step": 3521000 }, { "epoch": 6.71, "learning_rate": 3.3232526369539375e-05, "loss": 2.5137, "step": 3521500 }, { "epoch": 6.71, "learning_rate": 3.323014472761638e-05, "loss": 2.5067, "step": 3522000 }, { "epoch": 6.71, "learning_rate": 3.322776308569338e-05, "loss": 2.5216, "step": 3522500 }, { "epoch": 6.71, "learning_rate": 3.322538144377039e-05, "loss": 2.5013, "step": 3523000 }, { "epoch": 6.71, "learning_rate": 3.3223009328415085e-05, "loss": 2.4951, "step": 3523500 }, { "epoch": 6.71, "learning_rate": 3.322062768649209e-05, "loss": 2.5258, "step": 3524000 }, { "epoch": 6.72, "learning_rate": 3.32182460445691e-05, "loss": 2.4951, "step": 3524500 }, { "epoch": 6.72, "learning_rate": 3.32158644026461e-05, "loss": 2.5274, "step": 3525000 }, { "epoch": 6.72, "learning_rate": 3.321348276072311e-05, "loss": 2.5054, "step": 3525500 }, { "epoch": 6.72, "learning_rate": 3.321110111880011e-05, "loss": 2.5025, "step": 3526000 }, { "epoch": 6.72, "learning_rate": 3.3208719476877115e-05, "loss": 2.5033, "step": 3526500 }, { "epoch": 6.72, "learning_rate": 3.320634259823797e-05, "loss": 2.4978, "step": 3527000 }, { "epoch": 6.72, "learning_rate": 3.320396095631497e-05, "loss": 2.5256, "step": 3527500 }, { "epoch": 6.72, "learning_rate": 3.320157931439198e-05, "loss": 2.4953, "step": 3528000 }, { "epoch": 6.72, "learning_rate": 3.319919767246898e-05, "loss": 2.5203, "step": 3528500 }, { "epoch": 6.72, "learning_rate": 3.319681603054599e-05, "loss": 2.5115, "step": 3529000 }, { "epoch": 6.72, "learning_rate": 3.3194434388622994e-05, "loss": 2.5194, "step": 3529500 }, { "epoch": 6.73, "learning_rate": 3.3192052746699996e-05, "loss": 2.5088, "step": 3530000 }, { "epoch": 6.73, "learning_rate": 3.3189671104777006e-05, "loss": 2.5272, "step": 3530500 }, { "epoch": 6.73, "learning_rate": 3.318728946285401e-05, "loss": 2.5422, "step": 3531000 }, { "epoch": 6.73, "learning_rate": 3.318491258421486e-05, "loss": 2.5152, "step": 3531500 }, { "epoch": 6.73, "learning_rate": 3.318253094229187e-05, "loss": 2.514, "step": 3532000 }, { "epoch": 6.73, "learning_rate": 3.3180154063652715e-05, "loss": 2.5306, "step": 3532500 }, { "epoch": 6.73, "learning_rate": 3.317777242172972e-05, "loss": 2.5143, "step": 3533000 }, { "epoch": 6.73, "learning_rate": 3.317539077980673e-05, "loss": 2.5256, "step": 3533500 }, { "epoch": 6.73, "learning_rate": 3.317300913788373e-05, "loss": 2.5007, "step": 3534000 }, { "epoch": 6.73, "learning_rate": 3.317062749596074e-05, "loss": 2.5275, "step": 3534500 }, { "epoch": 6.74, "learning_rate": 3.316824585403774e-05, "loss": 2.502, "step": 3535000 }, { "epoch": 6.74, "learning_rate": 3.3165864212114745e-05, "loss": 2.5045, "step": 3535500 }, { "epoch": 6.74, "learning_rate": 3.316348257019175e-05, "loss": 2.5231, "step": 3536000 }, { "epoch": 6.74, "learning_rate": 3.31611056915526e-05, "loss": 2.5099, "step": 3536500 }, { "epoch": 6.74, "learning_rate": 3.315872881291345e-05, "loss": 2.5194, "step": 3537000 }, { "epoch": 6.74, "learning_rate": 3.315634717099046e-05, "loss": 2.5155, "step": 3537500 }, { "epoch": 6.74, "learning_rate": 3.3153965529067464e-05, "loss": 2.5019, "step": 3538000 }, { "epoch": 6.74, "learning_rate": 3.315158388714447e-05, "loss": 2.511, "step": 3538500 }, { "epoch": 6.74, "learning_rate": 3.3149202245221476e-05, "loss": 2.5217, "step": 3539000 }, { "epoch": 6.74, "learning_rate": 3.314682536658232e-05, "loss": 2.5095, "step": 3539500 }, { "epoch": 6.74, "learning_rate": 3.314444372465933e-05, "loss": 2.5124, "step": 3540000 }, { "epoch": 6.74, "eval_accuracy": 0.5408667330667916, "eval_loss": 2.4147274494171143, "eval_runtime": 4196.28, "eval_samples_per_second": 65.531, "eval_steps_per_second": 6.553, "step": 3540000 }, { "epoch": 6.75, "learning_rate": 3.3142062082736334e-05, "loss": 2.5026, "step": 3540500 }, { "epoch": 6.75, "learning_rate": 3.313968044081334e-05, "loss": 2.5255, "step": 3541000 }, { "epoch": 6.75, "learning_rate": 3.3137298798890346e-05, "loss": 2.5253, "step": 3541500 }, { "epoch": 6.75, "learning_rate": 3.3134917156967355e-05, "loss": 2.511, "step": 3542000 }, { "epoch": 6.75, "learning_rate": 3.313253551504436e-05, "loss": 2.5401, "step": 3542500 }, { "epoch": 6.75, "learning_rate": 3.313015387312136e-05, "loss": 2.5396, "step": 3543000 }, { "epoch": 6.75, "learning_rate": 3.312777699448221e-05, "loss": 2.5281, "step": 3543500 }, { "epoch": 6.75, "learning_rate": 3.312539535255922e-05, "loss": 2.4865, "step": 3544000 }, { "epoch": 6.75, "learning_rate": 3.3123013710636225e-05, "loss": 2.5231, "step": 3544500 }, { "epoch": 6.75, "learning_rate": 3.3120632068713234e-05, "loss": 2.5042, "step": 3545000 }, { "epoch": 6.76, "learning_rate": 3.311825519007408e-05, "loss": 2.5154, "step": 3545500 }, { "epoch": 6.76, "learning_rate": 3.311587831143493e-05, "loss": 2.5054, "step": 3546000 }, { "epoch": 6.76, "learning_rate": 3.3113496669511934e-05, "loss": 2.5022, "step": 3546500 }, { "epoch": 6.76, "learning_rate": 3.311111502758894e-05, "loss": 2.5105, "step": 3547000 }, { "epoch": 6.76, "learning_rate": 3.3108733385665946e-05, "loss": 2.5141, "step": 3547500 }, { "epoch": 6.76, "learning_rate": 3.3106351743742956e-05, "loss": 2.4996, "step": 3548000 }, { "epoch": 6.76, "learning_rate": 3.310397010181996e-05, "loss": 2.5281, "step": 3548500 }, { "epoch": 6.76, "learning_rate": 3.310159322318081e-05, "loss": 2.5224, "step": 3549000 }, { "epoch": 6.76, "learning_rate": 3.309921158125781e-05, "loss": 2.5201, "step": 3549500 }, { "epoch": 6.76, "learning_rate": 3.3096829939334816e-05, "loss": 2.5054, "step": 3550000 }, { "epoch": 6.76, "learning_rate": 3.3094448297411825e-05, "loss": 2.5286, "step": 3550500 }, { "epoch": 6.77, "learning_rate": 3.309206665548883e-05, "loss": 2.5192, "step": 3551000 }, { "epoch": 6.77, "learning_rate": 3.308968501356584e-05, "loss": 2.5057, "step": 3551500 }, { "epoch": 6.77, "learning_rate": 3.308730337164284e-05, "loss": 2.5187, "step": 3552000 }, { "epoch": 6.77, "learning_rate": 3.308492649300369e-05, "loss": 2.5142, "step": 3552500 }, { "epoch": 6.77, "learning_rate": 3.3082544851080695e-05, "loss": 2.5046, "step": 3553000 }, { "epoch": 6.77, "learning_rate": 3.30801632091577e-05, "loss": 2.5055, "step": 3553500 }, { "epoch": 6.77, "learning_rate": 3.307778156723471e-05, "loss": 2.4952, "step": 3554000 }, { "epoch": 6.77, "learning_rate": 3.307539992531171e-05, "loss": 2.5051, "step": 3554500 }, { "epoch": 6.77, "learning_rate": 3.307301828338872e-05, "loss": 2.5241, "step": 3555000 }, { "epoch": 6.77, "learning_rate": 3.307063664146572e-05, "loss": 2.5251, "step": 3555500 }, { "epoch": 6.78, "learning_rate": 3.3068254999542724e-05, "loss": 2.5058, "step": 3556000 }, { "epoch": 6.78, "learning_rate": 3.3065873357619734e-05, "loss": 2.5316, "step": 3556500 }, { "epoch": 6.78, "learning_rate": 3.306349647898058e-05, "loss": 2.5256, "step": 3557000 }, { "epoch": 6.78, "learning_rate": 3.306111483705759e-05, "loss": 2.5294, "step": 3557500 }, { "epoch": 6.78, "learning_rate": 3.30587331951346e-05, "loss": 2.5223, "step": 3558000 }, { "epoch": 6.78, "learning_rate": 3.30563515532116e-05, "loss": 2.5164, "step": 3558500 }, { "epoch": 6.78, "learning_rate": 3.3053974674572446e-05, "loss": 2.5238, "step": 3559000 }, { "epoch": 6.78, "learning_rate": 3.3051593032649456e-05, "loss": 2.4958, "step": 3559500 }, { "epoch": 6.78, "learning_rate": 3.30492161540103e-05, "loss": 2.5083, "step": 3560000 }, { "epoch": 6.78, "learning_rate": 3.304683451208731e-05, "loss": 2.544, "step": 3560500 }, { "epoch": 6.78, "learning_rate": 3.304445287016431e-05, "loss": 2.5318, "step": 3561000 }, { "epoch": 6.79, "learning_rate": 3.304207122824132e-05, "loss": 2.5246, "step": 3561500 }, { "epoch": 6.79, "learning_rate": 3.303968958631833e-05, "loss": 2.5135, "step": 3562000 }, { "epoch": 6.79, "learning_rate": 3.303730794439533e-05, "loss": 2.5105, "step": 3562500 }, { "epoch": 6.79, "learning_rate": 3.303492630247234e-05, "loss": 2.5089, "step": 3563000 }, { "epoch": 6.79, "learning_rate": 3.303254466054934e-05, "loss": 2.5265, "step": 3563500 }, { "epoch": 6.79, "learning_rate": 3.303016778191019e-05, "loss": 2.5349, "step": 3564000 }, { "epoch": 6.79, "learning_rate": 3.30277861399872e-05, "loss": 2.5357, "step": 3564500 }, { "epoch": 6.79, "learning_rate": 3.3025409261348054e-05, "loss": 2.5096, "step": 3565000 }, { "epoch": 6.79, "learning_rate": 3.3023027619425056e-05, "loss": 2.517, "step": 3565500 }, { "epoch": 6.79, "learning_rate": 3.302064597750206e-05, "loss": 2.4976, "step": 3566000 }, { "epoch": 6.8, "learning_rate": 3.301826433557906e-05, "loss": 2.5286, "step": 3566500 }, { "epoch": 6.8, "learning_rate": 3.301588269365607e-05, "loss": 2.5229, "step": 3567000 }, { "epoch": 6.8, "learning_rate": 3.3013501051733074e-05, "loss": 2.5222, "step": 3567500 }, { "epoch": 6.8, "learning_rate": 3.301111940981008e-05, "loss": 2.5156, "step": 3568000 }, { "epoch": 6.8, "learning_rate": 3.3008742531170935e-05, "loss": 2.5099, "step": 3568500 }, { "epoch": 6.8, "learning_rate": 3.300636088924793e-05, "loss": 2.5154, "step": 3569000 }, { "epoch": 6.8, "learning_rate": 3.300397924732494e-05, "loss": 2.5109, "step": 3569500 }, { "epoch": 6.8, "learning_rate": 3.300160236868579e-05, "loss": 2.5286, "step": 3570000 }, { "epoch": 6.8, "eval_accuracy": 0.5411871025073149, "eval_loss": 2.4144821166992188, "eval_runtime": 4188.724, "eval_samples_per_second": 65.65, "eval_steps_per_second": 6.565, "step": 3570000 }, { "epoch": 6.8, "learning_rate": 3.2999220726762795e-05, "loss": 2.5066, "step": 3570500 }, { "epoch": 6.8, "learning_rate": 3.2996839084839805e-05, "loss": 2.5221, "step": 3571000 }, { "epoch": 6.8, "learning_rate": 3.299445744291681e-05, "loss": 2.5248, "step": 3571500 }, { "epoch": 6.81, "learning_rate": 3.299207580099382e-05, "loss": 2.5272, "step": 3572000 }, { "epoch": 6.81, "learning_rate": 3.298969415907082e-05, "loss": 2.518, "step": 3572500 }, { "epoch": 6.81, "learning_rate": 3.298731251714782e-05, "loss": 2.5114, "step": 3573000 }, { "epoch": 6.81, "learning_rate": 3.2984935638508674e-05, "loss": 2.5216, "step": 3573500 }, { "epoch": 6.81, "learning_rate": 3.298255399658568e-05, "loss": 2.5102, "step": 3574000 }, { "epoch": 6.81, "learning_rate": 3.2980172354662686e-05, "loss": 2.525, "step": 3574500 }, { "epoch": 6.81, "learning_rate": 3.297779071273969e-05, "loss": 2.5168, "step": 3575000 }, { "epoch": 6.81, "learning_rate": 3.297540907081669e-05, "loss": 2.5056, "step": 3575500 }, { "epoch": 6.81, "learning_rate": 3.29730274288937e-05, "loss": 2.5208, "step": 3576000 }, { "epoch": 6.81, "learning_rate": 3.2970645786970704e-05, "loss": 2.5091, "step": 3576500 }, { "epoch": 6.82, "learning_rate": 3.296826414504771e-05, "loss": 2.5217, "step": 3577000 }, { "epoch": 6.82, "learning_rate": 3.2965882503124716e-05, "loss": 2.5026, "step": 3577500 }, { "epoch": 6.82, "learning_rate": 3.296350086120172e-05, "loss": 2.5282, "step": 3578000 }, { "epoch": 6.82, "learning_rate": 3.296111921927872e-05, "loss": 2.5225, "step": 3578500 }, { "epoch": 6.82, "learning_rate": 3.295873757735573e-05, "loss": 2.5136, "step": 3579000 }, { "epoch": 6.82, "learning_rate": 3.295636069871658e-05, "loss": 2.5188, "step": 3579500 }, { "epoch": 6.82, "learning_rate": 3.2953983820077435e-05, "loss": 2.4976, "step": 3580000 }, { "epoch": 6.82, "learning_rate": 3.295160217815444e-05, "loss": 2.5171, "step": 3580500 }, { "epoch": 6.82, "learning_rate": 3.294922053623145e-05, "loss": 2.5086, "step": 3581000 }, { "epoch": 6.82, "learning_rate": 3.294683889430845e-05, "loss": 2.5222, "step": 3581500 }, { "epoch": 6.82, "learning_rate": 3.294445725238545e-05, "loss": 2.4926, "step": 3582000 }, { "epoch": 6.83, "learning_rate": 3.294207561046246e-05, "loss": 2.5017, "step": 3582500 }, { "epoch": 6.83, "learning_rate": 3.293969873182331e-05, "loss": 2.4893, "step": 3583000 }, { "epoch": 6.83, "learning_rate": 3.2937317089900317e-05, "loss": 2.5057, "step": 3583500 }, { "epoch": 6.83, "learning_rate": 3.293493544797732e-05, "loss": 2.5093, "step": 3584000 }, { "epoch": 6.83, "learning_rate": 3.293255380605433e-05, "loss": 2.5275, "step": 3584500 }, { "epoch": 6.83, "learning_rate": 3.293017692741518e-05, "loss": 2.5142, "step": 3585000 }, { "epoch": 6.83, "learning_rate": 3.292779528549218e-05, "loss": 2.524, "step": 3585500 }, { "epoch": 6.83, "learning_rate": 3.2925413643569186e-05, "loss": 2.514, "step": 3586000 }, { "epoch": 6.83, "learning_rate": 3.2923032001646196e-05, "loss": 2.5171, "step": 3586500 }, { "epoch": 6.83, "learning_rate": 3.29206503597232e-05, "loss": 2.5331, "step": 3587000 }, { "epoch": 6.84, "learning_rate": 3.291826871780021e-05, "loss": 2.501, "step": 3587500 }, { "epoch": 6.84, "learning_rate": 3.2915887075877203e-05, "loss": 2.5194, "step": 3588000 }, { "epoch": 6.84, "learning_rate": 3.291350543395421e-05, "loss": 2.5107, "step": 3588500 }, { "epoch": 6.84, "learning_rate": 3.2911128555315065e-05, "loss": 2.481, "step": 3589000 }, { "epoch": 6.84, "learning_rate": 3.290874691339207e-05, "loss": 2.4982, "step": 3589500 }, { "epoch": 6.84, "learning_rate": 3.290636527146908e-05, "loss": 2.5211, "step": 3590000 }, { "epoch": 6.84, "learning_rate": 3.290398839282993e-05, "loss": 2.51, "step": 3590500 }, { "epoch": 6.84, "learning_rate": 3.290160675090693e-05, "loss": 2.519, "step": 3591000 }, { "epoch": 6.84, "learning_rate": 3.289922510898394e-05, "loss": 2.5219, "step": 3591500 }, { "epoch": 6.84, "learning_rate": 3.289684346706094e-05, "loss": 2.5267, "step": 3592000 }, { "epoch": 6.84, "learning_rate": 3.289446182513795e-05, "loss": 2.5302, "step": 3592500 }, { "epoch": 6.85, "learning_rate": 3.289208018321495e-05, "loss": 2.5111, "step": 3593000 }, { "epoch": 6.85, "learning_rate": 3.288969854129196e-05, "loss": 2.5126, "step": 3593500 }, { "epoch": 6.85, "learning_rate": 3.288731689936896e-05, "loss": 2.5323, "step": 3594000 }, { "epoch": 6.85, "learning_rate": 3.2884935257445964e-05, "loss": 2.4978, "step": 3594500 }, { "epoch": 6.85, "learning_rate": 3.2882558378806816e-05, "loss": 2.509, "step": 3595000 }, { "epoch": 6.85, "learning_rate": 3.288017673688382e-05, "loss": 2.5194, "step": 3595500 }, { "epoch": 6.85, "learning_rate": 3.287779509496083e-05, "loss": 2.5177, "step": 3596000 }, { "epoch": 6.85, "learning_rate": 3.287541345303784e-05, "loss": 2.5136, "step": 3596500 }, { "epoch": 6.85, "learning_rate": 3.287303181111484e-05, "loss": 2.5202, "step": 3597000 }, { "epoch": 6.85, "learning_rate": 3.287065016919184e-05, "loss": 2.5435, "step": 3597500 }, { "epoch": 6.86, "learning_rate": 3.2868273290552695e-05, "loss": 2.5007, "step": 3598000 }, { "epoch": 6.86, "learning_rate": 3.28658916486297e-05, "loss": 2.5349, "step": 3598500 }, { "epoch": 6.86, "learning_rate": 3.286351000670671e-05, "loss": 2.4991, "step": 3599000 }, { "epoch": 6.86, "learning_rate": 3.286112836478371e-05, "loss": 2.5286, "step": 3599500 }, { "epoch": 6.86, "learning_rate": 3.285874672286072e-05, "loss": 2.5133, "step": 3600000 }, { "epoch": 6.86, "eval_accuracy": 0.5411024813794061, "eval_loss": 2.412621259689331, "eval_runtime": 4197.4837, "eval_samples_per_second": 65.513, "eval_steps_per_second": 6.551, "step": 3600000 }, { "epoch": 6.86, "learning_rate": 3.285636984422157e-05, "loss": 2.5169, "step": 3600500 }, { "epoch": 6.86, "learning_rate": 3.2853988202298574e-05, "loss": 2.5216, "step": 3601000 }, { "epoch": 6.86, "learning_rate": 3.285160656037558e-05, "loss": 2.5312, "step": 3601500 }, { "epoch": 6.86, "learning_rate": 3.284922491845258e-05, "loss": 2.5169, "step": 3602000 }, { "epoch": 6.86, "learning_rate": 3.284684327652959e-05, "loss": 2.524, "step": 3602500 }, { "epoch": 6.86, "learning_rate": 3.284446163460659e-05, "loss": 2.5222, "step": 3603000 }, { "epoch": 6.87, "learning_rate": 3.28420799926836e-05, "loss": 2.5419, "step": 3603500 }, { "epoch": 6.87, "learning_rate": 3.283970311404445e-05, "loss": 2.5141, "step": 3604000 }, { "epoch": 6.87, "learning_rate": 3.283732147212145e-05, "loss": 2.513, "step": 3604500 }, { "epoch": 6.87, "learning_rate": 3.283493983019846e-05, "loss": 2.5068, "step": 3605000 }, { "epoch": 6.87, "learning_rate": 3.283255818827546e-05, "loss": 2.5144, "step": 3605500 }, { "epoch": 6.87, "learning_rate": 3.283017654635247e-05, "loss": 2.5145, "step": 3606000 }, { "epoch": 6.87, "learning_rate": 3.282779490442947e-05, "loss": 2.5063, "step": 3606500 }, { "epoch": 6.87, "learning_rate": 3.2825413262506476e-05, "loss": 2.5186, "step": 3607000 }, { "epoch": 6.87, "learning_rate": 3.2823031620583485e-05, "loss": 2.5175, "step": 3607500 }, { "epoch": 6.87, "learning_rate": 3.282065474194434e-05, "loss": 2.5116, "step": 3608000 }, { "epoch": 6.88, "learning_rate": 3.281827310002134e-05, "loss": 2.5024, "step": 3608500 }, { "epoch": 6.88, "learning_rate": 3.281589145809835e-05, "loss": 2.5152, "step": 3609000 }, { "epoch": 6.88, "learning_rate": 3.281350981617535e-05, "loss": 2.4994, "step": 3609500 }, { "epoch": 6.88, "learning_rate": 3.281113770082005e-05, "loss": 2.5122, "step": 3610000 }, { "epoch": 6.88, "learning_rate": 3.280875605889706e-05, "loss": 2.5103, "step": 3610500 }, { "epoch": 6.88, "learning_rate": 3.280637441697406e-05, "loss": 2.5096, "step": 3611000 }, { "epoch": 6.88, "learning_rate": 3.280399277505106e-05, "loss": 2.5089, "step": 3611500 }, { "epoch": 6.88, "learning_rate": 3.280161113312807e-05, "loss": 2.5193, "step": 3612000 }, { "epoch": 6.88, "learning_rate": 3.2799229491205074e-05, "loss": 2.5009, "step": 3612500 }, { "epoch": 6.88, "learning_rate": 3.279684784928208e-05, "loss": 2.4902, "step": 3613000 }, { "epoch": 6.88, "learning_rate": 3.2794466207359086e-05, "loss": 2.5299, "step": 3613500 }, { "epoch": 6.89, "learning_rate": 3.279208932871994e-05, "loss": 2.5011, "step": 3614000 }, { "epoch": 6.89, "learning_rate": 3.278970768679694e-05, "loss": 2.5096, "step": 3614500 }, { "epoch": 6.89, "learning_rate": 3.2787326044873944e-05, "loss": 2.5236, "step": 3615000 }, { "epoch": 6.89, "learning_rate": 3.278494440295095e-05, "loss": 2.5234, "step": 3615500 }, { "epoch": 6.89, "learning_rate": 3.2782562761027956e-05, "loss": 2.5182, "step": 3616000 }, { "epoch": 6.89, "learning_rate": 3.278018588238881e-05, "loss": 2.5299, "step": 3616500 }, { "epoch": 6.89, "learning_rate": 3.277780424046582e-05, "loss": 2.5036, "step": 3617000 }, { "epoch": 6.89, "learning_rate": 3.277542259854281e-05, "loss": 2.5114, "step": 3617500 }, { "epoch": 6.89, "learning_rate": 3.277304095661982e-05, "loss": 2.4991, "step": 3618000 }, { "epoch": 6.89, "learning_rate": 3.2770659314696825e-05, "loss": 2.5264, "step": 3618500 }, { "epoch": 6.9, "learning_rate": 3.2768277672773835e-05, "loss": 2.4982, "step": 3619000 }, { "epoch": 6.9, "learning_rate": 3.276589603085084e-05, "loss": 2.5056, "step": 3619500 }, { "epoch": 6.9, "learning_rate": 3.2763514388927847e-05, "loss": 2.5206, "step": 3620000 }, { "epoch": 6.9, "learning_rate": 3.276113751028869e-05, "loss": 2.5126, "step": 3620500 }, { "epoch": 6.9, "learning_rate": 3.27587558683657e-05, "loss": 2.5227, "step": 3621000 }, { "epoch": 6.9, "learning_rate": 3.2756374226442704e-05, "loss": 2.4947, "step": 3621500 }, { "epoch": 6.9, "learning_rate": 3.2753992584519714e-05, "loss": 2.5052, "step": 3622000 }, { "epoch": 6.9, "learning_rate": 3.275161570588056e-05, "loss": 2.5211, "step": 3622500 }, { "epoch": 6.9, "learning_rate": 3.274923406395757e-05, "loss": 2.5108, "step": 3623000 }, { "epoch": 6.9, "learning_rate": 3.274685242203457e-05, "loss": 2.5122, "step": 3623500 }, { "epoch": 6.9, "learning_rate": 3.274447554339542e-05, "loss": 2.5008, "step": 3624000 }, { "epoch": 6.91, "learning_rate": 3.2742093901472426e-05, "loss": 2.5139, "step": 3624500 }, { "epoch": 6.91, "learning_rate": 3.2739712259549435e-05, "loss": 2.5088, "step": 3625000 }, { "epoch": 6.91, "learning_rate": 3.273733061762644e-05, "loss": 2.5079, "step": 3625500 }, { "epoch": 6.91, "learning_rate": 3.273494897570345e-05, "loss": 2.5127, "step": 3626000 }, { "epoch": 6.91, "learning_rate": 3.273256733378045e-05, "loss": 2.5005, "step": 3626500 }, { "epoch": 6.91, "learning_rate": 3.273018569185745e-05, "loss": 2.5184, "step": 3627000 }, { "epoch": 6.91, "learning_rate": 3.2727804049934455e-05, "loss": 2.4984, "step": 3627500 }, { "epoch": 6.91, "learning_rate": 3.272542717129531e-05, "loss": 2.4901, "step": 3628000 }, { "epoch": 6.91, "learning_rate": 3.272304552937232e-05, "loss": 2.5097, "step": 3628500 }, { "epoch": 6.91, "learning_rate": 3.272066388744932e-05, "loss": 2.5177, "step": 3629000 }, { "epoch": 6.92, "learning_rate": 3.271828700881017e-05, "loss": 2.4977, "step": 3629500 }, { "epoch": 6.92, "learning_rate": 3.271590536688718e-05, "loss": 2.5085, "step": 3630000 }, { "epoch": 6.92, "eval_accuracy": 0.5411880018622975, "eval_loss": 2.4133033752441406, "eval_runtime": 4199.785, "eval_samples_per_second": 65.477, "eval_steps_per_second": 6.548, "step": 3630000 }, { "epoch": 6.92, "learning_rate": 3.271352372496418e-05, "loss": 2.5042, "step": 3630500 }, { "epoch": 6.92, "learning_rate": 3.2711142083041187e-05, "loss": 2.4981, "step": 3631000 }, { "epoch": 6.92, "learning_rate": 3.270876044111819e-05, "loss": 2.5063, "step": 3631500 }, { "epoch": 6.92, "learning_rate": 3.270638356247904e-05, "loss": 2.5005, "step": 3632000 }, { "epoch": 6.92, "learning_rate": 3.270400192055605e-05, "loss": 2.5166, "step": 3632500 }, { "epoch": 6.92, "learning_rate": 3.2701620278633053e-05, "loss": 2.5278, "step": 3633000 }, { "epoch": 6.92, "learning_rate": 3.269923863671006e-05, "loss": 2.5279, "step": 3633500 }, { "epoch": 6.92, "learning_rate": 3.269685699478706e-05, "loss": 2.5136, "step": 3634000 }, { "epoch": 6.92, "learning_rate": 3.269447535286407e-05, "loss": 2.5065, "step": 3634500 }, { "epoch": 6.93, "learning_rate": 3.269209371094107e-05, "loss": 2.5061, "step": 3635000 }, { "epoch": 6.93, "learning_rate": 3.268971206901808e-05, "loss": 2.5022, "step": 3635500 }, { "epoch": 6.93, "learning_rate": 3.268733519037893e-05, "loss": 2.5126, "step": 3636000 }, { "epoch": 6.93, "learning_rate": 3.2684958311739785e-05, "loss": 2.526, "step": 3636500 }, { "epoch": 6.93, "learning_rate": 3.268257666981679e-05, "loss": 2.5129, "step": 3637000 }, { "epoch": 6.93, "learning_rate": 3.268019502789379e-05, "loss": 2.5129, "step": 3637500 }, { "epoch": 6.93, "learning_rate": 3.267781338597079e-05, "loss": 2.5037, "step": 3638000 }, { "epoch": 6.93, "learning_rate": 3.26754317440478e-05, "loss": 2.5233, "step": 3638500 }, { "epoch": 6.93, "learning_rate": 3.267305010212481e-05, "loss": 2.5235, "step": 3639000 }, { "epoch": 6.93, "learning_rate": 3.2670668460201814e-05, "loss": 2.5141, "step": 3639500 }, { "epoch": 6.94, "learning_rate": 3.266828681827882e-05, "loss": 2.4934, "step": 3640000 }, { "epoch": 6.94, "learning_rate": 3.266590517635582e-05, "loss": 2.5188, "step": 3640500 }, { "epoch": 6.94, "learning_rate": 3.266352829771667e-05, "loss": 2.5083, "step": 3641000 }, { "epoch": 6.94, "learning_rate": 3.2661151419077524e-05, "loss": 2.5239, "step": 3641500 }, { "epoch": 6.94, "learning_rate": 3.2658769777154526e-05, "loss": 2.5158, "step": 3642000 }, { "epoch": 6.94, "learning_rate": 3.2656388135231536e-05, "loss": 2.5155, "step": 3642500 }, { "epoch": 6.94, "learning_rate": 3.2654006493308545e-05, "loss": 2.5238, "step": 3643000 }, { "epoch": 6.94, "learning_rate": 3.265162485138555e-05, "loss": 2.496, "step": 3643500 }, { "epoch": 6.94, "learning_rate": 3.264924320946255e-05, "loss": 2.5329, "step": 3644000 }, { "epoch": 6.94, "learning_rate": 3.264686156753955e-05, "loss": 2.4883, "step": 3644500 }, { "epoch": 6.94, "learning_rate": 3.264447992561656e-05, "loss": 2.5079, "step": 3645000 }, { "epoch": 6.95, "learning_rate": 3.2642098283693565e-05, "loss": 2.5162, "step": 3645500 }, { "epoch": 6.95, "learning_rate": 3.263972140505442e-05, "loss": 2.5138, "step": 3646000 }, { "epoch": 6.95, "learning_rate": 3.263733976313143e-05, "loss": 2.5053, "step": 3646500 }, { "epoch": 6.95, "learning_rate": 3.263495812120842e-05, "loss": 2.5088, "step": 3647000 }, { "epoch": 6.95, "learning_rate": 3.2632581242569275e-05, "loss": 2.5207, "step": 3647500 }, { "epoch": 6.95, "learning_rate": 3.2630199600646284e-05, "loss": 2.5026, "step": 3648000 }, { "epoch": 6.95, "learning_rate": 3.262781795872329e-05, "loss": 2.5091, "step": 3648500 }, { "epoch": 6.95, "learning_rate": 3.2625436316800296e-05, "loss": 2.5072, "step": 3649000 }, { "epoch": 6.95, "learning_rate": 3.26230546748773e-05, "loss": 2.5195, "step": 3649500 }, { "epoch": 6.95, "learning_rate": 3.26206730329543e-05, "loss": 2.4977, "step": 3650000 }, { "epoch": 6.96, "learning_rate": 3.261829139103131e-05, "loss": 2.5138, "step": 3650500 }, { "epoch": 6.96, "learning_rate": 3.2615909749108314e-05, "loss": 2.4918, "step": 3651000 }, { "epoch": 6.96, "learning_rate": 3.2613532870469166e-05, "loss": 2.5052, "step": 3651500 }, { "epoch": 6.96, "learning_rate": 3.261115599183002e-05, "loss": 2.5262, "step": 3652000 }, { "epoch": 6.96, "learning_rate": 3.260877434990702e-05, "loss": 2.5074, "step": 3652500 }, { "epoch": 6.96, "learning_rate": 3.260639270798403e-05, "loss": 2.512, "step": 3653000 }, { "epoch": 6.96, "learning_rate": 3.260401106606103e-05, "loss": 2.5209, "step": 3653500 }, { "epoch": 6.96, "learning_rate": 3.2601629424138036e-05, "loss": 2.4955, "step": 3654000 }, { "epoch": 6.96, "learning_rate": 3.2599247782215045e-05, "loss": 2.4898, "step": 3654500 }, { "epoch": 6.96, "learning_rate": 3.259687090357589e-05, "loss": 2.516, "step": 3655000 }, { "epoch": 6.96, "learning_rate": 3.25944892616529e-05, "loss": 2.5071, "step": 3655500 }, { "epoch": 6.97, "learning_rate": 3.25921076197299e-05, "loss": 2.5326, "step": 3656000 }, { "epoch": 6.97, "learning_rate": 3.258972597780691e-05, "loss": 2.5208, "step": 3656500 }, { "epoch": 6.97, "learning_rate": 3.2587344335883915e-05, "loss": 2.5201, "step": 3657000 }, { "epoch": 6.97, "learning_rate": 3.258496269396092e-05, "loss": 2.5212, "step": 3657500 }, { "epoch": 6.97, "learning_rate": 3.2582581052037927e-05, "loss": 2.5371, "step": 3658000 }, { "epoch": 6.97, "learning_rate": 3.258019941011493e-05, "loss": 2.5133, "step": 3658500 }, { "epoch": 6.97, "learning_rate": 3.257782253147578e-05, "loss": 2.5237, "step": 3659000 }, { "epoch": 6.97, "learning_rate": 3.257544088955279e-05, "loss": 2.5139, "step": 3659500 }, { "epoch": 6.97, "learning_rate": 3.257305924762979e-05, "loss": 2.5094, "step": 3660000 }, { "epoch": 6.97, "eval_accuracy": 0.5412268980766046, "eval_loss": 2.4130136966705322, "eval_runtime": 4193.1691, "eval_samples_per_second": 65.58, "eval_steps_per_second": 6.558, "step": 3660000 }, { "epoch": 6.97, "learning_rate": 3.2570677605706796e-05, "loss": 2.5129, "step": 3660500 }, { "epoch": 6.98, "learning_rate": 3.256830072706765e-05, "loss": 2.5109, "step": 3661000 }, { "epoch": 6.98, "learning_rate": 3.25659238484285e-05, "loss": 2.5231, "step": 3661500 }, { "epoch": 6.98, "learning_rate": 3.25635422065055e-05, "loss": 2.5052, "step": 3662000 }, { "epoch": 6.98, "learning_rate": 3.256116056458251e-05, "loss": 2.4999, "step": 3662500 }, { "epoch": 6.98, "learning_rate": 3.2558778922659515e-05, "loss": 2.5097, "step": 3663000 }, { "epoch": 6.98, "learning_rate": 3.255639728073652e-05, "loss": 2.505, "step": 3663500 }, { "epoch": 6.98, "learning_rate": 3.255401563881352e-05, "loss": 2.5105, "step": 3664000 }, { "epoch": 6.98, "learning_rate": 3.255163399689053e-05, "loss": 2.5194, "step": 3664500 }, { "epoch": 6.98, "learning_rate": 3.254925235496753e-05, "loss": 2.5039, "step": 3665000 }, { "epoch": 6.98, "learning_rate": 3.2546880239612235e-05, "loss": 2.5202, "step": 3665500 }, { "epoch": 6.98, "learning_rate": 3.254449859768924e-05, "loss": 2.4776, "step": 3666000 }, { "epoch": 6.99, "learning_rate": 3.254211695576625e-05, "loss": 2.5071, "step": 3666500 }, { "epoch": 6.99, "learning_rate": 3.253973531384325e-05, "loss": 2.5015, "step": 3667000 }, { "epoch": 6.99, "learning_rate": 3.253735367192025e-05, "loss": 2.4993, "step": 3667500 }, { "epoch": 6.99, "learning_rate": 3.2534976793281104e-05, "loss": 2.5026, "step": 3668000 }, { "epoch": 6.99, "learning_rate": 3.253259515135811e-05, "loss": 2.5154, "step": 3668500 }, { "epoch": 6.99, "learning_rate": 3.2530213509435116e-05, "loss": 2.5132, "step": 3669000 }, { "epoch": 6.99, "learning_rate": 3.252783186751212e-05, "loss": 2.5056, "step": 3669500 }, { "epoch": 6.99, "learning_rate": 3.252545022558913e-05, "loss": 2.5139, "step": 3670000 }, { "epoch": 6.99, "learning_rate": 3.252306858366613e-05, "loss": 2.505, "step": 3670500 }, { "epoch": 6.99, "learning_rate": 3.2520686941743133e-05, "loss": 2.5123, "step": 3671000 }, { "epoch": 7.0, "learning_rate": 3.251830529982014e-05, "loss": 2.5064, "step": 3671500 }, { "epoch": 7.0, "learning_rate": 3.251592842118099e-05, "loss": 2.519, "step": 3672000 }, { "epoch": 7.0, "learning_rate": 3.251355154254184e-05, "loss": 2.5049, "step": 3672500 }, { "epoch": 7.0, "learning_rate": 3.251116990061885e-05, "loss": 2.5228, "step": 3673000 }, { "epoch": 7.0, "learning_rate": 3.250878825869585e-05, "loss": 2.5082, "step": 3673500 } ], "max_steps": 10496960, "num_train_epochs": 20, "total_flos": 1.9199378450939904e+19, "trial_name": null, "trial_params": null }