{ "best_metric": 3.5057616233825684, "best_model_checkpoint": "MIReADNeuro_3e-05/checkpoint-66500", "epoch": 6.0, "global_step": 99750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.9849624060150376e-05, "loss": 7.0752, "step": 500 }, { "epoch": 0.06, "learning_rate": 2.9699248120300752e-05, "loss": 6.588, "step": 1000 }, { "epoch": 0.09, "learning_rate": 2.9548872180451128e-05, "loss": 6.3893, "step": 1500 }, { "epoch": 0.12, "learning_rate": 2.9398496240601503e-05, "loss": 6.1736, "step": 2000 }, { "epoch": 0.15, "learning_rate": 2.924812030075188e-05, "loss": 6.0491, "step": 2500 }, { "epoch": 0.18, "learning_rate": 2.9097744360902258e-05, "loss": 5.8392, "step": 3000 }, { "epoch": 0.21, "learning_rate": 2.8947368421052634e-05, "loss": 5.7665, "step": 3500 }, { "epoch": 0.24, "learning_rate": 2.879699248120301e-05, "loss": 5.6406, "step": 4000 }, { "epoch": 0.27, "learning_rate": 2.8646616541353385e-05, "loss": 5.4408, "step": 4500 }, { "epoch": 0.3, "learning_rate": 2.849624060150376e-05, "loss": 5.381, "step": 5000 }, { "epoch": 0.33, "learning_rate": 2.8345864661654136e-05, "loss": 5.2809, "step": 5500 }, { "epoch": 0.36, "learning_rate": 2.819548872180451e-05, "loss": 5.1722, "step": 6000 }, { "epoch": 0.39, "learning_rate": 2.8045112781954887e-05, "loss": 5.2068, "step": 6500 }, { "epoch": 0.42, "learning_rate": 2.7894736842105263e-05, "loss": 5.0432, "step": 7000 }, { "epoch": 0.45, "learning_rate": 2.774436090225564e-05, "loss": 4.9497, "step": 7500 }, { "epoch": 0.48, "learning_rate": 2.7593984962406017e-05, "loss": 4.9384, "step": 8000 }, { "epoch": 0.51, "learning_rate": 2.7443609022556393e-05, "loss": 4.847, "step": 8500 }, { "epoch": 0.54, "learning_rate": 2.729323308270677e-05, "loss": 4.8078, "step": 9000 }, { "epoch": 0.57, "learning_rate": 2.7142857142857144e-05, "loss": 4.7734, "step": 9500 }, { "epoch": 0.6, "learning_rate": 2.699248120300752e-05, "loss": 4.6804, "step": 10000 }, { "epoch": 0.63, "learning_rate": 2.6842105263157896e-05, "loss": 4.6768, "step": 10500 }, { "epoch": 0.66, "learning_rate": 2.669172932330827e-05, "loss": 4.6783, "step": 11000 }, { "epoch": 0.69, "learning_rate": 2.6541353383458647e-05, "loss": 4.5171, "step": 11500 }, { "epoch": 0.72, "learning_rate": 2.6390977443609022e-05, "loss": 4.5836, "step": 12000 }, { "epoch": 0.75, "learning_rate": 2.6240601503759398e-05, "loss": 4.4883, "step": 12500 }, { "epoch": 0.78, "learning_rate": 2.6090225563909774e-05, "loss": 4.4158, "step": 13000 }, { "epoch": 0.81, "learning_rate": 2.5939849624060153e-05, "loss": 4.4106, "step": 13500 }, { "epoch": 0.84, "learning_rate": 2.578947368421053e-05, "loss": 4.3406, "step": 14000 }, { "epoch": 0.87, "learning_rate": 2.5639097744360904e-05, "loss": 4.3537, "step": 14500 }, { "epoch": 0.9, "learning_rate": 2.548872180451128e-05, "loss": 4.3262, "step": 15000 }, { "epoch": 0.93, "learning_rate": 2.5338345864661655e-05, "loss": 4.314, "step": 15500 }, { "epoch": 0.96, "learning_rate": 2.518796992481203e-05, "loss": 4.2858, "step": 16000 }, { "epoch": 0.99, "learning_rate": 2.5037593984962406e-05, "loss": 4.2601, "step": 16500 }, { "epoch": 1.0, "eval_accuracy": 0.21903355138214814, "eval_f1": 0.04313887931948169, "eval_loss": 4.24895715713501, "eval_precision": 0.0479982600718407, "eval_recall": 0.05640984310450227, "eval_runtime": 241.3717, "eval_samples_per_second": 117.802, "eval_steps_per_second": 14.728, "step": 16625 }, { "epoch": 1.02, "learning_rate": 2.4887218045112782e-05, "loss": 4.131, "step": 17000 }, { "epoch": 1.05, "learning_rate": 2.4736842105263158e-05, "loss": 4.0202, "step": 17500 }, { "epoch": 1.08, "learning_rate": 2.4586466165413533e-05, "loss": 4.0053, "step": 18000 }, { "epoch": 1.11, "learning_rate": 2.4436090225563912e-05, "loss": 3.9312, "step": 18500 }, { "epoch": 1.14, "learning_rate": 2.4285714285714288e-05, "loss": 3.9707, "step": 19000 }, { "epoch": 1.17, "learning_rate": 2.4135338345864664e-05, "loss": 3.9047, "step": 19500 }, { "epoch": 1.2, "learning_rate": 2.398496240601504e-05, "loss": 3.8578, "step": 20000 }, { "epoch": 1.23, "learning_rate": 2.3834586466165415e-05, "loss": 3.8515, "step": 20500 }, { "epoch": 1.26, "learning_rate": 2.368421052631579e-05, "loss": 3.7787, "step": 21000 }, { "epoch": 1.29, "learning_rate": 2.3533834586466166e-05, "loss": 3.8549, "step": 21500 }, { "epoch": 1.32, "learning_rate": 2.338345864661654e-05, "loss": 3.8012, "step": 22000 }, { "epoch": 1.35, "learning_rate": 2.3233082706766917e-05, "loss": 3.7865, "step": 22500 }, { "epoch": 1.38, "learning_rate": 2.3082706766917293e-05, "loss": 3.7387, "step": 23000 }, { "epoch": 1.41, "learning_rate": 2.293233082706767e-05, "loss": 3.7936, "step": 23500 }, { "epoch": 1.44, "learning_rate": 2.2781954887218048e-05, "loss": 3.7485, "step": 24000 }, { "epoch": 1.47, "learning_rate": 2.2631578947368423e-05, "loss": 3.6973, "step": 24500 }, { "epoch": 1.5, "learning_rate": 2.24812030075188e-05, "loss": 3.7168, "step": 25000 }, { "epoch": 1.53, "learning_rate": 2.2330827067669174e-05, "loss": 3.677, "step": 25500 }, { "epoch": 1.56, "learning_rate": 2.218045112781955e-05, "loss": 3.731, "step": 26000 }, { "epoch": 1.59, "learning_rate": 2.2030075187969926e-05, "loss": 3.7097, "step": 26500 }, { "epoch": 1.62, "learning_rate": 2.18796992481203e-05, "loss": 3.6737, "step": 27000 }, { "epoch": 1.65, "learning_rate": 2.1729323308270677e-05, "loss": 3.6045, "step": 27500 }, { "epoch": 1.68, "learning_rate": 2.1578947368421053e-05, "loss": 3.6213, "step": 28000 }, { "epoch": 1.71, "learning_rate": 2.1428571428571428e-05, "loss": 3.6703, "step": 28500 }, { "epoch": 1.74, "learning_rate": 2.1278195488721807e-05, "loss": 3.6156, "step": 29000 }, { "epoch": 1.77, "learning_rate": 2.1127819548872183e-05, "loss": 3.6843, "step": 29500 }, { "epoch": 1.8, "learning_rate": 2.097744360902256e-05, "loss": 3.703, "step": 30000 }, { "epoch": 1.83, "learning_rate": 2.0827067669172934e-05, "loss": 3.5849, "step": 30500 }, { "epoch": 1.86, "learning_rate": 2.067669172932331e-05, "loss": 3.5669, "step": 31000 }, { "epoch": 1.89, "learning_rate": 2.0526315789473685e-05, "loss": 3.5471, "step": 31500 }, { "epoch": 1.92, "learning_rate": 2.037593984962406e-05, "loss": 3.5496, "step": 32000 }, { "epoch": 1.95, "learning_rate": 2.0225563909774437e-05, "loss": 3.5342, "step": 32500 }, { "epoch": 1.98, "learning_rate": 2.0075187969924812e-05, "loss": 3.5937, "step": 33000 }, { "epoch": 2.0, "eval_accuracy": 0.2797355278891468, "eval_f1": 0.0903492692549776, "eval_loss": 3.727938413619995, "eval_precision": 0.09982434922230758, "eval_recall": 0.10664659921675672, "eval_runtime": 238.7563, "eval_samples_per_second": 119.092, "eval_steps_per_second": 14.89, "step": 33250 }, { "epoch": 2.02, "learning_rate": 1.9924812030075188e-05, "loss": 3.2646, "step": 33500 }, { "epoch": 2.05, "learning_rate": 1.9774436090225563e-05, "loss": 3.2013, "step": 34000 }, { "epoch": 2.08, "learning_rate": 1.9624060150375942e-05, "loss": 3.1563, "step": 34500 }, { "epoch": 2.11, "learning_rate": 1.9473684210526318e-05, "loss": 3.1373, "step": 35000 }, { "epoch": 2.14, "learning_rate": 1.9323308270676694e-05, "loss": 3.1235, "step": 35500 }, { "epoch": 2.17, "learning_rate": 1.917293233082707e-05, "loss": 3.1204, "step": 36000 }, { "epoch": 2.2, "learning_rate": 1.9022556390977445e-05, "loss": 3.1349, "step": 36500 }, { "epoch": 2.23, "learning_rate": 1.887218045112782e-05, "loss": 3.0985, "step": 37000 }, { "epoch": 2.26, "learning_rate": 1.8721804511278196e-05, "loss": 3.0835, "step": 37500 }, { "epoch": 2.29, "learning_rate": 1.8571428571428572e-05, "loss": 3.1539, "step": 38000 }, { "epoch": 2.32, "learning_rate": 1.8421052631578947e-05, "loss": 3.0683, "step": 38500 }, { "epoch": 2.35, "learning_rate": 1.8270676691729323e-05, "loss": 3.079, "step": 39000 }, { "epoch": 2.38, "learning_rate": 1.8120300751879702e-05, "loss": 3.0684, "step": 39500 }, { "epoch": 2.41, "learning_rate": 1.7969924812030078e-05, "loss": 3.1205, "step": 40000 }, { "epoch": 2.44, "learning_rate": 1.7819548872180453e-05, "loss": 3.0898, "step": 40500 }, { "epoch": 2.47, "learning_rate": 1.766917293233083e-05, "loss": 3.1235, "step": 41000 }, { "epoch": 2.5, "learning_rate": 1.7518796992481204e-05, "loss": 3.0515, "step": 41500 }, { "epoch": 2.53, "learning_rate": 1.736842105263158e-05, "loss": 3.0574, "step": 42000 }, { "epoch": 2.56, "learning_rate": 1.7218045112781956e-05, "loss": 3.0646, "step": 42500 }, { "epoch": 2.59, "learning_rate": 1.706766917293233e-05, "loss": 3.0787, "step": 43000 }, { "epoch": 2.62, "learning_rate": 1.6917293233082707e-05, "loss": 3.0776, "step": 43500 }, { "epoch": 2.65, "learning_rate": 1.6766917293233083e-05, "loss": 3.0509, "step": 44000 }, { "epoch": 2.68, "learning_rate": 1.6616541353383458e-05, "loss": 3.0462, "step": 44500 }, { "epoch": 2.71, "learning_rate": 1.6466165413533837e-05, "loss": 3.0383, "step": 45000 }, { "epoch": 2.74, "learning_rate": 1.6315789473684213e-05, "loss": 3.0674, "step": 45500 }, { "epoch": 2.77, "learning_rate": 1.616541353383459e-05, "loss": 3.0155, "step": 46000 }, { "epoch": 2.8, "learning_rate": 1.6015037593984964e-05, "loss": 3.0936, "step": 46500 }, { "epoch": 2.83, "learning_rate": 1.586466165413534e-05, "loss": 3.0657, "step": 47000 }, { "epoch": 2.86, "learning_rate": 1.5714285714285715e-05, "loss": 3.0275, "step": 47500 }, { "epoch": 2.89, "learning_rate": 1.556390977443609e-05, "loss": 3.0265, "step": 48000 }, { "epoch": 2.92, "learning_rate": 1.5413533834586467e-05, "loss": 3.0118, "step": 48500 }, { "epoch": 2.95, "learning_rate": 1.5263157894736842e-05, "loss": 2.9924, "step": 49000 }, { "epoch": 2.98, "learning_rate": 1.511278195488722e-05, "loss": 3.0082, "step": 49500 }, { "epoch": 3.0, "eval_accuracy": 0.30794119715833157, "eval_f1": 0.1317337990094712, "eval_loss": 3.523235559463501, "eval_precision": 0.14522851171454873, "eval_recall": 0.1484531543448168, "eval_runtime": 241.6864, "eval_samples_per_second": 117.648, "eval_steps_per_second": 14.709, "step": 49875 }, { "epoch": 3.01, "learning_rate": 1.4962406015037593e-05, "loss": 2.9208, "step": 50000 }, { "epoch": 3.04, "learning_rate": 1.4812030075187969e-05, "loss": 2.599, "step": 50500 }, { "epoch": 3.07, "learning_rate": 1.4661654135338345e-05, "loss": 2.5956, "step": 51000 }, { "epoch": 3.1, "learning_rate": 1.4511278195488722e-05, "loss": 2.554, "step": 51500 }, { "epoch": 3.13, "learning_rate": 1.4360902255639098e-05, "loss": 2.5659, "step": 52000 }, { "epoch": 3.16, "learning_rate": 1.4210526315789473e-05, "loss": 2.5728, "step": 52500 }, { "epoch": 3.19, "learning_rate": 1.4060150375939849e-05, "loss": 2.5427, "step": 53000 }, { "epoch": 3.22, "learning_rate": 1.3909774436090224e-05, "loss": 2.5317, "step": 53500 }, { "epoch": 3.25, "learning_rate": 1.3759398496240602e-05, "loss": 2.5205, "step": 54000 }, { "epoch": 3.28, "learning_rate": 1.3609022556390977e-05, "loss": 2.5857, "step": 54500 }, { "epoch": 3.31, "learning_rate": 1.3458646616541353e-05, "loss": 2.5955, "step": 55000 }, { "epoch": 3.34, "learning_rate": 1.3308270676691729e-05, "loss": 2.5892, "step": 55500 }, { "epoch": 3.37, "learning_rate": 1.3157894736842104e-05, "loss": 2.5631, "step": 56000 }, { "epoch": 3.4, "learning_rate": 1.3007518796992482e-05, "loss": 2.6229, "step": 56500 }, { "epoch": 3.43, "learning_rate": 1.2857142857142857e-05, "loss": 2.6204, "step": 57000 }, { "epoch": 3.46, "learning_rate": 1.2706766917293233e-05, "loss": 2.5644, "step": 57500 }, { "epoch": 3.49, "learning_rate": 1.2556390977443608e-05, "loss": 2.489, "step": 58000 }, { "epoch": 3.52, "learning_rate": 1.2406015037593984e-05, "loss": 2.5243, "step": 58500 }, { "epoch": 3.55, "learning_rate": 1.225563909774436e-05, "loss": 2.5323, "step": 59000 }, { "epoch": 3.58, "learning_rate": 1.2105263157894737e-05, "loss": 2.5484, "step": 59500 }, { "epoch": 3.61, "learning_rate": 1.1954887218045113e-05, "loss": 2.5526, "step": 60000 }, { "epoch": 3.64, "learning_rate": 1.1804511278195488e-05, "loss": 2.546, "step": 60500 }, { "epoch": 3.67, "learning_rate": 1.1654135338345864e-05, "loss": 2.4828, "step": 61000 }, { "epoch": 3.7, "learning_rate": 1.150375939849624e-05, "loss": 2.5012, "step": 61500 }, { "epoch": 3.73, "learning_rate": 1.1353383458646617e-05, "loss": 2.6209, "step": 62000 }, { "epoch": 3.76, "learning_rate": 1.1203007518796992e-05, "loss": 2.474, "step": 62500 }, { "epoch": 3.79, "learning_rate": 1.1052631578947368e-05, "loss": 2.5335, "step": 63000 }, { "epoch": 3.82, "learning_rate": 1.0902255639097744e-05, "loss": 2.5015, "step": 63500 }, { "epoch": 3.85, "learning_rate": 1.075187969924812e-05, "loss": 2.4746, "step": 64000 }, { "epoch": 3.88, "learning_rate": 1.0601503759398497e-05, "loss": 2.4877, "step": 64500 }, { "epoch": 3.91, "learning_rate": 1.0451127819548872e-05, "loss": 2.4325, "step": 65000 }, { "epoch": 3.94, "learning_rate": 1.0300751879699248e-05, "loss": 2.5299, "step": 65500 }, { "epoch": 3.97, "learning_rate": 1.0150375939849624e-05, "loss": 2.5546, "step": 66000 }, { "epoch": 4.0, "learning_rate": 9.999999999999999e-06, "loss": 2.4999, "step": 66500 }, { "epoch": 4.0, "eval_accuracy": 0.3128648800731519, "eval_f1": 0.15025379830203955, "eval_loss": 3.5057616233825684, "eval_precision": 0.16130440997164983, "eval_recall": 0.16584776990015507, "eval_runtime": 243.3854, "eval_samples_per_second": 116.827, "eval_steps_per_second": 14.606, "step": 66500 }, { "epoch": 4.03, "learning_rate": 9.849624060150376e-06, "loss": 2.1264, "step": 67000 }, { "epoch": 4.06, "learning_rate": 9.699248120300752e-06, "loss": 2.1454, "step": 67500 }, { "epoch": 4.09, "learning_rate": 9.548872180451128e-06, "loss": 2.1236, "step": 68000 }, { "epoch": 4.12, "learning_rate": 9.398496240601503e-06, "loss": 2.1299, "step": 68500 }, { "epoch": 4.15, "learning_rate": 9.248120300751879e-06, "loss": 2.1006, "step": 69000 }, { "epoch": 4.18, "learning_rate": 9.097744360902255e-06, "loss": 2.105, "step": 69500 }, { "epoch": 4.21, "learning_rate": 8.947368421052632e-06, "loss": 2.1096, "step": 70000 }, { "epoch": 4.24, "learning_rate": 8.796992481203007e-06, "loss": 2.0984, "step": 70500 }, { "epoch": 4.27, "learning_rate": 8.646616541353383e-06, "loss": 2.1179, "step": 71000 }, { "epoch": 4.3, "learning_rate": 8.496240601503759e-06, "loss": 2.1057, "step": 71500 }, { "epoch": 4.33, "learning_rate": 8.345864661654134e-06, "loss": 2.0953, "step": 72000 }, { "epoch": 4.36, "learning_rate": 8.195488721804512e-06, "loss": 2.0757, "step": 72500 }, { "epoch": 4.39, "learning_rate": 8.045112781954887e-06, "loss": 2.1196, "step": 73000 }, { "epoch": 4.42, "learning_rate": 7.894736842105263e-06, "loss": 2.1006, "step": 73500 }, { "epoch": 4.45, "learning_rate": 7.744360902255639e-06, "loss": 2.0635, "step": 74000 }, { "epoch": 4.48, "learning_rate": 7.593984962406015e-06, "loss": 2.1135, "step": 74500 }, { "epoch": 4.51, "learning_rate": 7.443609022556391e-06, "loss": 2.0996, "step": 75000 }, { "epoch": 4.54, "learning_rate": 7.293233082706767e-06, "loss": 2.1062, "step": 75500 }, { "epoch": 4.57, "learning_rate": 7.142857142857143e-06, "loss": 2.0833, "step": 76000 }, { "epoch": 4.6, "learning_rate": 6.992481203007518e-06, "loss": 2.0442, "step": 76500 }, { "epoch": 4.63, "learning_rate": 6.842105263157895e-06, "loss": 2.126, "step": 77000 }, { "epoch": 4.66, "learning_rate": 6.6917293233082704e-06, "loss": 2.1046, "step": 77500 }, { "epoch": 4.69, "learning_rate": 6.541353383458646e-06, "loss": 2.0697, "step": 78000 }, { "epoch": 4.72, "learning_rate": 6.3909774436090225e-06, "loss": 2.0902, "step": 78500 }, { "epoch": 4.75, "learning_rate": 6.240601503759398e-06, "loss": 2.09, "step": 79000 }, { "epoch": 4.78, "learning_rate": 6.090225563909775e-06, "loss": 2.0961, "step": 79500 }, { "epoch": 4.81, "learning_rate": 5.93984962406015e-06, "loss": 2.0841, "step": 80000 }, { "epoch": 4.84, "learning_rate": 5.789473684210526e-06, "loss": 2.0838, "step": 80500 }, { "epoch": 4.87, "learning_rate": 5.639097744360902e-06, "loss": 2.0492, "step": 81000 }, { "epoch": 4.9, "learning_rate": 5.488721804511278e-06, "loss": 2.0804, "step": 81500 }, { "epoch": 4.93, "learning_rate": 5.3383458646616536e-06, "loss": 2.105, "step": 82000 }, { "epoch": 4.96, "learning_rate": 5.18796992481203e-06, "loss": 2.1231, "step": 82500 }, { "epoch": 4.99, "learning_rate": 5.037593984962406e-06, "loss": 2.0048, "step": 83000 }, { "epoch": 5.0, "eval_accuracy": 0.3194063445171274, "eval_f1": 0.16919354439457027, "eval_loss": 3.5483460426330566, "eval_precision": 0.1821520394597461, "eval_recall": 0.18324334265649903, "eval_runtime": 239.2709, "eval_samples_per_second": 118.836, "eval_steps_per_second": 14.858, "step": 83125 }, { "epoch": 5.02, "learning_rate": 4.887218045112782e-06, "loss": 1.8463, "step": 83500 }, { "epoch": 5.05, "learning_rate": 4.736842105263158e-06, "loss": 1.8074, "step": 84000 }, { "epoch": 5.08, "learning_rate": 4.586466165413533e-06, "loss": 1.7579, "step": 84500 }, { "epoch": 5.11, "learning_rate": 4.43609022556391e-06, "loss": 1.7725, "step": 85000 }, { "epoch": 5.14, "learning_rate": 4.2857142857142855e-06, "loss": 1.7747, "step": 85500 }, { "epoch": 5.17, "learning_rate": 4.135338345864662e-06, "loss": 1.7711, "step": 86000 }, { "epoch": 5.2, "learning_rate": 3.9849624060150376e-06, "loss": 1.8442, "step": 86500 }, { "epoch": 5.23, "learning_rate": 3.834586466165413e-06, "loss": 1.8341, "step": 87000 }, { "epoch": 5.26, "learning_rate": 3.6842105263157892e-06, "loss": 1.7366, "step": 87500 }, { "epoch": 5.29, "learning_rate": 3.5338345864661653e-06, "loss": 1.7525, "step": 88000 }, { "epoch": 5.32, "learning_rate": 3.3834586466165413e-06, "loss": 1.7782, "step": 88500 }, { "epoch": 5.35, "learning_rate": 3.2330827067669174e-06, "loss": 1.8131, "step": 89000 }, { "epoch": 5.38, "learning_rate": 3.082706766917293e-06, "loss": 1.7687, "step": 89500 }, { "epoch": 5.41, "learning_rate": 2.932330827067669e-06, "loss": 1.7685, "step": 90000 }, { "epoch": 5.44, "learning_rate": 2.781954887218045e-06, "loss": 1.7978, "step": 90500 }, { "epoch": 5.47, "learning_rate": 2.631578947368421e-06, "loss": 1.8071, "step": 91000 }, { "epoch": 5.5, "learning_rate": 2.4812030075187967e-06, "loss": 1.7622, "step": 91500 }, { "epoch": 5.53, "learning_rate": 2.3308270676691728e-06, "loss": 1.756, "step": 92000 }, { "epoch": 5.56, "learning_rate": 2.180451127819549e-06, "loss": 1.8045, "step": 92500 }, { "epoch": 5.59, "learning_rate": 2.030075187969925e-06, "loss": 1.6949, "step": 93000 }, { "epoch": 5.62, "learning_rate": 1.8796992481203007e-06, "loss": 1.7255, "step": 93500 }, { "epoch": 5.65, "learning_rate": 1.7293233082706765e-06, "loss": 1.7631, "step": 94000 }, { "epoch": 5.68, "learning_rate": 1.5789473684210526e-06, "loss": 1.7723, "step": 94500 }, { "epoch": 5.71, "learning_rate": 1.4285714285714286e-06, "loss": 1.7405, "step": 95000 }, { "epoch": 5.74, "learning_rate": 1.2781954887218045e-06, "loss": 1.8144, "step": 95500 }, { "epoch": 5.77, "learning_rate": 1.1278195488721805e-06, "loss": 1.7847, "step": 96000 }, { "epoch": 5.8, "learning_rate": 9.774436090225563e-07, "loss": 1.7579, "step": 96500 }, { "epoch": 5.83, "learning_rate": 8.270676691729323e-07, "loss": 1.8196, "step": 97000 }, { "epoch": 5.86, "learning_rate": 6.766917293233082e-07, "loss": 1.744, "step": 97500 }, { "epoch": 5.89, "learning_rate": 5.263157894736842e-07, "loss": 1.7594, "step": 98000 }, { "epoch": 5.92, "learning_rate": 3.7593984962406015e-07, "loss": 1.7179, "step": 98500 }, { "epoch": 5.95, "learning_rate": 2.255639097744361e-07, "loss": 1.7427, "step": 99000 }, { "epoch": 5.98, "learning_rate": 7.518796992481203e-08, "loss": 1.7591, "step": 99500 }, { "epoch": 6.0, "eval_accuracy": 0.317647886333263, "eval_f1": 0.1720864596377023, "eval_loss": 3.6147267818450928, "eval_precision": 0.18196956006589313, "eval_recall": 0.18618078808353178, "eval_runtime": 239.2791, "eval_samples_per_second": 118.832, "eval_steps_per_second": 14.857, "step": 99750 } ], "max_steps": 99750, "num_train_epochs": 6, "total_flos": 2.142716050814976e+17, "trial_name": null, "trial_params": null }