{ "best_metric": 1.013974905014038, "best_model_checkpoint": "models_gitignored/distilgpt2-finetuned-sentence-classification/checkpoint-50503", "epoch": 5.0, "global_step": 252515, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9960398392174724e-05, "loss": 1.5614, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9920796784349446e-05, "loss": 1.2673, "step": 1000 }, { "epoch": 0.03, "learning_rate": 1.988119517652417e-05, "loss": 1.2992, "step": 1500 }, { "epoch": 0.04, "learning_rate": 1.984159356869889e-05, "loss": 1.1991, "step": 2000 }, { "epoch": 0.05, "learning_rate": 1.9801991960873613e-05, "loss": 1.3129, "step": 2500 }, { "epoch": 0.06, "learning_rate": 1.9762390353048335e-05, "loss": 1.2455, "step": 3000 }, { "epoch": 0.07, "learning_rate": 1.9722788745223057e-05, "loss": 1.185, "step": 3500 }, { "epoch": 0.08, "learning_rate": 1.968318713739778e-05, "loss": 1.2117, "step": 4000 }, { "epoch": 0.09, "learning_rate": 1.9643585529572502e-05, "loss": 1.2481, "step": 4500 }, { "epoch": 0.1, "learning_rate": 1.9603983921747224e-05, "loss": 1.1966, "step": 5000 }, { "epoch": 0.11, "learning_rate": 1.9564382313921946e-05, "loss": 1.2012, "step": 5500 }, { "epoch": 0.12, "learning_rate": 1.952478070609667e-05, "loss": 1.138, "step": 6000 }, { "epoch": 0.13, "learning_rate": 1.948517909827139e-05, "loss": 1.1528, "step": 6500 }, { "epoch": 0.14, "learning_rate": 1.9445577490446116e-05, "loss": 1.1302, "step": 7000 }, { "epoch": 0.15, "learning_rate": 1.9405975882620835e-05, "loss": 1.1111, "step": 7500 }, { "epoch": 0.16, "learning_rate": 1.9366374274795557e-05, "loss": 1.1396, "step": 8000 }, { "epoch": 0.17, "learning_rate": 1.9326772666970283e-05, "loss": 1.2655, "step": 8500 }, { "epoch": 0.18, "learning_rate": 1.9287171059145002e-05, "loss": 1.1342, "step": 9000 }, { "epoch": 0.19, "learning_rate": 1.9247569451319724e-05, "loss": 1.0246, "step": 9500 }, { "epoch": 0.2, "learning_rate": 1.920796784349445e-05, "loss": 1.1806, "step": 10000 }, { "epoch": 0.21, "learning_rate": 1.916836623566917e-05, "loss": 1.1712, "step": 10500 }, { "epoch": 0.22, "learning_rate": 1.912876462784389e-05, "loss": 1.0438, "step": 11000 }, { "epoch": 0.23, "learning_rate": 1.9089163020018616e-05, "loss": 1.0374, "step": 11500 }, { "epoch": 0.24, "learning_rate": 1.9049561412193335e-05, "loss": 1.0874, "step": 12000 }, { "epoch": 0.25, "learning_rate": 1.9009959804368057e-05, "loss": 1.1464, "step": 12500 }, { "epoch": 0.26, "learning_rate": 1.8970358196542783e-05, "loss": 1.0696, "step": 13000 }, { "epoch": 0.27, "learning_rate": 1.8930756588717502e-05, "loss": 0.9505, "step": 13500 }, { "epoch": 0.28, "learning_rate": 1.8891154980892224e-05, "loss": 1.0116, "step": 14000 }, { "epoch": 0.29, "learning_rate": 1.885155337306695e-05, "loss": 1.0468, "step": 14500 }, { "epoch": 0.3, "learning_rate": 1.881195176524167e-05, "loss": 1.1196, "step": 15000 }, { "epoch": 0.31, "learning_rate": 1.877235015741639e-05, "loss": 1.0663, "step": 15500 }, { "epoch": 0.32, "learning_rate": 1.8732748549591116e-05, "loss": 1.0019, "step": 16000 }, { "epoch": 0.33, "learning_rate": 1.8693146941765835e-05, "loss": 1.1987, "step": 16500 }, { "epoch": 0.34, "learning_rate": 1.865354533394056e-05, "loss": 1.0079, "step": 17000 }, { "epoch": 0.35, "learning_rate": 1.8613943726115283e-05, "loss": 1.1582, "step": 17500 }, { "epoch": 0.36, "learning_rate": 1.8574342118290002e-05, "loss": 1.0791, "step": 18000 }, { "epoch": 0.37, "learning_rate": 1.8534740510464728e-05, "loss": 1.0507, "step": 18500 }, { "epoch": 0.38, "learning_rate": 1.849513890263945e-05, "loss": 1.0958, "step": 19000 }, { "epoch": 0.39, "learning_rate": 1.8455537294814172e-05, "loss": 1.0225, "step": 19500 }, { "epoch": 0.4, "learning_rate": 1.8415935686988894e-05, "loss": 1.0784, "step": 20000 }, { "epoch": 0.41, "learning_rate": 1.8376334079163616e-05, "loss": 1.1167, "step": 20500 }, { "epoch": 0.42, "learning_rate": 1.833673247133834e-05, "loss": 1.0876, "step": 21000 }, { "epoch": 0.43, "learning_rate": 1.829713086351306e-05, "loss": 0.9754, "step": 21500 }, { "epoch": 0.44, "learning_rate": 1.8257529255687783e-05, "loss": 1.1337, "step": 22000 }, { "epoch": 0.45, "learning_rate": 1.8217927647862505e-05, "loss": 1.0121, "step": 22500 }, { "epoch": 0.46, "learning_rate": 1.8178326040037228e-05, "loss": 1.0769, "step": 23000 }, { "epoch": 0.47, "learning_rate": 1.813872443221195e-05, "loss": 1.0366, "step": 23500 }, { "epoch": 0.48, "learning_rate": 1.8099122824386672e-05, "loss": 1.0228, "step": 24000 }, { "epoch": 0.49, "learning_rate": 1.8059521216561394e-05, "loss": 1.0451, "step": 24500 }, { "epoch": 0.5, "learning_rate": 1.8019919608736117e-05, "loss": 1.0227, "step": 25000 }, { "epoch": 0.5, "learning_rate": 1.798031800091084e-05, "loss": 0.9651, "step": 25500 }, { "epoch": 0.51, "learning_rate": 1.794071639308556e-05, "loss": 1.1018, "step": 26000 }, { "epoch": 0.52, "learning_rate": 1.7901114785260283e-05, "loss": 0.9523, "step": 26500 }, { "epoch": 0.53, "learning_rate": 1.7861513177435005e-05, "loss": 0.95, "step": 27000 }, { "epoch": 0.54, "learning_rate": 1.7821911569609728e-05, "loss": 1.112, "step": 27500 }, { "epoch": 0.55, "learning_rate": 1.778230996178445e-05, "loss": 1.0035, "step": 28000 }, { "epoch": 0.56, "learning_rate": 1.7742708353959172e-05, "loss": 0.9983, "step": 28500 }, { "epoch": 0.57, "learning_rate": 1.7703106746133894e-05, "loss": 1.0854, "step": 29000 }, { "epoch": 0.58, "learning_rate": 1.7663505138308617e-05, "loss": 0.9696, "step": 29500 }, { "epoch": 0.59, "learning_rate": 1.762390353048334e-05, "loss": 0.9931, "step": 30000 }, { "epoch": 0.6, "learning_rate": 1.758430192265806e-05, "loss": 0.9197, "step": 30500 }, { "epoch": 0.61, "learning_rate": 1.7544700314832783e-05, "loss": 1.0011, "step": 31000 }, { "epoch": 0.62, "learning_rate": 1.7505098707007506e-05, "loss": 1.0841, "step": 31500 }, { "epoch": 0.63, "learning_rate": 1.7465497099182228e-05, "loss": 1.0603, "step": 32000 }, { "epoch": 0.64, "learning_rate": 1.742589549135695e-05, "loss": 0.9529, "step": 32500 }, { "epoch": 0.65, "learning_rate": 1.7386293883531672e-05, "loss": 1.0164, "step": 33000 }, { "epoch": 0.66, "learning_rate": 1.7346692275706394e-05, "loss": 0.9713, "step": 33500 }, { "epoch": 0.67, "learning_rate": 1.7307090667881117e-05, "loss": 1.1603, "step": 34000 }, { "epoch": 0.68, "learning_rate": 1.726748906005584e-05, "loss": 0.9445, "step": 34500 }, { "epoch": 0.69, "learning_rate": 1.722788745223056e-05, "loss": 1.0699, "step": 35000 }, { "epoch": 0.7, "learning_rate": 1.7188285844405283e-05, "loss": 1.0315, "step": 35500 }, { "epoch": 0.71, "learning_rate": 1.7148684236580006e-05, "loss": 1.0873, "step": 36000 }, { "epoch": 0.72, "learning_rate": 1.710908262875473e-05, "loss": 0.9844, "step": 36500 }, { "epoch": 0.73, "learning_rate": 1.706948102092945e-05, "loss": 1.0472, "step": 37000 }, { "epoch": 0.74, "learning_rate": 1.7029879413104172e-05, "loss": 0.9816, "step": 37500 }, { "epoch": 0.75, "learning_rate": 1.6990277805278898e-05, "loss": 0.9801, "step": 38000 }, { "epoch": 0.76, "learning_rate": 1.6950676197453617e-05, "loss": 1.0128, "step": 38500 }, { "epoch": 0.77, "learning_rate": 1.691107458962834e-05, "loss": 1.0612, "step": 39000 }, { "epoch": 0.78, "learning_rate": 1.6871472981803065e-05, "loss": 1.0238, "step": 39500 }, { "epoch": 0.79, "learning_rate": 1.6831871373977783e-05, "loss": 1.0085, "step": 40000 }, { "epoch": 0.8, "learning_rate": 1.6792269766152506e-05, "loss": 0.9598, "step": 40500 }, { "epoch": 0.81, "learning_rate": 1.675266815832723e-05, "loss": 0.9872, "step": 41000 }, { "epoch": 0.82, "learning_rate": 1.671306655050195e-05, "loss": 1.0327, "step": 41500 }, { "epoch": 0.83, "learning_rate": 1.6673464942676672e-05, "loss": 1.0589, "step": 42000 }, { "epoch": 0.84, "learning_rate": 1.6633863334851398e-05, "loss": 0.8884, "step": 42500 }, { "epoch": 0.85, "learning_rate": 1.659426172702612e-05, "loss": 0.8987, "step": 43000 }, { "epoch": 0.86, "learning_rate": 1.655466011920084e-05, "loss": 1.0039, "step": 43500 }, { "epoch": 0.87, "learning_rate": 1.6515058511375565e-05, "loss": 0.9672, "step": 44000 }, { "epoch": 0.88, "learning_rate": 1.6475456903550287e-05, "loss": 0.9889, "step": 44500 }, { "epoch": 0.89, "learning_rate": 1.6435855295725006e-05, "loss": 1.008, "step": 45000 }, { "epoch": 0.9, "learning_rate": 1.639625368789973e-05, "loss": 1.0918, "step": 45500 }, { "epoch": 0.91, "learning_rate": 1.6356652080074454e-05, "loss": 0.9732, "step": 46000 }, { "epoch": 0.92, "learning_rate": 1.6317050472249172e-05, "loss": 0.9143, "step": 46500 }, { "epoch": 0.93, "learning_rate": 1.6277448864423898e-05, "loss": 0.9441, "step": 47000 }, { "epoch": 0.94, "learning_rate": 1.623784725659862e-05, "loss": 0.9826, "step": 47500 }, { "epoch": 0.95, "learning_rate": 1.619824564877334e-05, "loss": 1.0017, "step": 48000 }, { "epoch": 0.96, "learning_rate": 1.6158644040948065e-05, "loss": 1.0458, "step": 48500 }, { "epoch": 0.97, "learning_rate": 1.6119042433122787e-05, "loss": 0.9426, "step": 49000 }, { "epoch": 0.98, "learning_rate": 1.607944082529751e-05, "loss": 0.9778, "step": 49500 }, { "epoch": 0.99, "learning_rate": 1.603983921747223e-05, "loss": 1.0585, "step": 50000 }, { "epoch": 1.0, "learning_rate": 1.6000237609646954e-05, "loss": 1.0143, "step": 50500 }, { "epoch": 1.0, "eval_accuracy": 0.7751532573684757, "eval_f1": 0.7701484228300445, "eval_kappa": 0.694933712523312, "eval_loss": 1.013974905014038, "eval_precision": 0.7739018848891264, "eval_recall": 0.7751532573684757, "eval_runtime": 75.5237, "eval_samples_per_second": 382.304, "eval_steps_per_second": 191.159, "step": 50503 }, { "epoch": 1.01, "learning_rate": 1.5960636001821676e-05, "loss": 0.9274, "step": 51000 }, { "epoch": 1.02, "learning_rate": 1.5921034393996398e-05, "loss": 0.9629, "step": 51500 }, { "epoch": 1.03, "learning_rate": 1.588143278617112e-05, "loss": 0.884, "step": 52000 }, { "epoch": 1.04, "learning_rate": 1.5841831178345843e-05, "loss": 0.8756, "step": 52500 }, { "epoch": 1.05, "learning_rate": 1.5802229570520565e-05, "loss": 0.9358, "step": 53000 }, { "epoch": 1.06, "learning_rate": 1.5762627962695287e-05, "loss": 0.8703, "step": 53500 }, { "epoch": 1.07, "learning_rate": 1.572302635487001e-05, "loss": 0.9352, "step": 54000 }, { "epoch": 1.08, "learning_rate": 1.568342474704473e-05, "loss": 0.9129, "step": 54500 }, { "epoch": 1.09, "learning_rate": 1.5643823139219454e-05, "loss": 0.8904, "step": 55000 }, { "epoch": 1.1, "learning_rate": 1.5604221531394176e-05, "loss": 0.8068, "step": 55500 }, { "epoch": 1.11, "learning_rate": 1.5564619923568898e-05, "loss": 0.8357, "step": 56000 }, { "epoch": 1.12, "learning_rate": 1.552501831574362e-05, "loss": 0.9478, "step": 56500 }, { "epoch": 1.13, "learning_rate": 1.5485416707918343e-05, "loss": 0.8811, "step": 57000 }, { "epoch": 1.14, "learning_rate": 1.5445815100093065e-05, "loss": 0.9865, "step": 57500 }, { "epoch": 1.15, "learning_rate": 1.5406213492267787e-05, "loss": 0.9492, "step": 58000 }, { "epoch": 1.16, "learning_rate": 1.536661188444251e-05, "loss": 0.8644, "step": 58500 }, { "epoch": 1.17, "learning_rate": 1.532701027661723e-05, "loss": 0.9351, "step": 59000 }, { "epoch": 1.18, "learning_rate": 1.5287408668791954e-05, "loss": 0.7533, "step": 59500 }, { "epoch": 1.19, "learning_rate": 1.5247807060966678e-05, "loss": 0.9762, "step": 60000 }, { "epoch": 1.2, "learning_rate": 1.5208205453141398e-05, "loss": 0.9428, "step": 60500 }, { "epoch": 1.21, "learning_rate": 1.516860384531612e-05, "loss": 0.9179, "step": 61000 }, { "epoch": 1.22, "learning_rate": 1.5129002237490844e-05, "loss": 0.9966, "step": 61500 }, { "epoch": 1.23, "learning_rate": 1.5089400629665565e-05, "loss": 0.9816, "step": 62000 }, { "epoch": 1.24, "learning_rate": 1.5049799021840287e-05, "loss": 0.9758, "step": 62500 }, { "epoch": 1.25, "learning_rate": 1.5010197414015011e-05, "loss": 0.9364, "step": 63000 }, { "epoch": 1.26, "learning_rate": 1.4970595806189732e-05, "loss": 0.9155, "step": 63500 }, { "epoch": 1.27, "learning_rate": 1.4930994198364454e-05, "loss": 0.9483, "step": 64000 }, { "epoch": 1.28, "learning_rate": 1.4891392590539178e-05, "loss": 0.9508, "step": 64500 }, { "epoch": 1.29, "learning_rate": 1.4851790982713898e-05, "loss": 0.8543, "step": 65000 }, { "epoch": 1.3, "learning_rate": 1.481218937488862e-05, "loss": 0.8652, "step": 65500 }, { "epoch": 1.31, "learning_rate": 1.4772587767063345e-05, "loss": 0.9519, "step": 66000 }, { "epoch": 1.32, "learning_rate": 1.4732986159238065e-05, "loss": 0.9341, "step": 66500 }, { "epoch": 1.33, "learning_rate": 1.4693384551412789e-05, "loss": 0.9183, "step": 67000 }, { "epoch": 1.34, "learning_rate": 1.4653782943587511e-05, "loss": 1.0077, "step": 67500 }, { "epoch": 1.35, "learning_rate": 1.4614181335762233e-05, "loss": 1.009, "step": 68000 }, { "epoch": 1.36, "learning_rate": 1.4574579727936956e-05, "loss": 0.9462, "step": 68500 }, { "epoch": 1.37, "learning_rate": 1.4534978120111678e-05, "loss": 0.9164, "step": 69000 }, { "epoch": 1.38, "learning_rate": 1.44953765122864e-05, "loss": 0.8548, "step": 69500 }, { "epoch": 1.39, "learning_rate": 1.4455774904461122e-05, "loss": 0.9376, "step": 70000 }, { "epoch": 1.4, "learning_rate": 1.4416173296635845e-05, "loss": 0.9584, "step": 70500 }, { "epoch": 1.41, "learning_rate": 1.4376571688810569e-05, "loss": 0.963, "step": 71000 }, { "epoch": 1.42, "learning_rate": 1.4336970080985289e-05, "loss": 0.9145, "step": 71500 }, { "epoch": 1.43, "learning_rate": 1.4297368473160011e-05, "loss": 0.9862, "step": 72000 }, { "epoch": 1.44, "learning_rate": 1.4257766865334735e-05, "loss": 0.858, "step": 72500 }, { "epoch": 1.45, "learning_rate": 1.4218165257509456e-05, "loss": 0.8759, "step": 73000 }, { "epoch": 1.46, "learning_rate": 1.4178563649684178e-05, "loss": 0.9743, "step": 73500 }, { "epoch": 1.47, "learning_rate": 1.4138962041858902e-05, "loss": 0.9357, "step": 74000 }, { "epoch": 1.48, "learning_rate": 1.4099360434033622e-05, "loss": 0.8462, "step": 74500 }, { "epoch": 1.49, "learning_rate": 1.4059758826208345e-05, "loss": 0.9703, "step": 75000 }, { "epoch": 1.49, "learning_rate": 1.4020157218383069e-05, "loss": 0.9413, "step": 75500 }, { "epoch": 1.5, "learning_rate": 1.3980555610557789e-05, "loss": 0.9236, "step": 76000 }, { "epoch": 1.51, "learning_rate": 1.3940954002732511e-05, "loss": 0.9251, "step": 76500 }, { "epoch": 1.52, "learning_rate": 1.3901352394907235e-05, "loss": 0.9331, "step": 77000 }, { "epoch": 1.53, "learning_rate": 1.3861750787081956e-05, "loss": 0.9106, "step": 77500 }, { "epoch": 1.54, "learning_rate": 1.3822149179256678e-05, "loss": 0.8905, "step": 78000 }, { "epoch": 1.55, "learning_rate": 1.3782547571431402e-05, "loss": 0.8568, "step": 78500 }, { "epoch": 1.56, "learning_rate": 1.3742945963606123e-05, "loss": 0.8797, "step": 79000 }, { "epoch": 1.57, "learning_rate": 1.3703344355780845e-05, "loss": 0.9236, "step": 79500 }, { "epoch": 1.58, "learning_rate": 1.3663742747955569e-05, "loss": 0.9197, "step": 80000 }, { "epoch": 1.59, "learning_rate": 1.3624141140130291e-05, "loss": 0.9806, "step": 80500 }, { "epoch": 1.6, "learning_rate": 1.3584539532305011e-05, "loss": 0.8769, "step": 81000 }, { "epoch": 1.61, "learning_rate": 1.3544937924479735e-05, "loss": 0.8763, "step": 81500 }, { "epoch": 1.62, "learning_rate": 1.3505336316654458e-05, "loss": 0.9818, "step": 82000 }, { "epoch": 1.63, "learning_rate": 1.346573470882918e-05, "loss": 0.8387, "step": 82500 }, { "epoch": 1.64, "learning_rate": 1.3426133101003902e-05, "loss": 0.9898, "step": 83000 }, { "epoch": 1.65, "learning_rate": 1.3386531493178624e-05, "loss": 0.9446, "step": 83500 }, { "epoch": 1.66, "learning_rate": 1.3346929885353347e-05, "loss": 0.9371, "step": 84000 }, { "epoch": 1.67, "learning_rate": 1.3307328277528069e-05, "loss": 0.8356, "step": 84500 }, { "epoch": 1.68, "learning_rate": 1.3267726669702791e-05, "loss": 0.9958, "step": 85000 }, { "epoch": 1.69, "learning_rate": 1.3228125061877513e-05, "loss": 0.9615, "step": 85500 }, { "epoch": 1.7, "learning_rate": 1.3188523454052235e-05, "loss": 0.9137, "step": 86000 }, { "epoch": 1.71, "learning_rate": 1.314892184622696e-05, "loss": 0.9037, "step": 86500 }, { "epoch": 1.72, "learning_rate": 1.310932023840168e-05, "loss": 0.9638, "step": 87000 }, { "epoch": 1.73, "learning_rate": 1.3069718630576402e-05, "loss": 0.915, "step": 87500 }, { "epoch": 1.74, "learning_rate": 1.3030117022751126e-05, "loss": 0.8954, "step": 88000 }, { "epoch": 1.75, "learning_rate": 1.2990515414925847e-05, "loss": 0.947, "step": 88500 }, { "epoch": 1.76, "learning_rate": 1.2950913807100569e-05, "loss": 0.9149, "step": 89000 }, { "epoch": 1.77, "learning_rate": 1.2911312199275293e-05, "loss": 0.8418, "step": 89500 }, { "epoch": 1.78, "learning_rate": 1.2871710591450013e-05, "loss": 1.0158, "step": 90000 }, { "epoch": 1.79, "learning_rate": 1.2832108983624736e-05, "loss": 0.8991, "step": 90500 }, { "epoch": 1.8, "learning_rate": 1.279250737579946e-05, "loss": 0.8777, "step": 91000 }, { "epoch": 1.81, "learning_rate": 1.2752905767974182e-05, "loss": 0.9285, "step": 91500 }, { "epoch": 1.82, "learning_rate": 1.2713304160148902e-05, "loss": 0.8699, "step": 92000 }, { "epoch": 1.83, "learning_rate": 1.2673702552323626e-05, "loss": 0.9436, "step": 92500 }, { "epoch": 1.84, "learning_rate": 1.2634100944498348e-05, "loss": 0.809, "step": 93000 }, { "epoch": 1.85, "learning_rate": 1.2594499336673069e-05, "loss": 0.9753, "step": 93500 }, { "epoch": 1.86, "learning_rate": 1.2554897728847793e-05, "loss": 0.8746, "step": 94000 }, { "epoch": 1.87, "learning_rate": 1.2515296121022515e-05, "loss": 0.8436, "step": 94500 }, { "epoch": 1.88, "learning_rate": 1.2475694513197236e-05, "loss": 0.9384, "step": 95000 }, { "epoch": 1.89, "learning_rate": 1.243609290537196e-05, "loss": 0.93, "step": 95500 }, { "epoch": 1.9, "learning_rate": 1.2396491297546682e-05, "loss": 0.9228, "step": 96000 }, { "epoch": 1.91, "learning_rate": 1.2356889689721402e-05, "loss": 0.8928, "step": 96500 }, { "epoch": 1.92, "learning_rate": 1.2317288081896126e-05, "loss": 0.9077, "step": 97000 }, { "epoch": 1.93, "learning_rate": 1.2277686474070848e-05, "loss": 0.9978, "step": 97500 }, { "epoch": 1.94, "learning_rate": 1.2238084866245569e-05, "loss": 0.8285, "step": 98000 }, { "epoch": 1.95, "learning_rate": 1.2198483258420293e-05, "loss": 0.8898, "step": 98500 }, { "epoch": 1.96, "learning_rate": 1.2158881650595015e-05, "loss": 0.8911, "step": 99000 }, { "epoch": 1.97, "learning_rate": 1.2119280042769737e-05, "loss": 0.902, "step": 99500 }, { "epoch": 1.98, "learning_rate": 1.207967843494446e-05, "loss": 0.9847, "step": 100000 }, { "epoch": 1.99, "learning_rate": 1.2040076827119182e-05, "loss": 0.9343, "step": 100500 }, { "epoch": 2.0, "learning_rate": 1.2000475219293904e-05, "loss": 0.9232, "step": 101000 }, { "epoch": 2.0, "eval_accuracy": 0.7872753091123195, "eval_f1": 0.7845130762948601, "eval_kappa": 0.7138788888139294, "eval_loss": 1.0672938823699951, "eval_precision": 0.786434208273758, "eval_recall": 0.7872753091123195, "eval_runtime": 75.511, "eval_samples_per_second": 382.368, "eval_steps_per_second": 191.191, "step": 101006 }, { "epoch": 2.01, "learning_rate": 1.1960873611468626e-05, "loss": 0.7632, "step": 101500 }, { "epoch": 2.02, "learning_rate": 1.192127200364335e-05, "loss": 0.7952, "step": 102000 }, { "epoch": 2.03, "learning_rate": 1.188167039581807e-05, "loss": 1.0146, "step": 102500 }, { "epoch": 2.04, "learning_rate": 1.1842068787992793e-05, "loss": 0.7993, "step": 103000 }, { "epoch": 2.05, "learning_rate": 1.1802467180167517e-05, "loss": 0.7097, "step": 103500 }, { "epoch": 2.06, "learning_rate": 1.1762865572342239e-05, "loss": 0.7502, "step": 104000 }, { "epoch": 2.07, "learning_rate": 1.172326396451696e-05, "loss": 0.8191, "step": 104500 }, { "epoch": 2.08, "learning_rate": 1.1683662356691684e-05, "loss": 0.8042, "step": 105000 }, { "epoch": 2.09, "learning_rate": 1.1644060748866406e-05, "loss": 0.8106, "step": 105500 }, { "epoch": 2.1, "learning_rate": 1.1604459141041126e-05, "loss": 0.8584, "step": 106000 }, { "epoch": 2.11, "learning_rate": 1.156485753321585e-05, "loss": 0.7914, "step": 106500 }, { "epoch": 2.12, "learning_rate": 1.1525255925390573e-05, "loss": 0.8682, "step": 107000 }, { "epoch": 2.13, "learning_rate": 1.1485654317565293e-05, "loss": 0.8331, "step": 107500 }, { "epoch": 2.14, "learning_rate": 1.1446052709740017e-05, "loss": 0.8967, "step": 108000 }, { "epoch": 2.15, "learning_rate": 1.140645110191474e-05, "loss": 0.8408, "step": 108500 }, { "epoch": 2.16, "learning_rate": 1.136684949408946e-05, "loss": 0.8152, "step": 109000 }, { "epoch": 2.17, "learning_rate": 1.1327247886264184e-05, "loss": 0.7146, "step": 109500 }, { "epoch": 2.18, "learning_rate": 1.1287646278438906e-05, "loss": 0.8338, "step": 110000 }, { "epoch": 2.19, "learning_rate": 1.1248044670613626e-05, "loss": 0.8648, "step": 110500 }, { "epoch": 2.2, "learning_rate": 1.120844306278835e-05, "loss": 0.94, "step": 111000 }, { "epoch": 2.21, "learning_rate": 1.1168841454963073e-05, "loss": 0.784, "step": 111500 }, { "epoch": 2.22, "learning_rate": 1.1129239847137793e-05, "loss": 0.8107, "step": 112000 }, { "epoch": 2.23, "learning_rate": 1.1089638239312517e-05, "loss": 0.8861, "step": 112500 }, { "epoch": 2.24, "learning_rate": 1.105003663148724e-05, "loss": 0.7589, "step": 113000 }, { "epoch": 2.25, "learning_rate": 1.101043502366196e-05, "loss": 0.8012, "step": 113500 }, { "epoch": 2.26, "learning_rate": 1.0970833415836684e-05, "loss": 0.8537, "step": 114000 }, { "epoch": 2.27, "learning_rate": 1.0931231808011406e-05, "loss": 0.7882, "step": 114500 }, { "epoch": 2.28, "learning_rate": 1.0891630200186128e-05, "loss": 0.8474, "step": 115000 }, { "epoch": 2.29, "learning_rate": 1.085202859236085e-05, "loss": 0.9111, "step": 115500 }, { "epoch": 2.3, "learning_rate": 1.0812426984535573e-05, "loss": 0.9118, "step": 116000 }, { "epoch": 2.31, "learning_rate": 1.0772825376710297e-05, "loss": 0.7777, "step": 116500 }, { "epoch": 2.32, "learning_rate": 1.0733223768885017e-05, "loss": 0.8525, "step": 117000 }, { "epoch": 2.33, "learning_rate": 1.069362216105974e-05, "loss": 0.7955, "step": 117500 }, { "epoch": 2.34, "learning_rate": 1.0654020553234463e-05, "loss": 0.8704, "step": 118000 }, { "epoch": 2.35, "learning_rate": 1.0614418945409184e-05, "loss": 0.8488, "step": 118500 }, { "epoch": 2.36, "learning_rate": 1.0574817337583908e-05, "loss": 0.7501, "step": 119000 }, { "epoch": 2.37, "learning_rate": 1.053521572975863e-05, "loss": 0.8638, "step": 119500 }, { "epoch": 2.38, "learning_rate": 1.049561412193335e-05, "loss": 0.8608, "step": 120000 }, { "epoch": 2.39, "learning_rate": 1.0456012514108074e-05, "loss": 0.7551, "step": 120500 }, { "epoch": 2.4, "learning_rate": 1.0416410906282797e-05, "loss": 0.7557, "step": 121000 }, { "epoch": 2.41, "learning_rate": 1.0376809298457517e-05, "loss": 0.8126, "step": 121500 }, { "epoch": 2.42, "learning_rate": 1.0337207690632241e-05, "loss": 0.7691, "step": 122000 }, { "epoch": 2.43, "learning_rate": 1.0297606082806963e-05, "loss": 0.839, "step": 122500 }, { "epoch": 2.44, "learning_rate": 1.0258004474981684e-05, "loss": 0.8673, "step": 123000 }, { "epoch": 2.45, "learning_rate": 1.0218402867156408e-05, "loss": 0.9438, "step": 123500 }, { "epoch": 2.46, "learning_rate": 1.017880125933113e-05, "loss": 0.6942, "step": 124000 }, { "epoch": 2.47, "learning_rate": 1.013919965150585e-05, "loss": 0.8687, "step": 124500 }, { "epoch": 2.48, "learning_rate": 1.0099598043680574e-05, "loss": 0.8989, "step": 125000 }, { "epoch": 2.49, "learning_rate": 1.0059996435855297e-05, "loss": 0.8685, "step": 125500 }, { "epoch": 2.49, "learning_rate": 1.0020394828030017e-05, "loss": 0.7819, "step": 126000 }, { "epoch": 2.5, "learning_rate": 9.980793220204741e-06, "loss": 0.8555, "step": 126500 }, { "epoch": 2.51, "learning_rate": 9.941191612379463e-06, "loss": 0.8755, "step": 127000 }, { "epoch": 2.52, "learning_rate": 9.901590004554186e-06, "loss": 0.6992, "step": 127500 }, { "epoch": 2.53, "learning_rate": 9.861988396728908e-06, "loss": 0.8049, "step": 128000 }, { "epoch": 2.54, "learning_rate": 9.82238678890363e-06, "loss": 0.7552, "step": 128500 }, { "epoch": 2.55, "learning_rate": 9.782785181078352e-06, "loss": 0.9123, "step": 129000 }, { "epoch": 2.56, "learning_rate": 9.743183573253075e-06, "loss": 0.8646, "step": 129500 }, { "epoch": 2.57, "learning_rate": 9.703581965427797e-06, "loss": 0.7248, "step": 130000 }, { "epoch": 2.58, "learning_rate": 9.663980357602519e-06, "loss": 0.7409, "step": 130500 }, { "epoch": 2.59, "learning_rate": 9.624378749777243e-06, "loss": 0.818, "step": 131000 }, { "epoch": 2.6, "learning_rate": 9.584777141951963e-06, "loss": 0.9093, "step": 131500 }, { "epoch": 2.61, "learning_rate": 9.545175534126686e-06, "loss": 0.9333, "step": 132000 }, { "epoch": 2.62, "learning_rate": 9.50557392630141e-06, "loss": 0.7346, "step": 132500 }, { "epoch": 2.63, "learning_rate": 9.46597231847613e-06, "loss": 0.9388, "step": 133000 }, { "epoch": 2.64, "learning_rate": 9.426370710650852e-06, "loss": 0.8087, "step": 133500 }, { "epoch": 2.65, "learning_rate": 9.386769102825576e-06, "loss": 0.7997, "step": 134000 }, { "epoch": 2.66, "learning_rate": 9.347167495000299e-06, "loss": 0.848, "step": 134500 }, { "epoch": 2.67, "learning_rate": 9.307565887175019e-06, "loss": 0.8125, "step": 135000 }, { "epoch": 2.68, "learning_rate": 9.267964279349743e-06, "loss": 0.8664, "step": 135500 }, { "epoch": 2.69, "learning_rate": 9.228362671524465e-06, "loss": 0.8433, "step": 136000 }, { "epoch": 2.7, "learning_rate": 9.188761063699186e-06, "loss": 0.8155, "step": 136500 }, { "epoch": 2.71, "learning_rate": 9.14915945587391e-06, "loss": 0.7922, "step": 137000 }, { "epoch": 2.72, "learning_rate": 9.109557848048632e-06, "loss": 0.8011, "step": 137500 }, { "epoch": 2.73, "learning_rate": 9.069956240223354e-06, "loss": 0.9205, "step": 138000 }, { "epoch": 2.74, "learning_rate": 9.030354632398076e-06, "loss": 0.7784, "step": 138500 }, { "epoch": 2.75, "learning_rate": 8.990753024572799e-06, "loss": 0.8606, "step": 139000 }, { "epoch": 2.76, "learning_rate": 8.951151416747521e-06, "loss": 0.7609, "step": 139500 }, { "epoch": 2.77, "learning_rate": 8.911549808922243e-06, "loss": 0.7485, "step": 140000 }, { "epoch": 2.78, "learning_rate": 8.871948201096965e-06, "loss": 0.845, "step": 140500 }, { "epoch": 2.79, "learning_rate": 8.832346593271688e-06, "loss": 0.79, "step": 141000 }, { "epoch": 2.8, "learning_rate": 8.79274498544641e-06, "loss": 0.8645, "step": 141500 }, { "epoch": 2.81, "learning_rate": 8.753143377621132e-06, "loss": 0.8348, "step": 142000 }, { "epoch": 2.82, "learning_rate": 8.713541769795854e-06, "loss": 0.7997, "step": 142500 }, { "epoch": 2.83, "learning_rate": 8.673940161970576e-06, "loss": 0.9206, "step": 143000 }, { "epoch": 2.84, "learning_rate": 8.634338554145299e-06, "loss": 0.8132, "step": 143500 }, { "epoch": 2.85, "learning_rate": 8.594736946320021e-06, "loss": 0.7391, "step": 144000 }, { "epoch": 2.86, "learning_rate": 8.555135338494743e-06, "loss": 0.7425, "step": 144500 }, { "epoch": 2.87, "learning_rate": 8.515533730669467e-06, "loss": 0.7782, "step": 145000 }, { "epoch": 2.88, "learning_rate": 8.475932122844188e-06, "loss": 0.7929, "step": 145500 }, { "epoch": 2.89, "learning_rate": 8.43633051501891e-06, "loss": 0.8021, "step": 146000 }, { "epoch": 2.9, "learning_rate": 8.396728907193634e-06, "loss": 0.7632, "step": 146500 }, { "epoch": 2.91, "learning_rate": 8.357127299368354e-06, "loss": 0.8913, "step": 147000 }, { "epoch": 2.92, "learning_rate": 8.317525691543077e-06, "loss": 0.7637, "step": 147500 }, { "epoch": 2.93, "learning_rate": 8.2779240837178e-06, "loss": 0.9567, "step": 148000 }, { "epoch": 2.94, "learning_rate": 8.238322475892521e-06, "loss": 0.7907, "step": 148500 }, { "epoch": 2.95, "learning_rate": 8.198720868067243e-06, "loss": 0.8514, "step": 149000 }, { "epoch": 2.96, "learning_rate": 8.159119260241967e-06, "loss": 0.8256, "step": 149500 }, { "epoch": 2.97, "learning_rate": 8.119517652416688e-06, "loss": 0.8798, "step": 150000 }, { "epoch": 2.98, "learning_rate": 8.079916044591412e-06, "loss": 0.8104, "step": 150500 }, { "epoch": 2.99, "learning_rate": 8.040314436766134e-06, "loss": 0.836, "step": 151000 }, { "epoch": 3.0, "learning_rate": 8.000712828940856e-06, "loss": 0.7485, "step": 151500 }, { "epoch": 3.0, "eval_accuracy": 0.7875177501471964, "eval_f1": 0.7860011498691125, "eval_kappa": 0.7158727986038643, "eval_loss": 1.1474955081939697, "eval_precision": 0.7861810623391806, "eval_recall": 0.7875177501471964, "eval_runtime": 75.6215, "eval_samples_per_second": 381.809, "eval_steps_per_second": 190.911, "step": 151509 }, { "epoch": 3.01, "learning_rate": 7.961111221115578e-06, "loss": 0.7403, "step": 152000 }, { "epoch": 3.02, "learning_rate": 7.9215096132903e-06, "loss": 0.7638, "step": 152500 }, { "epoch": 3.03, "learning_rate": 7.881908005465023e-06, "loss": 0.7803, "step": 153000 }, { "epoch": 3.04, "learning_rate": 7.842306397639745e-06, "loss": 0.7332, "step": 153500 }, { "epoch": 3.05, "learning_rate": 7.802704789814467e-06, "loss": 0.6525, "step": 154000 }, { "epoch": 3.06, "learning_rate": 7.76310318198919e-06, "loss": 0.7092, "step": 154500 }, { "epoch": 3.07, "learning_rate": 7.723501574163912e-06, "loss": 0.7231, "step": 155000 }, { "epoch": 3.08, "learning_rate": 7.683899966338634e-06, "loss": 0.6819, "step": 155500 }, { "epoch": 3.09, "learning_rate": 7.644298358513356e-06, "loss": 0.6336, "step": 156000 }, { "epoch": 3.1, "learning_rate": 7.604696750688078e-06, "loss": 0.7541, "step": 156500 }, { "epoch": 3.11, "learning_rate": 7.565095142862801e-06, "loss": 0.7551, "step": 157000 }, { "epoch": 3.12, "learning_rate": 7.525493535037524e-06, "loss": 0.6822, "step": 157500 }, { "epoch": 3.13, "learning_rate": 7.485891927212245e-06, "loss": 0.6976, "step": 158000 }, { "epoch": 3.14, "learning_rate": 7.446290319386967e-06, "loss": 0.7348, "step": 158500 }, { "epoch": 3.15, "learning_rate": 7.40668871156169e-06, "loss": 0.7137, "step": 159000 }, { "epoch": 3.16, "learning_rate": 7.367087103736413e-06, "loss": 0.6911, "step": 159500 }, { "epoch": 3.17, "learning_rate": 7.327485495911134e-06, "loss": 0.7046, "step": 160000 }, { "epoch": 3.18, "learning_rate": 7.287883888085857e-06, "loss": 0.6457, "step": 160500 }, { "epoch": 3.19, "learning_rate": 7.248282280260579e-06, "loss": 0.7221, "step": 161000 }, { "epoch": 3.2, "learning_rate": 7.208680672435302e-06, "loss": 0.7766, "step": 161500 }, { "epoch": 3.21, "learning_rate": 7.169079064610024e-06, "loss": 0.7972, "step": 162000 }, { "epoch": 3.22, "learning_rate": 7.129477456784746e-06, "loss": 0.6912, "step": 162500 }, { "epoch": 3.23, "learning_rate": 7.089875848959469e-06, "loss": 0.7636, "step": 163000 }, { "epoch": 3.24, "learning_rate": 7.0502742411341905e-06, "loss": 0.7225, "step": 163500 }, { "epoch": 3.25, "learning_rate": 7.010672633308913e-06, "loss": 0.8146, "step": 164000 }, { "epoch": 3.26, "learning_rate": 6.971071025483636e-06, "loss": 0.7418, "step": 164500 }, { "epoch": 3.27, "learning_rate": 6.931469417658357e-06, "loss": 0.6402, "step": 165000 }, { "epoch": 3.28, "learning_rate": 6.891867809833079e-06, "loss": 0.7651, "step": 165500 }, { "epoch": 3.29, "learning_rate": 6.8522662020078025e-06, "loss": 0.7056, "step": 166000 }, { "epoch": 3.3, "learning_rate": 6.812664594182524e-06, "loss": 0.7975, "step": 166500 }, { "epoch": 3.31, "learning_rate": 6.773062986357246e-06, "loss": 0.762, "step": 167000 }, { "epoch": 3.32, "learning_rate": 6.733461378531969e-06, "loss": 0.778, "step": 167500 }, { "epoch": 3.33, "learning_rate": 6.693859770706691e-06, "loss": 0.7588, "step": 168000 }, { "epoch": 3.34, "learning_rate": 6.654258162881414e-06, "loss": 0.7555, "step": 168500 }, { "epoch": 3.35, "learning_rate": 6.614656555056136e-06, "loss": 0.6292, "step": 169000 }, { "epoch": 3.36, "learning_rate": 6.575054947230858e-06, "loss": 0.6454, "step": 169500 }, { "epoch": 3.37, "learning_rate": 6.535453339405581e-06, "loss": 0.8678, "step": 170000 }, { "epoch": 3.38, "learning_rate": 6.4958517315803025e-06, "loss": 0.7143, "step": 170500 }, { "epoch": 3.39, "learning_rate": 6.456250123755025e-06, "loss": 0.6987, "step": 171000 }, { "epoch": 3.4, "learning_rate": 6.416648515929748e-06, "loss": 0.6458, "step": 171500 }, { "epoch": 3.41, "learning_rate": 6.377046908104469e-06, "loss": 0.7216, "step": 172000 }, { "epoch": 3.42, "learning_rate": 6.3374453002791915e-06, "loss": 0.7495, "step": 172500 }, { "epoch": 3.43, "learning_rate": 6.2978436924539145e-06, "loss": 0.7331, "step": 173000 }, { "epoch": 3.44, "learning_rate": 6.258242084628636e-06, "loss": 0.7314, "step": 173500 }, { "epoch": 3.45, "learning_rate": 6.218640476803359e-06, "loss": 0.7403, "step": 174000 }, { "epoch": 3.46, "learning_rate": 6.179038868978081e-06, "loss": 0.8834, "step": 174500 }, { "epoch": 3.47, "learning_rate": 6.139437261152803e-06, "loss": 0.7251, "step": 175000 }, { "epoch": 3.48, "learning_rate": 6.099835653327526e-06, "loss": 0.7906, "step": 175500 }, { "epoch": 3.48, "learning_rate": 6.060234045502248e-06, "loss": 0.6149, "step": 176000 }, { "epoch": 3.49, "learning_rate": 6.02063243767697e-06, "loss": 0.8147, "step": 176500 }, { "epoch": 3.5, "learning_rate": 5.981030829851692e-06, "loss": 0.4999, "step": 177000 }, { "epoch": 3.51, "learning_rate": 5.941429222026415e-06, "loss": 0.6705, "step": 177500 }, { "epoch": 3.52, "learning_rate": 5.901827614201137e-06, "loss": 0.7327, "step": 178000 }, { "epoch": 3.53, "learning_rate": 5.86222600637586e-06, "loss": 0.7145, "step": 178500 }, { "epoch": 3.54, "learning_rate": 5.822624398550581e-06, "loss": 0.755, "step": 179000 }, { "epoch": 3.55, "learning_rate": 5.7830227907253035e-06, "loss": 0.7146, "step": 179500 }, { "epoch": 3.56, "learning_rate": 5.743421182900027e-06, "loss": 0.7128, "step": 180000 }, { "epoch": 3.57, "learning_rate": 5.703819575074748e-06, "loss": 0.8053, "step": 180500 }, { "epoch": 3.58, "learning_rate": 5.664217967249471e-06, "loss": 0.6797, "step": 181000 }, { "epoch": 3.59, "learning_rate": 5.624616359424193e-06, "loss": 0.7759, "step": 181500 }, { "epoch": 3.6, "learning_rate": 5.585014751598915e-06, "loss": 0.7025, "step": 182000 }, { "epoch": 3.61, "learning_rate": 5.545413143773638e-06, "loss": 0.841, "step": 182500 }, { "epoch": 3.62, "learning_rate": 5.50581153594836e-06, "loss": 0.7091, "step": 183000 }, { "epoch": 3.63, "learning_rate": 5.466209928123082e-06, "loss": 0.7714, "step": 183500 }, { "epoch": 3.64, "learning_rate": 5.4266083202978045e-06, "loss": 0.6947, "step": 184000 }, { "epoch": 3.65, "learning_rate": 5.387006712472527e-06, "loss": 0.6951, "step": 184500 }, { "epoch": 3.66, "learning_rate": 5.347405104647249e-06, "loss": 0.7448, "step": 185000 }, { "epoch": 3.67, "learning_rate": 5.307803496821972e-06, "loss": 0.7157, "step": 185500 }, { "epoch": 3.68, "learning_rate": 5.268201888996693e-06, "loss": 0.7899, "step": 186000 }, { "epoch": 3.69, "learning_rate": 5.2286002811714165e-06, "loss": 0.7614, "step": 186500 }, { "epoch": 3.7, "learning_rate": 5.188998673346139e-06, "loss": 0.6748, "step": 187000 }, { "epoch": 3.71, "learning_rate": 5.14939706552086e-06, "loss": 0.775, "step": 187500 }, { "epoch": 3.72, "learning_rate": 5.109795457695583e-06, "loss": 0.5913, "step": 188000 }, { "epoch": 3.73, "learning_rate": 5.070193849870305e-06, "loss": 0.781, "step": 188500 }, { "epoch": 3.74, "learning_rate": 5.030592242045027e-06, "loss": 0.7484, "step": 189000 }, { "epoch": 3.75, "learning_rate": 4.99099063421975e-06, "loss": 0.8642, "step": 189500 }, { "epoch": 3.76, "learning_rate": 4.951389026394472e-06, "loss": 0.7611, "step": 190000 }, { "epoch": 3.77, "learning_rate": 4.911787418569194e-06, "loss": 0.6992, "step": 190500 }, { "epoch": 3.78, "learning_rate": 4.8721858107439165e-06, "loss": 0.7199, "step": 191000 }, { "epoch": 3.79, "learning_rate": 4.832584202918639e-06, "loss": 0.72, "step": 191500 }, { "epoch": 3.8, "learning_rate": 4.792982595093361e-06, "loss": 0.63, "step": 192000 }, { "epoch": 3.81, "learning_rate": 4.753380987268083e-06, "loss": 0.7819, "step": 192500 }, { "epoch": 3.82, "learning_rate": 4.7137793794428055e-06, "loss": 0.8278, "step": 193000 }, { "epoch": 3.83, "learning_rate": 4.674177771617528e-06, "loss": 0.8617, "step": 193500 }, { "epoch": 3.84, "learning_rate": 4.634576163792251e-06, "loss": 0.7137, "step": 194000 }, { "epoch": 3.85, "learning_rate": 4.594974555966973e-06, "loss": 0.8056, "step": 194500 }, { "epoch": 3.86, "learning_rate": 4.555372948141694e-06, "loss": 0.7433, "step": 195000 }, { "epoch": 3.87, "learning_rate": 4.5157713403164175e-06, "loss": 0.7577, "step": 195500 }, { "epoch": 3.88, "learning_rate": 4.47616973249114e-06, "loss": 0.6781, "step": 196000 }, { "epoch": 3.89, "learning_rate": 4.436568124665862e-06, "loss": 0.7327, "step": 196500 }, { "epoch": 3.9, "learning_rate": 4.396966516840584e-06, "loss": 0.6053, "step": 197000 }, { "epoch": 3.91, "learning_rate": 4.357364909015306e-06, "loss": 0.8182, "step": 197500 }, { "epoch": 3.92, "learning_rate": 4.317763301190029e-06, "loss": 0.6552, "step": 198000 }, { "epoch": 3.93, "learning_rate": 4.278161693364751e-06, "loss": 0.708, "step": 198500 }, { "epoch": 3.94, "learning_rate": 4.238560085539473e-06, "loss": 0.7182, "step": 199000 }, { "epoch": 3.95, "learning_rate": 4.198958477714195e-06, "loss": 0.7163, "step": 199500 }, { "epoch": 3.96, "learning_rate": 4.159356869888918e-06, "loss": 0.6284, "step": 200000 }, { "epoch": 3.97, "learning_rate": 4.11975526206364e-06, "loss": 0.7379, "step": 200500 }, { "epoch": 3.98, "learning_rate": 4.080153654238362e-06, "loss": 0.6856, "step": 201000 }, { "epoch": 3.99, "learning_rate": 4.040552046413085e-06, "loss": 0.8066, "step": 201500 }, { "epoch": 4.0, "learning_rate": 4.0009504385878065e-06, "loss": 0.7771, "step": 202000 }, { "epoch": 4.0, "eval_accuracy": 0.7868943303432272, "eval_f1": 0.7853190367285746, "eval_kappa": 0.7154440623938234, "eval_loss": 1.2119886875152588, "eval_precision": 0.78613565094685, "eval_recall": 0.7868943303432272, "eval_runtime": 75.5516, "eval_samples_per_second": 382.163, "eval_steps_per_second": 191.088, "step": 202012 }, { "epoch": 4.01, "learning_rate": 3.9613488307625295e-06, "loss": 0.6296, "step": 202500 }, { "epoch": 4.02, "learning_rate": 3.921747222937252e-06, "loss": 0.6372, "step": 203000 }, { "epoch": 4.03, "learning_rate": 3.882145615111974e-06, "loss": 0.6925, "step": 203500 }, { "epoch": 4.04, "learning_rate": 3.842544007286696e-06, "loss": 0.6363, "step": 204000 }, { "epoch": 4.05, "learning_rate": 3.8029423994614185e-06, "loss": 0.6417, "step": 204500 }, { "epoch": 4.06, "learning_rate": 3.7633407916361407e-06, "loss": 0.7023, "step": 205000 }, { "epoch": 4.07, "learning_rate": 3.7237391838108633e-06, "loss": 0.5014, "step": 205500 }, { "epoch": 4.08, "learning_rate": 3.684137575985585e-06, "loss": 0.5731, "step": 206000 }, { "epoch": 4.09, "learning_rate": 3.644535968160308e-06, "loss": 0.6207, "step": 206500 }, { "epoch": 4.1, "learning_rate": 3.60493436033503e-06, "loss": 0.6886, "step": 207000 }, { "epoch": 4.11, "learning_rate": 3.565332752509752e-06, "loss": 0.5521, "step": 207500 }, { "epoch": 4.12, "learning_rate": 3.5257311446844745e-06, "loss": 0.6991, "step": 208000 }, { "epoch": 4.13, "learning_rate": 3.4861295368591967e-06, "loss": 0.727, "step": 208500 }, { "epoch": 4.14, "learning_rate": 3.4465279290339194e-06, "loss": 0.6307, "step": 209000 }, { "epoch": 4.15, "learning_rate": 3.406926321208641e-06, "loss": 0.6373, "step": 209500 }, { "epoch": 4.16, "learning_rate": 3.367324713383364e-06, "loss": 0.7613, "step": 210000 }, { "epoch": 4.17, "learning_rate": 3.327723105558086e-06, "loss": 0.7644, "step": 210500 }, { "epoch": 4.18, "learning_rate": 3.288121497732808e-06, "loss": 0.636, "step": 211000 }, { "epoch": 4.19, "learning_rate": 3.2485198899075305e-06, "loss": 0.6597, "step": 211500 }, { "epoch": 4.2, "learning_rate": 3.2089182820822528e-06, "loss": 0.618, "step": 212000 }, { "epoch": 4.21, "learning_rate": 3.1693166742569754e-06, "loss": 0.6683, "step": 212500 }, { "epoch": 4.22, "learning_rate": 3.1297150664316972e-06, "loss": 0.6481, "step": 213000 }, { "epoch": 4.23, "learning_rate": 3.0901134586064194e-06, "loss": 0.6668, "step": 213500 }, { "epoch": 4.24, "learning_rate": 3.050511850781142e-06, "loss": 0.7244, "step": 214000 }, { "epoch": 4.25, "learning_rate": 3.010910242955864e-06, "loss": 0.7062, "step": 214500 }, { "epoch": 4.26, "learning_rate": 2.9713086351305866e-06, "loss": 0.6242, "step": 215000 }, { "epoch": 4.27, "learning_rate": 2.931707027305309e-06, "loss": 0.7101, "step": 215500 }, { "epoch": 4.28, "learning_rate": 2.8921054194800314e-06, "loss": 0.7261, "step": 216000 }, { "epoch": 4.29, "learning_rate": 2.8525038116547533e-06, "loss": 0.6079, "step": 216500 }, { "epoch": 4.3, "learning_rate": 2.8129022038294755e-06, "loss": 0.6083, "step": 217000 }, { "epoch": 4.31, "learning_rate": 2.773300596004198e-06, "loss": 0.66, "step": 217500 }, { "epoch": 4.32, "learning_rate": 2.7336989881789204e-06, "loss": 0.6848, "step": 218000 }, { "epoch": 4.33, "learning_rate": 2.6940973803536426e-06, "loss": 0.6737, "step": 218500 }, { "epoch": 4.34, "learning_rate": 2.654495772528365e-06, "loss": 0.6204, "step": 219000 }, { "epoch": 4.35, "learning_rate": 2.6148941647030875e-06, "loss": 0.7143, "step": 219500 }, { "epoch": 4.36, "learning_rate": 2.5752925568778093e-06, "loss": 0.6472, "step": 220000 }, { "epoch": 4.37, "learning_rate": 2.5356909490525315e-06, "loss": 0.5789, "step": 220500 }, { "epoch": 4.38, "learning_rate": 2.496089341227254e-06, "loss": 0.5995, "step": 221000 }, { "epoch": 4.39, "learning_rate": 2.4564877334019764e-06, "loss": 0.5762, "step": 221500 }, { "epoch": 4.4, "learning_rate": 2.4168861255766986e-06, "loss": 0.6906, "step": 222000 }, { "epoch": 4.41, "learning_rate": 2.377284517751421e-06, "loss": 0.7736, "step": 222500 }, { "epoch": 4.42, "learning_rate": 2.3376829099261435e-06, "loss": 0.6102, "step": 223000 }, { "epoch": 4.43, "learning_rate": 2.2980813021008653e-06, "loss": 0.6308, "step": 223500 }, { "epoch": 4.44, "learning_rate": 2.2584796942755876e-06, "loss": 0.6342, "step": 224000 }, { "epoch": 4.45, "learning_rate": 2.21887808645031e-06, "loss": 0.6648, "step": 224500 }, { "epoch": 4.46, "learning_rate": 2.1792764786250324e-06, "loss": 0.6703, "step": 225000 }, { "epoch": 4.47, "learning_rate": 2.1396748707997547e-06, "loss": 0.5516, "step": 225500 }, { "epoch": 4.47, "learning_rate": 2.100073262974477e-06, "loss": 0.5947, "step": 226000 }, { "epoch": 4.48, "learning_rate": 2.060471655149199e-06, "loss": 0.6167, "step": 226500 }, { "epoch": 4.49, "learning_rate": 2.0208700473239214e-06, "loss": 0.6554, "step": 227000 }, { "epoch": 4.5, "learning_rate": 1.9812684394986436e-06, "loss": 0.5821, "step": 227500 }, { "epoch": 4.51, "learning_rate": 1.9416668316733662e-06, "loss": 0.7354, "step": 228000 }, { "epoch": 4.52, "learning_rate": 1.9020652238480883e-06, "loss": 0.5839, "step": 228500 }, { "epoch": 4.53, "learning_rate": 1.8624636160228107e-06, "loss": 0.5959, "step": 229000 }, { "epoch": 4.54, "learning_rate": 1.822862008197533e-06, "loss": 0.6615, "step": 229500 }, { "epoch": 4.55, "learning_rate": 1.7832604003722554e-06, "loss": 0.7699, "step": 230000 }, { "epoch": 4.56, "learning_rate": 1.7436587925469776e-06, "loss": 0.5697, "step": 230500 }, { "epoch": 4.57, "learning_rate": 1.7040571847217e-06, "loss": 0.587, "step": 231000 }, { "epoch": 4.58, "learning_rate": 1.664455576896422e-06, "loss": 0.6322, "step": 231500 }, { "epoch": 4.59, "learning_rate": 1.6248539690711443e-06, "loss": 0.6551, "step": 232000 }, { "epoch": 4.6, "learning_rate": 1.5852523612458667e-06, "loss": 0.7344, "step": 232500 }, { "epoch": 4.61, "learning_rate": 1.545650753420589e-06, "loss": 0.6996, "step": 233000 }, { "epoch": 4.62, "learning_rate": 1.5060491455953114e-06, "loss": 0.6933, "step": 233500 }, { "epoch": 4.63, "learning_rate": 1.4664475377700334e-06, "loss": 0.6981, "step": 234000 }, { "epoch": 4.64, "learning_rate": 1.4268459299447559e-06, "loss": 0.7396, "step": 234500 }, { "epoch": 4.65, "learning_rate": 1.3872443221194781e-06, "loss": 0.6504, "step": 235000 }, { "epoch": 4.66, "learning_rate": 1.3476427142942005e-06, "loss": 0.6253, "step": 235500 }, { "epoch": 4.67, "learning_rate": 1.3080411064689228e-06, "loss": 0.5895, "step": 236000 }, { "epoch": 4.68, "learning_rate": 1.268439498643645e-06, "loss": 0.641, "step": 236500 }, { "epoch": 4.69, "learning_rate": 1.2288378908183675e-06, "loss": 0.5764, "step": 237000 }, { "epoch": 4.7, "learning_rate": 1.1892362829930897e-06, "loss": 0.641, "step": 237500 }, { "epoch": 4.71, "learning_rate": 1.149634675167812e-06, "loss": 0.5424, "step": 238000 }, { "epoch": 4.72, "learning_rate": 1.1100330673425341e-06, "loss": 0.7001, "step": 238500 }, { "epoch": 4.73, "learning_rate": 1.0704314595172564e-06, "loss": 0.7445, "step": 239000 }, { "epoch": 4.74, "learning_rate": 1.0308298516919788e-06, "loss": 0.7469, "step": 239500 }, { "epoch": 4.75, "learning_rate": 9.91228243866701e-07, "loss": 0.6584, "step": 240000 }, { "epoch": 4.76, "learning_rate": 9.516266360414234e-07, "loss": 0.6375, "step": 240500 }, { "epoch": 4.77, "learning_rate": 9.120250282161456e-07, "loss": 0.6724, "step": 241000 }, { "epoch": 4.78, "learning_rate": 8.72423420390868e-07, "loss": 0.5083, "step": 241500 }, { "epoch": 4.79, "learning_rate": 8.328218125655903e-07, "loss": 0.6267, "step": 242000 }, { "epoch": 4.8, "learning_rate": 7.932202047403126e-07, "loss": 0.7312, "step": 242500 }, { "epoch": 4.81, "learning_rate": 7.536185969150347e-07, "loss": 0.5861, "step": 243000 }, { "epoch": 4.82, "learning_rate": 7.140169890897571e-07, "loss": 0.7772, "step": 243500 }, { "epoch": 4.83, "learning_rate": 6.744153812644793e-07, "loss": 0.6953, "step": 244000 }, { "epoch": 4.84, "learning_rate": 6.348137734392017e-07, "loss": 0.6275, "step": 244500 }, { "epoch": 4.85, "learning_rate": 5.95212165613924e-07, "loss": 0.6203, "step": 245000 }, { "epoch": 4.86, "learning_rate": 5.556105577886463e-07, "loss": 0.5098, "step": 245500 }, { "epoch": 4.87, "learning_rate": 5.160089499633686e-07, "loss": 0.6237, "step": 246000 }, { "epoch": 4.88, "learning_rate": 4.7640734213809084e-07, "loss": 0.5705, "step": 246500 }, { "epoch": 4.89, "learning_rate": 4.368057343128131e-07, "loss": 0.6605, "step": 247000 }, { "epoch": 4.9, "learning_rate": 3.9720412648753546e-07, "loss": 0.653, "step": 247500 }, { "epoch": 4.91, "learning_rate": 3.5760251866225774e-07, "loss": 0.6499, "step": 248000 }, { "epoch": 4.92, "learning_rate": 3.1800091083697997e-07, "loss": 0.6469, "step": 248500 }, { "epoch": 4.93, "learning_rate": 2.783993030117023e-07, "loss": 0.5937, "step": 249000 }, { "epoch": 4.94, "learning_rate": 2.387976951864246e-07, "loss": 0.7448, "step": 249500 }, { "epoch": 4.95, "learning_rate": 1.9919608736114687e-07, "loss": 0.6844, "step": 250000 }, { "epoch": 4.96, "learning_rate": 1.5959447953586918e-07, "loss": 0.6315, "step": 250500 }, { "epoch": 4.97, "learning_rate": 1.1999287171059147e-07, "loss": 0.6194, "step": 251000 }, { "epoch": 4.98, "learning_rate": 8.039126388531375e-08, "loss": 0.6681, "step": 251500 }, { "epoch": 4.99, "learning_rate": 4.078965606003604e-08, "loss": 0.7031, "step": 252000 }, { "epoch": 5.0, "learning_rate": 1.1880482347583313e-09, "loss": 0.6563, "step": 252500 }, { "epoch": 5.0, "eval_accuracy": 0.7867904270425657, "eval_f1": 0.7858255807919805, "eval_kappa": 0.7156714057157751, "eval_loss": 1.2860811948776245, "eval_precision": 0.786926563144229, "eval_recall": 0.7867904270425657, "eval_runtime": 77.0835, "eval_samples_per_second": 374.568, "eval_steps_per_second": 187.29, "step": 252515 } ], "max_steps": 252515, "num_train_epochs": 5, "total_flos": 9657176634040320.0, "trial_name": null, "trial_params": null }