{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 176630, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.8307761988337205e-09, "loss": 0.7015, "step": 1 }, { "epoch": 0.01, "learning_rate": 7.0769404970843e-07, "loss": 0.6985, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.41538809941686e-06, "loss": 0.6961, "step": 500 }, { "epoch": 0.04, "learning_rate": 2.12308214912529e-06, "loss": 0.6965, "step": 750 }, { "epoch": 0.06, "learning_rate": 2.83077619883372e-06, "loss": 0.6961, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.538470248542151e-06, "loss": 0.6945, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.24616429825058e-06, "loss": 0.6941, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.953858347959011e-06, "loss": 0.6953, "step": 1750 }, { "epoch": 0.11, "learning_rate": 5.66155239766744e-06, "loss": 0.6938, "step": 2000 }, { "epoch": 0.13, "learning_rate": 6.36924644737587e-06, "loss": 0.6939, "step": 2250 }, { "epoch": 0.14, "learning_rate": 7.076940497084302e-06, "loss": 0.6943, "step": 2500 }, { "epoch": 0.16, "learning_rate": 7.78463454679273e-06, "loss": 0.694, "step": 2750 }, { "epoch": 0.17, "learning_rate": 8.49232859650116e-06, "loss": 0.695, "step": 3000 }, { "epoch": 0.18, "learning_rate": 9.200022646209591e-06, "loss": 0.6937, "step": 3250 }, { "epoch": 0.2, "learning_rate": 9.907716695918021e-06, "loss": 0.6944, "step": 3500 }, { "epoch": 0.21, "learning_rate": 1.0615410745626452e-05, "loss": 0.6954, "step": 3750 }, { "epoch": 0.23, "learning_rate": 1.132310479533488e-05, "loss": 0.6955, "step": 4000 }, { "epoch": 0.24, "learning_rate": 1.2030798845043312e-05, "loss": 0.6959, "step": 4250 }, { "epoch": 0.25, "learning_rate": 1.273849289475174e-05, "loss": 0.6926, "step": 4500 }, { "epoch": 0.27, "learning_rate": 1.3446186944460171e-05, "loss": 0.6946, "step": 4750 }, { "epoch": 0.28, "learning_rate": 1.4153880994168603e-05, "loss": 0.6934, "step": 5000 }, { "epoch": 0.3, "learning_rate": 1.4861575043877032e-05, "loss": 0.6956, "step": 5250 }, { "epoch": 0.31, "learning_rate": 1.556926909358546e-05, "loss": 0.6936, "step": 5500 }, { "epoch": 0.33, "learning_rate": 1.627696314329389e-05, "loss": 0.6944, "step": 5750 }, { "epoch": 0.34, "learning_rate": 1.698465719300232e-05, "loss": 0.6927, "step": 6000 }, { "epoch": 0.35, "learning_rate": 1.769235124271075e-05, "loss": 0.6959, "step": 6250 }, { "epoch": 0.37, "learning_rate": 1.8400045292419182e-05, "loss": 0.6954, "step": 6500 }, { "epoch": 0.38, "learning_rate": 1.9107739342127612e-05, "loss": 0.6931, "step": 6750 }, { "epoch": 0.4, "learning_rate": 1.9815433391836042e-05, "loss": 0.6948, "step": 7000 }, { "epoch": 0.41, "learning_rate": 2.0523127441544473e-05, "loss": 0.694, "step": 7250 }, { "epoch": 0.42, "learning_rate": 2.1230821491252903e-05, "loss": 0.6955, "step": 7500 }, { "epoch": 0.44, "learning_rate": 2.1938515540961333e-05, "loss": 0.6939, "step": 7750 }, { "epoch": 0.45, "learning_rate": 2.264620959066976e-05, "loss": 0.6944, "step": 8000 }, { "epoch": 0.47, "learning_rate": 2.3353903640378194e-05, "loss": 0.695, "step": 8250 }, { "epoch": 0.48, "learning_rate": 2.4061597690086624e-05, "loss": 0.6968, "step": 8500 }, { "epoch": 0.5, "learning_rate": 2.476929173979505e-05, "loss": 0.6958, "step": 8750 }, { "epoch": 0.51, "learning_rate": 2.547698578950348e-05, "loss": 0.6948, "step": 9000 }, { "epoch": 0.52, "learning_rate": 2.6184679839211912e-05, "loss": 0.6948, "step": 9250 }, { "epoch": 0.54, "learning_rate": 2.6892373888920342e-05, "loss": 0.6965, "step": 9500 }, { "epoch": 0.55, "learning_rate": 2.760006793862877e-05, "loss": 0.6935, "step": 9750 }, { "epoch": 0.57, "learning_rate": 2.8307761988337206e-05, "loss": 0.6949, "step": 10000 }, { "epoch": 0.58, "learning_rate": 2.9015456038045637e-05, "loss": 0.6949, "step": 10250 }, { "epoch": 0.59, "learning_rate": 2.9723150087754064e-05, "loss": 0.6946, "step": 10500 }, { "epoch": 0.61, "learning_rate": 3.0430844137462494e-05, "loss": 0.6944, "step": 10750 }, { "epoch": 0.62, "learning_rate": 3.113853818717092e-05, "loss": 0.694, "step": 11000 }, { "epoch": 0.64, "learning_rate": 3.184623223687935e-05, "loss": 0.6934, "step": 11250 }, { "epoch": 0.65, "learning_rate": 3.255109551038895e-05, "loss": 0.6955, "step": 11500 }, { "epoch": 0.67, "learning_rate": 3.325878956009738e-05, "loss": 0.6953, "step": 11750 }, { "epoch": 0.68, "learning_rate": 3.396648360980581e-05, "loss": 0.6957, "step": 12000 }, { "epoch": 0.69, "learning_rate": 3.4674177659514237e-05, "loss": 0.6939, "step": 12250 }, { "epoch": 0.71, "learning_rate": 3.538187170922267e-05, "loss": 0.6956, "step": 12500 }, { "epoch": 0.72, "learning_rate": 3.60895657589311e-05, "loss": 0.6948, "step": 12750 }, { "epoch": 0.74, "learning_rate": 3.679725980863953e-05, "loss": 0.6936, "step": 13000 }, { "epoch": 0.75, "learning_rate": 3.7504953858347965e-05, "loss": 0.6949, "step": 13250 }, { "epoch": 0.76, "learning_rate": 3.820981713185756e-05, "loss": 0.6941, "step": 13500 }, { "epoch": 0.78, "learning_rate": 3.891751118156598e-05, "loss": 0.6941, "step": 13750 }, { "epoch": 0.79, "learning_rate": 3.962520523127441e-05, "loss": 0.695, "step": 14000 }, { "epoch": 0.81, "learning_rate": 4.033289928098285e-05, "loss": 0.6957, "step": 14250 }, { "epoch": 0.82, "learning_rate": 4.104059333069128e-05, "loss": 0.6949, "step": 14500 }, { "epoch": 0.84, "learning_rate": 4.174828738039971e-05, "loss": 0.6945, "step": 14750 }, { "epoch": 0.85, "learning_rate": 4.245598143010814e-05, "loss": 0.6943, "step": 15000 }, { "epoch": 0.86, "learning_rate": 4.3163675479816565e-05, "loss": 0.6952, "step": 15250 }, { "epoch": 0.88, "learning_rate": 4.3871369529524995e-05, "loss": 0.6939, "step": 15500 }, { "epoch": 0.89, "learning_rate": 4.4579063579233425e-05, "loss": 0.6952, "step": 15750 }, { "epoch": 0.91, "learning_rate": 4.5283926852743026e-05, "loss": 0.6946, "step": 16000 }, { "epoch": 0.92, "learning_rate": 4.599162090245146e-05, "loss": 0.6939, "step": 16250 }, { "epoch": 0.93, "learning_rate": 4.669931495215989e-05, "loss": 0.695, "step": 16500 }, { "epoch": 0.95, "learning_rate": 4.740700900186831e-05, "loss": 0.6945, "step": 16750 }, { "epoch": 0.96, "learning_rate": 4.811470305157674e-05, "loss": 0.6938, "step": 17000 }, { "epoch": 0.98, "learning_rate": 4.882239710128517e-05, "loss": 0.6946, "step": 17250 }, { "epoch": 0.99, "learning_rate": 4.953009115099361e-05, "loss": 0.6943, "step": 17500 }, { "epoch": 1.0, "eval_accuracy": 0.5002547770700637, "eval_f1": 0.33344654835696697, "eval_loss": 0.6931106448173523, "eval_precision": 0.25012738853503186, "eval_recall": 0.5, "eval_runtime": 12.7276, "eval_samples_per_second": 616.771, "eval_steps_per_second": 77.155, "step": 17663 }, { "epoch": 1.0, "learning_rate": 4.999996555277244e-05, "loss": 0.6939, "step": 17750 }, { "epoch": 1.02, "learning_rate": 4.999945538806009e-05, "loss": 0.6931, "step": 18000 }, { "epoch": 1.03, "learning_rate": 4.999834068779779e-05, "loss": 0.6939, "step": 18250 }, { "epoch": 1.05, "learning_rate": 4.9996612525797716e-05, "loss": 0.6938, "step": 18500 }, { "epoch": 1.06, "learning_rate": 4.999427420049964e-05, "loss": 0.6948, "step": 18750 }, { "epoch": 1.08, "learning_rate": 4.999132576898172e-05, "loss": 0.6935, "step": 19000 }, { "epoch": 1.09, "learning_rate": 4.998776730321471e-05, "loss": 0.6945, "step": 19250 }, { "epoch": 1.1, "learning_rate": 4.9983598890060156e-05, "loss": 0.6949, "step": 19500 }, { "epoch": 1.12, "learning_rate": 4.997882063126838e-05, "loss": 0.6947, "step": 19750 }, { "epoch": 1.13, "learning_rate": 4.997343264347589e-05, "loss": 0.6935, "step": 20000 }, { "epoch": 1.15, "learning_rate": 4.996743505820262e-05, "loss": 0.6941, "step": 20250 }, { "epoch": 1.16, "learning_rate": 4.996082802184866e-05, "loss": 0.6947, "step": 20500 }, { "epoch": 1.17, "learning_rate": 4.9953641774472115e-05, "loss": 0.6937, "step": 20750 }, { "epoch": 1.19, "learning_rate": 4.9945818770743544e-05, "loss": 0.6958, "step": 21000 }, { "epoch": 1.2, "learning_rate": 4.99373868435844e-05, "loss": 0.6946, "step": 21250 }, { "epoch": 1.22, "learning_rate": 4.992834619881665e-05, "loss": 0.6937, "step": 21500 }, { "epoch": 1.23, "learning_rate": 4.991869705712099e-05, "loss": 0.6952, "step": 21750 }, { "epoch": 1.25, "learning_rate": 4.990843965403141e-05, "loss": 0.6938, "step": 22000 }, { "epoch": 1.26, "learning_rate": 4.989757423992949e-05, "loss": 0.6927, "step": 22250 }, { "epoch": 1.27, "learning_rate": 4.9886101080038236e-05, "loss": 0.6954, "step": 22500 }, { "epoch": 1.29, "learning_rate": 4.987402045441564e-05, "loss": 0.6933, "step": 22750 }, { "epoch": 1.3, "learning_rate": 4.986138461821248e-05, "loss": 0.6934, "step": 23000 }, { "epoch": 1.32, "learning_rate": 4.9848092387414506e-05, "loss": 0.6949, "step": 23250 }, { "epoch": 1.33, "learning_rate": 4.983419361867128e-05, "loss": 0.6947, "step": 23500 }, { "epoch": 1.34, "learning_rate": 4.9819688651249495e-05, "loss": 0.6951, "step": 23750 }, { "epoch": 1.36, "learning_rate": 4.9804577839213096e-05, "loss": 0.694, "step": 24000 }, { "epoch": 1.37, "learning_rate": 4.978886155141456e-05, "loss": 0.6941, "step": 24250 }, { "epoch": 1.39, "learning_rate": 4.977254017148597e-05, "loss": 0.6942, "step": 24500 }, { "epoch": 1.4, "learning_rate": 4.9755614097829575e-05, "loss": 0.6931, "step": 24750 }, { "epoch": 1.42, "learning_rate": 4.9738083743608114e-05, "loss": 0.6939, "step": 25000 }, { "epoch": 1.43, "learning_rate": 4.9719949536734725e-05, "loss": 0.6946, "step": 25250 }, { "epoch": 1.44, "learning_rate": 4.97012880717214e-05, "loss": 0.695, "step": 25500 }, { "epoch": 1.46, "learning_rate": 4.968194991311238e-05, "loss": 0.6935, "step": 25750 }, { "epoch": 1.47, "learning_rate": 4.9662009272069223e-05, "loss": 0.6946, "step": 26000 }, { "epoch": 1.49, "learning_rate": 4.964146663533976e-05, "loss": 0.6943, "step": 26250 }, { "epoch": 1.5, "learning_rate": 4.962032250436647e-05, "loss": 0.6942, "step": 26500 }, { "epoch": 1.51, "learning_rate": 4.959857739527419e-05, "loss": 0.6938, "step": 26750 }, { "epoch": 1.53, "learning_rate": 4.957623183885755e-05, "loss": 0.6942, "step": 27000 }, { "epoch": 1.54, "learning_rate": 4.9553286380567973e-05, "loss": 0.6947, "step": 27250 }, { "epoch": 1.56, "learning_rate": 4.952983695283269e-05, "loss": 0.6946, "step": 27500 }, { "epoch": 1.57, "learning_rate": 4.9505695779615944e-05, "loss": 0.6945, "step": 27750 }, { "epoch": 1.59, "learning_rate": 4.9480956426300165e-05, "loss": 0.6937, "step": 28000 }, { "epoch": 1.6, "learning_rate": 4.945561949676898e-05, "loss": 0.6935, "step": 28250 }, { "epoch": 1.61, "learning_rate": 4.9429685609492773e-05, "loss": 0.6943, "step": 28500 }, { "epoch": 1.63, "learning_rate": 4.940315539751357e-05, "loss": 0.6942, "step": 28750 }, { "epoch": 1.64, "learning_rate": 4.9376029508429584e-05, "loss": 0.6938, "step": 29000 }, { "epoch": 1.66, "learning_rate": 4.934830860437941e-05, "loss": 0.6941, "step": 29250 }, { "epoch": 1.67, "learning_rate": 4.932010780600598e-05, "loss": 0.6943, "step": 29500 }, { "epoch": 1.68, "learning_rate": 4.92912012897121e-05, "loss": 0.694, "step": 29750 }, { "epoch": 1.7, "learning_rate": 4.926170182909523e-05, "loss": 0.6943, "step": 30000 }, { "epoch": 1.71, "learning_rate": 4.923161014423248e-05, "loss": 0.6941, "step": 30250 }, { "epoch": 1.73, "learning_rate": 4.920092696965702e-05, "loss": 0.6947, "step": 30500 }, { "epoch": 1.74, "learning_rate": 4.916965305434024e-05, "loss": 0.6942, "step": 30750 }, { "epoch": 1.76, "learning_rate": 4.913778916167339e-05, "loss": 0.6941, "step": 31000 }, { "epoch": 1.77, "learning_rate": 4.910533606944895e-05, "loss": 0.6942, "step": 31250 }, { "epoch": 1.78, "learning_rate": 4.907242790688348e-05, "loss": 0.6942, "step": 31500 }, { "epoch": 1.8, "learning_rate": 4.903880115520887e-05, "loss": 0.6939, "step": 31750 }, { "epoch": 1.81, "learning_rate": 4.900458762025803e-05, "loss": 0.6933, "step": 32000 }, { "epoch": 1.83, "learning_rate": 4.8969788137177854e-05, "loss": 0.6943, "step": 32250 }, { "epoch": 1.84, "learning_rate": 4.893440355541813e-05, "loss": 0.6938, "step": 32500 }, { "epoch": 1.85, "learning_rate": 4.889843473871082e-05, "loss": 0.6947, "step": 32750 }, { "epoch": 1.87, "learning_rate": 4.8861882565048975e-05, "loss": 0.6939, "step": 33000 }, { "epoch": 1.88, "learning_rate": 4.882474792666527e-05, "loss": 0.694, "step": 33250 }, { "epoch": 1.9, "learning_rate": 4.878703173001027e-05, "loss": 0.693, "step": 33500 }, { "epoch": 1.91, "learning_rate": 4.87488892384633e-05, "loss": 0.6939, "step": 33750 }, { "epoch": 1.92, "learning_rate": 4.871001501830819e-05, "loss": 0.6947, "step": 34000 }, { "epoch": 1.94, "learning_rate": 4.867056204049366e-05, "loss": 0.6938, "step": 34250 }, { "epoch": 1.95, "learning_rate": 4.863053126806055e-05, "loss": 0.6932, "step": 34500 }, { "epoch": 1.97, "learning_rate": 4.85899236781536e-05, "loss": 0.6932, "step": 34750 }, { "epoch": 1.98, "learning_rate": 4.854874026199756e-05, "loss": 0.694, "step": 35000 }, { "epoch": 2.0, "learning_rate": 4.8506982024873006e-05, "loss": 0.6941, "step": 35250 }, { "epoch": 2.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.6948533058166504, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.6064, "eval_samples_per_second": 622.697, "eval_steps_per_second": 77.897, "step": 35326 }, { "epoch": 2.01, "learning_rate": 4.846464998609178e-05, "loss": 0.6946, "step": 35500 }, { "epoch": 2.02, "learning_rate": 4.842174517897218e-05, "loss": 0.6937, "step": 35750 }, { "epoch": 2.04, "learning_rate": 4.8378443694392686e-05, "loss": 0.6938, "step": 36000 }, { "epoch": 2.05, "learning_rate": 4.833439878695692e-05, "loss": 0.6936, "step": 36250 }, { "epoch": 2.07, "learning_rate": 4.8289784290593636e-05, "loss": 0.6934, "step": 36500 }, { "epoch": 2.08, "learning_rate": 4.824460129433551e-05, "loss": 0.6939, "step": 36750 }, { "epoch": 2.09, "learning_rate": 4.819885090109222e-05, "loss": 0.6936, "step": 37000 }, { "epoch": 2.11, "learning_rate": 4.815253422762353e-05, "loss": 0.6931, "step": 37250 }, { "epoch": 2.12, "learning_rate": 4.8105652404512013e-05, "loss": 0.6941, "step": 37500 }, { "epoch": 2.14, "learning_rate": 4.8058206576135415e-05, "loss": 0.6935, "step": 37750 }, { "epoch": 2.15, "learning_rate": 4.801039105498368e-05, "loss": 0.694, "step": 38000 }, { "epoch": 2.17, "learning_rate": 4.796182294859949e-05, "loss": 0.6936, "step": 38250 }, { "epoch": 2.18, "learning_rate": 4.791269434780415e-05, "loss": 0.6933, "step": 38500 }, { "epoch": 2.19, "learning_rate": 4.7863006451818936e-05, "loss": 0.6936, "step": 38750 }, { "epoch": 2.21, "learning_rate": 4.781276047351739e-05, "loss": 0.6932, "step": 39000 }, { "epoch": 2.22, "learning_rate": 4.7761957639395794e-05, "loss": 0.6943, "step": 39250 }, { "epoch": 2.24, "learning_rate": 4.771059918954316e-05, "loss": 0.6933, "step": 39500 }, { "epoch": 2.25, "learning_rate": 4.7658686377610994e-05, "loss": 0.6943, "step": 39750 }, { "epoch": 2.26, "learning_rate": 4.7606220470782714e-05, "loss": 0.6942, "step": 40000 }, { "epoch": 2.28, "learning_rate": 4.755341591813002e-05, "loss": 0.6933, "step": 40250 }, { "epoch": 2.29, "learning_rate": 4.749984987651673e-05, "loss": 0.6943, "step": 40500 }, { "epoch": 2.31, "learning_rate": 4.744573461718082e-05, "loss": 0.6934, "step": 40750 }, { "epoch": 2.32, "learning_rate": 4.739107146106705e-05, "loss": 0.6933, "step": 41000 }, { "epoch": 2.34, "learning_rate": 4.733586174249431e-05, "loss": 0.6938, "step": 41250 }, { "epoch": 2.35, "learning_rate": 4.728010680912296e-05, "loss": 0.6937, "step": 41500 }, { "epoch": 2.36, "learning_rate": 4.722380802192197e-05, "loss": 0.6935, "step": 41750 }, { "epoch": 2.38, "learning_rate": 4.716696675513569e-05, "loss": 0.694, "step": 42000 }, { "epoch": 2.39, "learning_rate": 4.710958439625032e-05, "loss": 0.6945, "step": 42250 }, { "epoch": 2.41, "learning_rate": 4.7051895107356976e-05, "loss": 0.694, "step": 42500 }, { "epoch": 2.42, "learning_rate": 4.6993436929806e-05, "loss": 0.694, "step": 42750 }, { "epoch": 2.43, "learning_rate": 4.69344418959911e-05, "loss": 0.6934, "step": 43000 }, { "epoch": 2.45, "learning_rate": 4.687491144597158e-05, "loss": 0.6942, "step": 43250 }, { "epoch": 2.46, "learning_rate": 4.6814847032876174e-05, "loss": 0.6937, "step": 43500 }, { "epoch": 2.48, "learning_rate": 4.675425012286756e-05, "loss": 0.6932, "step": 43750 }, { "epoch": 2.49, "learning_rate": 4.6693122195106574e-05, "loss": 0.6942, "step": 44000 }, { "epoch": 2.51, "learning_rate": 4.663146474171613e-05, "loss": 0.6933, "step": 44250 }, { "epoch": 2.52, "learning_rate": 4.656952905945011e-05, "loss": 0.694, "step": 44500 }, { "epoch": 2.53, "learning_rate": 4.650681918580503e-05, "loss": 0.6939, "step": 44750 }, { "epoch": 2.55, "learning_rate": 4.644358433415702e-05, "loss": 0.6935, "step": 45000 }, { "epoch": 2.56, "learning_rate": 4.6379826048058654e-05, "loss": 0.6942, "step": 45250 }, { "epoch": 2.58, "learning_rate": 4.6315545883839474e-05, "loss": 0.6939, "step": 45500 }, { "epoch": 2.59, "learning_rate": 4.625074541056795e-05, "loss": 0.693, "step": 45750 }, { "epoch": 2.6, "learning_rate": 4.618542621001324e-05, "loss": 0.6941, "step": 46000 }, { "epoch": 2.62, "learning_rate": 4.611958987660653e-05, "loss": 0.6938, "step": 46250 }, { "epoch": 2.63, "learning_rate": 4.6053238017402155e-05, "loss": 0.6932, "step": 46500 }, { "epoch": 2.65, "learning_rate": 4.598664073664183e-05, "loss": 0.6938, "step": 46750 }, { "epoch": 2.66, "learning_rate": 4.591926474312919e-05, "loss": 0.694, "step": 47000 }, { "epoch": 2.68, "learning_rate": 4.585137811372323e-05, "loss": 0.6938, "step": 47250 }, { "epoch": 2.69, "learning_rate": 4.5782982505525664e-05, "loss": 0.6936, "step": 47500 }, { "epoch": 2.7, "learning_rate": 4.5714079588062306e-05, "loss": 0.6938, "step": 47750 }, { "epoch": 2.72, "learning_rate": 4.5644671043242314e-05, "loss": 0.6938, "step": 48000 }, { "epoch": 2.73, "learning_rate": 4.557475856531712e-05, "loss": 0.6943, "step": 48250 }, { "epoch": 2.75, "learning_rate": 4.550434386083909e-05, "loss": 0.693, "step": 48500 }, { "epoch": 2.76, "learning_rate": 4.543371330418998e-05, "loss": 0.6936, "step": 48750 }, { "epoch": 2.77, "learning_rate": 4.5362301306900956e-05, "loss": 0.6924, "step": 49000 }, { "epoch": 2.79, "learning_rate": 4.5290392269106644e-05, "loss": 0.6936, "step": 49250 }, { "epoch": 2.8, "learning_rate": 4.521798794609512e-05, "loss": 0.6941, "step": 49500 }, { "epoch": 2.82, "learning_rate": 4.514509010524428e-05, "loss": 0.693, "step": 49750 }, { "epoch": 2.83, "learning_rate": 4.507170052597872e-05, "loss": 0.6943, "step": 50000 }, { "epoch": 2.84, "learning_rate": 4.499782099972628e-05, "loss": 0.6936, "step": 50250 }, { "epoch": 2.86, "learning_rate": 4.4923453329874334e-05, "loss": 0.6938, "step": 50500 }, { "epoch": 2.87, "learning_rate": 4.484859933172575e-05, "loss": 0.694, "step": 50750 }, { "epoch": 2.89, "learning_rate": 4.477356314914456e-05, "loss": 0.6938, "step": 51000 }, { "epoch": 2.9, "learning_rate": 4.469774391472113e-05, "loss": 0.6933, "step": 51250 }, { "epoch": 2.92, "learning_rate": 4.4621443861531634e-05, "loss": 0.6936, "step": 51500 }, { "epoch": 2.93, "learning_rate": 4.4544664852048143e-05, "loss": 0.6932, "step": 51750 }, { "epoch": 2.94, "learning_rate": 4.446740876043395e-05, "loss": 0.693, "step": 52000 }, { "epoch": 2.96, "learning_rate": 4.438967747249785e-05, "loss": 0.6944, "step": 52250 }, { "epoch": 2.97, "learning_rate": 4.4311472885648076e-05, "loss": 0.694, "step": 52500 }, { "epoch": 2.99, "learning_rate": 4.423279690884606e-05, "loss": 0.6931, "step": 52750 }, { "epoch": 3.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.6931188702583313, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.4472, "eval_samples_per_second": 630.665, "eval_steps_per_second": 78.893, "step": 52989 }, { "epoch": 3.0, "learning_rate": 4.4153651462559756e-05, "loss": 0.6939, "step": 53000 }, { "epoch": 3.01, "learning_rate": 4.407403847871679e-05, "loss": 0.6935, "step": 53250 }, { "epoch": 3.03, "learning_rate": 4.399395990065732e-05, "loss": 0.693, "step": 53500 }, { "epoch": 3.04, "learning_rate": 4.3913740772924885e-05, "loss": 0.6932, "step": 53750 }, { "epoch": 3.06, "learning_rate": 4.383273872462815e-05, "loss": 0.6931, "step": 54000 }, { "epoch": 3.07, "learning_rate": 4.3751276972203183e-05, "loss": 0.6941, "step": 54250 }, { "epoch": 3.09, "learning_rate": 4.366935750411825e-05, "loss": 0.6938, "step": 54500 }, { "epoch": 3.1, "learning_rate": 4.3586982320014426e-05, "loss": 0.6942, "step": 54750 }, { "epoch": 3.11, "learning_rate": 4.3504153430656725e-05, "loss": 0.694, "step": 55000 }, { "epoch": 3.13, "learning_rate": 4.3420872857885045e-05, "loss": 0.693, "step": 55250 }, { "epoch": 3.14, "learning_rate": 4.333714263456478e-05, "loss": 0.6935, "step": 55500 }, { "epoch": 3.16, "learning_rate": 4.325330240477026e-05, "loss": 0.693, "step": 55750 }, { "epoch": 3.17, "learning_rate": 4.3168680800902995e-05, "loss": 0.6936, "step": 56000 }, { "epoch": 3.18, "learning_rate": 4.308361570245477e-05, "loss": 0.6926, "step": 56250 }, { "epoch": 3.2, "learning_rate": 4.299810918585097e-05, "loss": 0.6945, "step": 56500 }, { "epoch": 3.21, "learning_rate": 4.2912163338291946e-05, "loss": 0.6929, "step": 56750 }, { "epoch": 3.23, "learning_rate": 4.2825780257702033e-05, "loss": 0.6947, "step": 57000 }, { "epoch": 3.24, "learning_rate": 4.2738962052678356e-05, "loss": 0.6936, "step": 57250 }, { "epoch": 3.26, "learning_rate": 4.265171084243936e-05, "loss": 0.6934, "step": 57500 }, { "epoch": 3.27, "learning_rate": 4.256402875677308e-05, "loss": 0.694, "step": 57750 }, { "epoch": 3.28, "learning_rate": 4.247591793598513e-05, "loss": 0.6935, "step": 58000 }, { "epoch": 3.3, "learning_rate": 4.238773552736224e-05, "loss": 0.6933, "step": 58250 }, { "epoch": 3.31, "learning_rate": 4.2298775392430656e-05, "loss": 0.6934, "step": 58500 }, { "epoch": 3.33, "learning_rate": 4.220939299716943e-05, "loss": 0.6939, "step": 58750 }, { "epoch": 3.34, "learning_rate": 4.211959052338851e-05, "loss": 0.6933, "step": 59000 }, { "epoch": 3.35, "learning_rate": 4.202937016315183e-05, "loss": 0.6934, "step": 59250 }, { "epoch": 3.37, "learning_rate": 4.1938734118723924e-05, "loss": 0.6935, "step": 59500 }, { "epoch": 3.38, "learning_rate": 4.184768460251607e-05, "loss": 0.6932, "step": 59750 }, { "epoch": 3.4, "learning_rate": 4.1756223837032336e-05, "loss": 0.6939, "step": 60000 }, { "epoch": 3.41, "learning_rate": 4.166435405481533e-05, "loss": 0.6935, "step": 60250 }, { "epoch": 3.43, "learning_rate": 4.1572077498391684e-05, "loss": 0.6938, "step": 60500 }, { "epoch": 3.44, "learning_rate": 4.147976794734224e-05, "loss": 0.6936, "step": 60750 }, { "epoch": 3.45, "learning_rate": 4.138668621426548e-05, "loss": 0.6937, "step": 61000 }, { "epoch": 3.47, "learning_rate": 4.129320448480947e-05, "loss": 0.6935, "step": 61250 }, { "epoch": 3.48, "learning_rate": 4.1199325040848224e-05, "loss": 0.6932, "step": 61500 }, { "epoch": 3.5, "learning_rate": 4.11050501739639e-05, "loss": 0.6934, "step": 61750 }, { "epoch": 3.51, "learning_rate": 4.101038218539085e-05, "loss": 0.6934, "step": 62000 }, { "epoch": 3.52, "learning_rate": 4.091532338595949e-05, "loss": 0.6933, "step": 62250 }, { "epoch": 3.54, "learning_rate": 4.081987609603982e-05, "loss": 0.6933, "step": 62500 }, { "epoch": 3.55, "learning_rate": 4.072404264548489e-05, "loss": 0.694, "step": 62750 }, { "epoch": 3.57, "learning_rate": 4.0628211004123955e-05, "loss": 0.6937, "step": 63000 }, { "epoch": 3.58, "learning_rate": 4.053161378070489e-05, "loss": 0.6933, "step": 63250 }, { "epoch": 3.6, "learning_rate": 4.043463743308738e-05, "loss": 0.6936, "step": 63500 }, { "epoch": 3.61, "learning_rate": 4.03372843284485e-05, "loss": 0.6938, "step": 63750 }, { "epoch": 3.62, "learning_rate": 4.023955684316192e-05, "loss": 0.6936, "step": 64000 }, { "epoch": 3.64, "learning_rate": 4.014145736273984e-05, "loss": 0.6935, "step": 64250 }, { "epoch": 3.65, "learning_rate": 4.004298828177483e-05, "loss": 0.6931, "step": 64500 }, { "epoch": 3.67, "learning_rate": 3.9944152003881354e-05, "loss": 0.6921, "step": 64750 }, { "epoch": 3.68, "learning_rate": 3.984534846933014e-05, "loss": 0.6939, "step": 65000 }, { "epoch": 3.69, "learning_rate": 3.974578648883321e-05, "loss": 0.6935, "step": 65250 }, { "epoch": 3.71, "learning_rate": 3.9645864566055856e-05, "loss": 0.6934, "step": 65500 }, { "epoch": 3.72, "learning_rate": 3.954558514007616e-05, "loss": 0.6936, "step": 65750 }, { "epoch": 3.74, "learning_rate": 3.944495065869881e-05, "loss": 0.6933, "step": 66000 }, { "epoch": 3.75, "learning_rate": 3.934396357839535e-05, "loss": 0.6937, "step": 66250 }, { "epoch": 3.76, "learning_rate": 3.9242626364244185e-05, "loss": 0.6936, "step": 66500 }, { "epoch": 3.78, "learning_rate": 3.914094148987043e-05, "loss": 0.6931, "step": 66750 }, { "epoch": 3.79, "learning_rate": 3.9039320241892266e-05, "loss": 0.6938, "step": 67000 }, { "epoch": 3.81, "learning_rate": 3.89369488676115e-05, "loss": 0.6936, "step": 67250 }, { "epoch": 3.82, "learning_rate": 3.883423729464678e-05, "loss": 0.6926, "step": 67500 }, { "epoch": 3.84, "learning_rate": 3.8731188030171076e-05, "loss": 0.6938, "step": 67750 }, { "epoch": 3.85, "learning_rate": 3.862780358960041e-05, "loss": 0.6929, "step": 68000 }, { "epoch": 3.86, "learning_rate": 3.8524086496532375e-05, "loss": 0.6937, "step": 68250 }, { "epoch": 3.88, "learning_rate": 3.842003928268455e-05, "loss": 0.6935, "step": 68500 }, { "epoch": 3.89, "learning_rate": 3.831566448783271e-05, "loss": 0.6928, "step": 68750 }, { "epoch": 3.91, "learning_rate": 3.8210964659748866e-05, "loss": 0.694, "step": 69000 }, { "epoch": 3.92, "learning_rate": 3.810636308234164e-05, "loss": 0.6934, "step": 69250 }, { "epoch": 3.93, "learning_rate": 3.800102213732213e-05, "loss": 0.6937, "step": 69500 }, { "epoch": 3.95, "learning_rate": 3.789536383943985e-05, "loss": 0.6936, "step": 69750 }, { "epoch": 3.96, "learning_rate": 3.778939076779688e-05, "loss": 0.6934, "step": 70000 }, { "epoch": 3.98, "learning_rate": 3.768310550917889e-05, "loss": 0.6936, "step": 70250 }, { "epoch": 3.99, "learning_rate": 3.757651065799198e-05, "loss": 0.6936, "step": 70500 }, { "epoch": 4.0, "eval_accuracy": 0.5002547770700637, "eval_f1": 0.33344654835696697, "eval_loss": 0.6933583617210388, "eval_precision": 0.25012738853503186, "eval_recall": 0.5, "eval_runtime": 12.4824, "eval_samples_per_second": 628.887, "eval_steps_per_second": 78.671, "step": 70652 }, { "epoch": 4.01, "learning_rate": 3.7469608816199355e-05, "loss": 0.6932, "step": 70750 }, { "epoch": 4.02, "learning_rate": 3.7362402593257796e-05, "loss": 0.6939, "step": 71000 }, { "epoch": 4.03, "learning_rate": 3.725489460605399e-05, "loss": 0.6935, "step": 71250 }, { "epoch": 4.05, "learning_rate": 3.7147087478840654e-05, "loss": 0.693, "step": 71500 }, { "epoch": 4.06, "learning_rate": 3.7039416844865045e-05, "loss": 0.694, "step": 71750 }, { "epoch": 4.08, "learning_rate": 3.693102050974699e-05, "loss": 0.6933, "step": 72000 }, { "epoch": 4.09, "learning_rate": 3.682233294033409e-05, "loss": 0.6933, "step": 72250 }, { "epoch": 4.1, "learning_rate": 3.671335678967246e-05, "loss": 0.6933, "step": 72500 }, { "epoch": 4.12, "learning_rate": 3.6604094717852435e-05, "loss": 0.6935, "step": 72750 }, { "epoch": 4.13, "learning_rate": 3.649454939194364e-05, "loss": 0.6932, "step": 73000 }, { "epoch": 4.15, "learning_rate": 3.638472348592989e-05, "loss": 0.6934, "step": 73250 }, { "epoch": 4.16, "learning_rate": 3.627461968064393e-05, "loss": 0.6935, "step": 73500 }, { "epoch": 4.18, "learning_rate": 3.616424066370194e-05, "loss": 0.6929, "step": 73750 }, { "epoch": 4.19, "learning_rate": 3.605358912943803e-05, "loss": 0.6929, "step": 74000 }, { "epoch": 4.2, "learning_rate": 3.594311199812862e-05, "loss": 0.6937, "step": 74250 }, { "epoch": 4.22, "learning_rate": 3.5831924601798475e-05, "loss": 0.6934, "step": 74500 }, { "epoch": 4.23, "learning_rate": 3.5720472799928105e-05, "loss": 0.6937, "step": 74750 }, { "epoch": 4.25, "learning_rate": 3.560875931303811e-05, "loss": 0.6934, "step": 75000 }, { "epoch": 4.26, "learning_rate": 3.549678686803674e-05, "loss": 0.6936, "step": 75250 }, { "epoch": 4.27, "learning_rate": 3.5384558198153416e-05, "loss": 0.694, "step": 75500 }, { "epoch": 4.29, "learning_rate": 3.527207604287196e-05, "loss": 0.6934, "step": 75750 }, { "epoch": 4.3, "learning_rate": 3.51593431478637e-05, "loss": 0.693, "step": 76000 }, { "epoch": 4.32, "learning_rate": 3.504681467879116e-05, "loss": 0.6931, "step": 76250 }, { "epoch": 4.33, "learning_rate": 3.4933589541177024e-05, "loss": 0.6936, "step": 76500 }, { "epoch": 4.35, "learning_rate": 3.4820121926237256e-05, "loss": 0.6934, "step": 76750 }, { "epoch": 4.36, "learning_rate": 3.47064146036981e-05, "loss": 0.6937, "step": 77000 }, { "epoch": 4.37, "learning_rate": 3.4592470349137056e-05, "loss": 0.6934, "step": 77250 }, { "epoch": 4.39, "learning_rate": 3.447829194391507e-05, "loss": 0.6932, "step": 77500 }, { "epoch": 4.4, "learning_rate": 3.436388217510869e-05, "loss": 0.6937, "step": 77750 }, { "epoch": 4.42, "learning_rate": 3.4249243835441994e-05, "loss": 0.6928, "step": 78000 }, { "epoch": 4.43, "learning_rate": 3.413437972321844e-05, "loss": 0.692, "step": 78250 }, { "epoch": 4.44, "learning_rate": 3.401975343100874e-05, "loss": 0.694, "step": 78500 }, { "epoch": 4.46, "learning_rate": 3.390444706559248e-05, "loss": 0.693, "step": 78750 }, { "epoch": 4.47, "learning_rate": 3.3788923344053156e-05, "loss": 0.6936, "step": 79000 }, { "epoch": 4.49, "learning_rate": 3.367318508630627e-05, "loss": 0.6932, "step": 79250 }, { "epoch": 4.5, "learning_rate": 3.355723511750409e-05, "loss": 0.6933, "step": 79500 }, { "epoch": 4.52, "learning_rate": 3.3441076267966755e-05, "loss": 0.6935, "step": 79750 }, { "epoch": 4.53, "learning_rate": 3.3324711373113114e-05, "loss": 0.6932, "step": 80000 }, { "epoch": 4.54, "learning_rate": 3.320814327339158e-05, "loss": 0.693, "step": 80250 }, { "epoch": 4.56, "learning_rate": 3.30918422833875e-05, "loss": 0.6941, "step": 80500 }, { "epoch": 4.57, "learning_rate": 3.2974877099399125e-05, "loss": 0.6934, "step": 80750 }, { "epoch": 4.59, "learning_rate": 3.28577172499413e-05, "loss": 0.6932, "step": 81000 }, { "epoch": 4.6, "learning_rate": 3.274036559486714e-05, "loss": 0.6935, "step": 81250 }, { "epoch": 4.61, "learning_rate": 3.26228249987117e-05, "loss": 0.6931, "step": 81500 }, { "epoch": 4.63, "learning_rate": 3.250509833062205e-05, "loss": 0.6929, "step": 81750 }, { "epoch": 4.64, "learning_rate": 3.238718846428726e-05, "loss": 0.6932, "step": 82000 }, { "epoch": 4.66, "learning_rate": 3.226909827786824e-05, "loss": 0.6934, "step": 82250 }, { "epoch": 4.67, "learning_rate": 3.2150830653927466e-05, "loss": 0.6928, "step": 82500 }, { "epoch": 4.68, "learning_rate": 3.203238847935866e-05, "loss": 0.6936, "step": 82750 }, { "epoch": 4.7, "learning_rate": 3.191424943876217e-05, "loss": 0.6935, "step": 83000 }, { "epoch": 4.71, "learning_rate": 3.179546750987436e-05, "loss": 0.6934, "step": 83250 }, { "epoch": 4.73, "learning_rate": 3.1676519704715806e-05, "loss": 0.6934, "step": 83500 }, { "epoch": 4.74, "learning_rate": 3.155740892678332e-05, "loss": 0.6934, "step": 83750 }, { "epoch": 4.76, "learning_rate": 3.143813808355187e-05, "loss": 0.6932, "step": 84000 }, { "epoch": 4.77, "learning_rate": 3.1318710086403584e-05, "loss": 0.6935, "step": 84250 }, { "epoch": 4.78, "learning_rate": 3.1199127850556704e-05, "loss": 0.6935, "step": 84500 }, { "epoch": 4.8, "learning_rate": 3.10793942949944e-05, "loss": 0.6936, "step": 84750 }, { "epoch": 4.81, "learning_rate": 3.095951234239353e-05, "loss": 0.6932, "step": 85000 }, { "epoch": 4.83, "learning_rate": 3.083948491905332e-05, "loss": 0.6933, "step": 85250 }, { "epoch": 4.84, "learning_rate": 3.0719314954823894e-05, "loss": 0.693, "step": 85500 }, { "epoch": 4.85, "learning_rate": 3.059900538303479e-05, "loss": 0.6931, "step": 85750 }, { "epoch": 4.87, "learning_rate": 3.047855914042333e-05, "loss": 0.6938, "step": 86000 }, { "epoch": 4.88, "learning_rate": 3.0357979167062957e-05, "loss": 0.6933, "step": 86250 }, { "epoch": 4.9, "learning_rate": 3.0237751505958946e-05, "loss": 0.6935, "step": 86500 }, { "epoch": 4.91, "learning_rate": 3.0116913409796476e-05, "loss": 0.6926, "step": 86750 }, { "epoch": 4.93, "learning_rate": 2.999595041059932e-05, "loss": 0.6938, "step": 87000 }, { "epoch": 4.94, "learning_rate": 2.9874865461054845e-05, "loss": 0.6934, "step": 87250 }, { "epoch": 4.95, "learning_rate": 2.9753661516827242e-05, "loss": 0.6927, "step": 87500 }, { "epoch": 4.97, "learning_rate": 2.9632341536485315e-05, "loss": 0.6938, "step": 87750 }, { "epoch": 4.98, "learning_rate": 2.9510908481430306e-05, "loss": 0.6931, "step": 88000 }, { "epoch": 5.0, "learning_rate": 2.938936531582359e-05, "loss": 0.6934, "step": 88250 }, { "epoch": 5.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.6931193470954895, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.6829, "eval_samples_per_second": 618.944, "eval_steps_per_second": 77.427, "step": 88315 }, { "epoch": 5.01, "learning_rate": 2.926820181724712e-05, "loss": 0.6931, "step": 88500 }, { "epoch": 5.02, "learning_rate": 2.914644774447821e-05, "loss": 0.6929, "step": 88750 }, { "epoch": 5.04, "learning_rate": 2.9024592457585668e-05, "loss": 0.6936, "step": 89000 }, { "epoch": 5.05, "learning_rate": 2.8902638931037468e-05, "loss": 0.6931, "step": 89250 }, { "epoch": 5.07, "learning_rate": 2.878059014169962e-05, "loss": 0.6933, "step": 89500 }, { "epoch": 5.08, "learning_rate": 2.8658449068763467e-05, "loss": 0.6934, "step": 89750 }, { "epoch": 5.1, "learning_rate": 2.8536218693672996e-05, "loss": 0.6934, "step": 90000 }, { "epoch": 5.11, "learning_rate": 2.841390200005203e-05, "loss": 0.6932, "step": 90250 }, { "epoch": 5.12, "learning_rate": 2.8291501973631418e-05, "loss": 0.6935, "step": 90500 }, { "epoch": 5.14, "learning_rate": 2.8169511679747517e-05, "loss": 0.6933, "step": 90750 }, { "epoch": 5.15, "learning_rate": 2.8046954256446972e-05, "loss": 0.6932, "step": 91000 }, { "epoch": 5.17, "learning_rate": 2.7924322457482288e-05, "loss": 0.6935, "step": 91250 }, { "epoch": 5.18, "learning_rate": 2.780161927627598e-05, "loss": 0.6932, "step": 91500 }, { "epoch": 5.19, "learning_rate": 2.7678847707993006e-05, "loss": 0.6935, "step": 91750 }, { "epoch": 5.21, "learning_rate": 2.7556010749467626e-05, "loss": 0.693, "step": 92000 }, { "epoch": 5.22, "learning_rate": 2.7433111399130273e-05, "loss": 0.6933, "step": 92250 }, { "epoch": 5.24, "learning_rate": 2.7310152656934363e-05, "loss": 0.6931, "step": 92500 }, { "epoch": 5.25, "learning_rate": 2.7187629693164157e-05, "loss": 0.6934, "step": 92750 }, { "epoch": 5.27, "learning_rate": 2.7064561380404474e-05, "loss": 0.6933, "step": 93000 }, { "epoch": 5.28, "learning_rate": 2.6941442672033014e-05, "loss": 0.6934, "step": 93250 }, { "epoch": 5.29, "learning_rate": 2.6818276573357664e-05, "loss": 0.6932, "step": 93500 }, { "epoch": 5.31, "learning_rate": 2.6695066090843123e-05, "loss": 0.6933, "step": 93750 }, { "epoch": 5.32, "learning_rate": 2.657181423203749e-05, "loss": 0.6934, "step": 94000 }, { "epoch": 5.34, "learning_rate": 2.6448524005498838e-05, "loss": 0.6933, "step": 94250 }, { "epoch": 5.35, "learning_rate": 2.6325198420721808e-05, "loss": 0.6933, "step": 94500 }, { "epoch": 5.36, "learning_rate": 2.6201840488064118e-05, "loss": 0.6934, "step": 94750 }, { "epoch": 5.38, "learning_rate": 2.6078453218673098e-05, "loss": 0.6932, "step": 95000 }, { "epoch": 5.39, "learning_rate": 2.595553332723595e-05, "loss": 0.6936, "step": 95250 }, { "epoch": 5.41, "learning_rate": 2.5832096507858917e-05, "loss": 0.6928, "step": 95500 }, { "epoch": 5.42, "learning_rate": 2.5708639377139736e-05, "loss": 0.6933, "step": 95750 }, { "epoch": 5.44, "learning_rate": 2.558516494864715e-05, "loss": 0.6932, "step": 96000 }, { "epoch": 5.45, "learning_rate": 2.546167623637212e-05, "loss": 0.6933, "step": 96250 }, { "epoch": 5.46, "learning_rate": 2.5338176254654305e-05, "loss": 0.6934, "step": 96500 }, { "epoch": 5.48, "learning_rate": 2.5214668018108413e-05, "loss": 0.6933, "step": 96750 }, { "epoch": 5.49, "learning_rate": 2.509115454155066e-05, "loss": 0.6934, "step": 97000 }, { "epoch": 5.51, "learning_rate": 2.4968132903168124e-05, "loss": 0.6929, "step": 97250 }, { "epoch": 5.52, "learning_rate": 2.4844617982307774e-05, "loss": 0.6936, "step": 97500 }, { "epoch": 5.53, "learning_rate": 2.4721106854297538e-05, "loss": 0.6932, "step": 97750 }, { "epoch": 5.55, "learning_rate": 2.4597602534024195e-05, "loss": 0.6935, "step": 98000 }, { "epoch": 5.56, "learning_rate": 2.447410803620838e-05, "loss": 0.6931, "step": 98250 }, { "epoch": 5.58, "learning_rate": 2.4350626375330963e-05, "loss": 0.6931, "step": 98500 }, { "epoch": 5.59, "learning_rate": 2.4227160565559448e-05, "loss": 0.6933, "step": 98750 }, { "epoch": 5.6, "learning_rate": 2.410371362067444e-05, "loss": 0.6933, "step": 99000 }, { "epoch": 5.62, "learning_rate": 2.398078220668819e-05, "loss": 0.6934, "step": 99250 }, { "epoch": 5.63, "learning_rate": 2.385738192543777e-05, "loss": 0.6934, "step": 99500 }, { "epoch": 5.65, "learning_rate": 2.3734009535311082e-05, "loss": 0.693, "step": 99750 }, { "epoch": 5.66, "learning_rate": 2.3610668047808355e-05, "loss": 0.6936, "step": 100000 }, { "epoch": 5.68, "learning_rate": 2.3487360473675493e-05, "loss": 0.693, "step": 100250 }, { "epoch": 5.69, "learning_rate": 2.3364089822830585e-05, "loss": 0.6934, "step": 100500 }, { "epoch": 5.7, "learning_rate": 2.3240859104290418e-05, "loss": 0.6932, "step": 100750 }, { "epoch": 5.72, "learning_rate": 2.311767132609704e-05, "loss": 0.6932, "step": 101000 }, { "epoch": 5.73, "learning_rate": 2.2994529495244343e-05, "loss": 0.6934, "step": 101250 }, { "epoch": 5.75, "learning_rate": 2.287192888761777e-05, "loss": 0.6934, "step": 101500 }, { "epoch": 5.76, "learning_rate": 2.2748887754053225e-05, "loss": 0.6932, "step": 101750 }, { "epoch": 5.77, "learning_rate": 2.2625901569777145e-05, "loss": 0.693, "step": 102000 }, { "epoch": 5.79, "learning_rate": 2.2502973336862524e-05, "loss": 0.6933, "step": 102250 }, { "epoch": 5.8, "learning_rate": 2.23801060559678e-05, "loss": 0.6935, "step": 102500 }, { "epoch": 5.82, "learning_rate": 2.2257302726263566e-05, "loss": 0.6931, "step": 102750 }, { "epoch": 5.83, "learning_rate": 2.213456634535938e-05, "loss": 0.6933, "step": 103000 }, { "epoch": 5.85, "learning_rate": 2.201189990923061e-05, "loss": 0.6936, "step": 103250 }, { "epoch": 5.86, "learning_rate": 2.1889796636872324e-05, "loss": 0.6934, "step": 103500 }, { "epoch": 5.87, "learning_rate": 2.176727876163321e-05, "loss": 0.6929, "step": 103750 }, { "epoch": 5.89, "learning_rate": 2.1644839796600443e-05, "loss": 0.6935, "step": 104000 }, { "epoch": 5.9, "learning_rate": 2.152248273048949e-05, "loss": 0.6932, "step": 104250 }, { "epoch": 5.92, "learning_rate": 2.1400210550016697e-05, "loss": 0.6931, "step": 104500 }, { "epoch": 5.93, "learning_rate": 2.127802623982636e-05, "loss": 0.6932, "step": 104750 }, { "epoch": 5.94, "learning_rate": 2.1155932782417855e-05, "loss": 0.6933, "step": 105000 }, { "epoch": 5.96, "learning_rate": 2.1033933158072878e-05, "loss": 0.6928, "step": 105250 }, { "epoch": 5.97, "learning_rate": 2.0912030344782672e-05, "loss": 0.6935, "step": 105500 }, { "epoch": 5.99, "learning_rate": 2.0790714327565363e-05, "loss": 0.6932, "step": 105750 }, { "epoch": 6.0, "eval_accuracy": 0.5002547770700637, "eval_f1": 0.33344654835696697, "eval_loss": 0.6931106448173523, "eval_precision": 0.25012738853503186, "eval_recall": 0.5, "eval_runtime": 12.652, "eval_samples_per_second": 620.456, "eval_steps_per_second": 77.616, "step": 105978 }, { "epoch": 6.0, "learning_rate": 2.066901364387442e-05, "loss": 0.6934, "step": 106000 }, { "epoch": 6.02, "learning_rate": 2.0547418678865e-05, "loss": 0.693, "step": 106250 }, { "epoch": 6.03, "learning_rate": 2.0425932400650658e-05, "loss": 0.6935, "step": 106500 }, { "epoch": 6.04, "learning_rate": 2.0304557774691947e-05, "loss": 0.6935, "step": 106750 }, { "epoch": 6.06, "learning_rate": 2.0183297763724e-05, "loss": 0.693, "step": 107000 }, { "epoch": 6.07, "learning_rate": 2.006215532768421e-05, "loss": 0.6932, "step": 107250 }, { "epoch": 6.09, "learning_rate": 1.9941133423640003e-05, "loss": 0.6933, "step": 107500 }, { "epoch": 6.1, "learning_rate": 1.9820235005716594e-05, "loss": 0.6924, "step": 107750 }, { "epoch": 6.11, "learning_rate": 1.9699945857176588e-05, "loss": 0.6935, "step": 108000 }, { "epoch": 6.13, "learning_rate": 1.9579302738332077e-05, "loss": 0.6935, "step": 108250 }, { "epoch": 6.14, "learning_rate": 1.9458791937837308e-05, "loss": 0.6932, "step": 108500 }, { "epoch": 6.16, "learning_rate": 1.9338416397341575e-05, "loss": 0.6932, "step": 108750 }, { "epoch": 6.17, "learning_rate": 1.92181790551925e-05, "loss": 0.6933, "step": 109000 }, { "epoch": 6.19, "learning_rate": 1.9098082846364272e-05, "loss": 0.6932, "step": 109250 }, { "epoch": 6.2, "learning_rate": 1.8978130702386082e-05, "loss": 0.6932, "step": 109500 }, { "epoch": 6.21, "learning_rate": 1.8858325551270503e-05, "loss": 0.6933, "step": 109750 }, { "epoch": 6.23, "learning_rate": 1.8739148635869862e-05, "loss": 0.6933, "step": 110000 }, { "epoch": 6.24, "learning_rate": 1.8619645622928097e-05, "loss": 0.6932, "step": 110250 }, { "epoch": 6.26, "learning_rate": 1.8500298353412176e-05, "loss": 0.6933, "step": 110500 }, { "epoch": 6.27, "learning_rate": 1.838110974056977e-05, "loss": 0.6933, "step": 110750 }, { "epoch": 6.28, "learning_rate": 1.826208269377578e-05, "loss": 0.6933, "step": 111000 }, { "epoch": 6.3, "learning_rate": 1.8143220118461316e-05, "loss": 0.6932, "step": 111250 }, { "epoch": 6.31, "learning_rate": 1.802452491604275e-05, "loss": 0.6931, "step": 111500 }, { "epoch": 6.33, "learning_rate": 1.790599998385092e-05, "loss": 0.6935, "step": 111750 }, { "epoch": 6.34, "learning_rate": 1.778764821506038e-05, "loss": 0.6934, "step": 112000 }, { "epoch": 6.36, "learning_rate": 1.7669944846963703e-05, "loss": 0.6933, "step": 112250 }, { "epoch": 6.37, "learning_rate": 1.75519473460329e-05, "loss": 0.6933, "step": 112500 }, { "epoch": 6.38, "learning_rate": 1.7434131650871434e-05, "loss": 0.6933, "step": 112750 }, { "epoch": 6.4, "learning_rate": 1.7316500637341497e-05, "loss": 0.6934, "step": 113000 }, { "epoch": 6.41, "learning_rate": 1.7199057176797255e-05, "loss": 0.693, "step": 113250 }, { "epoch": 6.43, "learning_rate": 1.7081804136014705e-05, "loss": 0.6934, "step": 113500 }, { "epoch": 6.44, "learning_rate": 1.696474437712175e-05, "loss": 0.6932, "step": 113750 }, { "epoch": 6.45, "learning_rate": 1.684788075752831e-05, "loss": 0.6933, "step": 114000 }, { "epoch": 6.47, "learning_rate": 1.6731216129856575e-05, "loss": 0.6928, "step": 114250 }, { "epoch": 6.48, "learning_rate": 1.6614753341871385e-05, "loss": 0.6933, "step": 114500 }, { "epoch": 6.5, "learning_rate": 1.6498959857342135e-05, "loss": 0.6934, "step": 114750 }, { "epoch": 6.51, "learning_rate": 1.6382908436519833e-05, "loss": 0.6933, "step": 115000 }, { "epoch": 6.52, "learning_rate": 1.6267067357518934e-05, "loss": 0.6932, "step": 115250 }, { "epoch": 6.54, "learning_rate": 1.615143944800157e-05, "loss": 0.6932, "step": 115500 }, { "epoch": 6.55, "learning_rate": 1.6036027530426446e-05, "loss": 0.6932, "step": 115750 }, { "epoch": 6.57, "learning_rate": 1.592083442197995e-05, "loss": 0.693, "step": 116000 }, { "epoch": 6.58, "learning_rate": 1.5805862934507337e-05, "loss": 0.6932, "step": 116250 }, { "epoch": 6.6, "learning_rate": 1.5691115874444174e-05, "loss": 0.6931, "step": 116500 }, { "epoch": 6.61, "learning_rate": 1.5576596042747766e-05, "loss": 0.6928, "step": 116750 }, { "epoch": 6.62, "learning_rate": 1.546230623482881e-05, "loss": 0.6937, "step": 117000 }, { "epoch": 6.64, "learning_rate": 1.534870500100411e-05, "loss": 0.6932, "step": 117250 }, { "epoch": 6.65, "learning_rate": 1.5234882656415627e-05, "loss": 0.6931, "step": 117500 }, { "epoch": 6.67, "learning_rate": 1.5121298676773482e-05, "loss": 0.6931, "step": 117750 }, { "epoch": 6.68, "learning_rate": 1.5007955834644389e-05, "loss": 0.6932, "step": 118000 }, { "epoch": 6.69, "learning_rate": 1.4894856896708926e-05, "loss": 0.6933, "step": 118250 }, { "epoch": 6.71, "learning_rate": 1.478200462369401e-05, "loss": 0.6935, "step": 118500 }, { "epoch": 6.72, "learning_rate": 1.4669401770305513e-05, "loss": 0.6933, "step": 118750 }, { "epoch": 6.74, "learning_rate": 1.4557051085160978e-05, "loss": 0.6932, "step": 119000 }, { "epoch": 6.75, "learning_rate": 1.4445403182409694e-05, "loss": 0.6931, "step": 119250 }, { "epoch": 6.77, "learning_rate": 1.4333564018887296e-05, "loss": 0.6931, "step": 119500 }, { "epoch": 6.78, "learning_rate": 1.422198522135445e-05, "loss": 0.6932, "step": 119750 }, { "epoch": 6.79, "learning_rate": 1.4110669513431698e-05, "loss": 0.6931, "step": 120000 }, { "epoch": 6.81, "learning_rate": 1.39996196123176e-05, "loss": 0.6929, "step": 120250 }, { "epoch": 6.82, "learning_rate": 1.3888838228722412e-05, "loss": 0.6934, "step": 120500 }, { "epoch": 6.84, "learning_rate": 1.377832806680191e-05, "loss": 0.6932, "step": 120750 }, { "epoch": 6.85, "learning_rate": 1.3668091824091411e-05, "loss": 0.6932, "step": 121000 }, { "epoch": 6.86, "learning_rate": 1.3558132191439892e-05, "loss": 0.6931, "step": 121250 }, { "epoch": 6.88, "learning_rate": 1.3448890014393772e-05, "loss": 0.6933, "step": 121500 }, { "epoch": 6.89, "learning_rate": 1.3339490514123848e-05, "loss": 0.6934, "step": 121750 }, { "epoch": 6.91, "learning_rate": 1.3230375645018111e-05, "loss": 0.6934, "step": 122000 }, { "epoch": 6.92, "learning_rate": 1.3121548070553007e-05, "loss": 0.6926, "step": 122250 }, { "epoch": 6.94, "learning_rate": 1.3013010447192154e-05, "loss": 0.6932, "step": 122500 }, { "epoch": 6.95, "learning_rate": 1.2904765424321502e-05, "loss": 0.6934, "step": 122750 }, { "epoch": 6.96, "learning_rate": 1.2796815644184693e-05, "loss": 0.6933, "step": 123000 }, { "epoch": 6.98, "learning_rate": 1.2689163741818505e-05, "loss": 0.6932, "step": 123250 }, { "epoch": 6.99, "learning_rate": 1.2581812344988603e-05, "loss": 0.6932, "step": 123500 }, { "epoch": 7.0, "eval_accuracy": 0.5002547770700637, "eval_f1": 0.33344654835696697, "eval_loss": 0.6931138634681702, "eval_precision": 0.25012738853503186, "eval_recall": 0.5, "eval_runtime": 12.5558, "eval_samples_per_second": 625.208, "eval_steps_per_second": 78.211, "step": 123641 }, { "epoch": 7.01, "learning_rate": 1.2475191659916228e-05, "loss": 0.6933, "step": 123750 }, { "epoch": 7.02, "learning_rate": 1.2368447899899433e-05, "loss": 0.6933, "step": 124000 }, { "epoch": 7.03, "learning_rate": 1.2262012474041122e-05, "loss": 0.6932, "step": 124250 }, { "epoch": 7.05, "learning_rate": 1.2155887980412958e-05, "loss": 0.693, "step": 124500 }, { "epoch": 7.06, "learning_rate": 1.2050077009496776e-05, "loss": 0.6934, "step": 124750 }, { "epoch": 7.08, "learning_rate": 1.1944582144121394e-05, "loss": 0.6933, "step": 125000 }, { "epoch": 7.09, "learning_rate": 1.1839405959399536e-05, "loss": 0.6931, "step": 125250 }, { "epoch": 7.11, "learning_rate": 1.1734551022664981e-05, "loss": 0.693, "step": 125500 }, { "epoch": 7.12, "learning_rate": 1.1630437369518046e-05, "loss": 0.6935, "step": 125750 }, { "epoch": 7.13, "learning_rate": 1.152623128882203e-05, "loss": 0.6933, "step": 126000 }, { "epoch": 7.15, "learning_rate": 1.1422354100656728e-05, "loss": 0.6931, "step": 126250 }, { "epoch": 7.16, "learning_rate": 1.1318808340647638e-05, "loss": 0.6927, "step": 126500 }, { "epoch": 7.18, "learning_rate": 1.1215596536330131e-05, "loss": 0.6935, "step": 126750 }, { "epoch": 7.19, "learning_rate": 1.1112721207087779e-05, "loss": 0.6933, "step": 127000 }, { "epoch": 7.2, "learning_rate": 1.1010184864090856e-05, "loss": 0.6933, "step": 127250 }, { "epoch": 7.22, "learning_rate": 1.0907990010235016e-05, "loss": 0.6933, "step": 127500 }, { "epoch": 7.23, "learning_rate": 1.080613914008024e-05, "loss": 0.6932, "step": 127750 }, { "epoch": 7.25, "learning_rate": 1.0704634739789915e-05, "loss": 0.6931, "step": 128000 }, { "epoch": 7.26, "learning_rate": 1.06038832105021e-05, "loss": 0.6933, "step": 128250 }, { "epoch": 7.28, "learning_rate": 1.0503077763966945e-05, "loss": 0.6933, "step": 128500 }, { "epoch": 7.29, "learning_rate": 1.0402626184975775e-05, "loss": 0.6933, "step": 128750 }, { "epoch": 7.3, "learning_rate": 1.0302530925535489e-05, "loss": 0.6933, "step": 129000 }, { "epoch": 7.32, "learning_rate": 1.0202794428955301e-05, "loss": 0.6932, "step": 129250 }, { "epoch": 7.33, "learning_rate": 1.0103419129787083e-05, "loss": 0.6932, "step": 129500 }, { "epoch": 7.35, "learning_rate": 1.0004407453765927e-05, "loss": 0.693, "step": 129750 }, { "epoch": 7.36, "learning_rate": 9.905761817750958e-06, "loss": 0.6934, "step": 130000 }, { "epoch": 7.37, "learning_rate": 9.807484629666289e-06, "loss": 0.6929, "step": 130250 }, { "epoch": 7.39, "learning_rate": 9.709969171909833e-06, "loss": 0.6931, "step": 130500 }, { "epoch": 7.4, "learning_rate": 9.612434569729081e-06, "loss": 0.6935, "step": 130750 }, { "epoch": 7.42, "learning_rate": 9.515275575549665e-06, "loss": 0.6932, "step": 131000 }, { "epoch": 7.43, "learning_rate": 9.418494561007033e-06, "loss": 0.6932, "step": 131250 }, { "epoch": 7.44, "learning_rate": 9.322093888510217e-06, "loss": 0.6933, "step": 131500 }, { "epoch": 7.46, "learning_rate": 9.226075911184137e-06, "loss": 0.6932, "step": 131750 }, { "epoch": 7.47, "learning_rate": 9.130442972812207e-06, "loss": 0.6933, "step": 132000 }, { "epoch": 7.49, "learning_rate": 9.0351974077791e-06, "loss": 0.6932, "step": 132250 }, { "epoch": 7.5, "learning_rate": 8.940720185129978e-06, "loss": 0.6932, "step": 132500 }, { "epoch": 7.52, "learning_rate": 8.846254759394507e-06, "loss": 0.6931, "step": 132750 }, { "epoch": 7.53, "learning_rate": 8.752183643986523e-06, "loss": 0.6932, "step": 133000 }, { "epoch": 7.54, "learning_rate": 8.658509135166829e-06, "loss": 0.6933, "step": 133250 }, { "epoch": 7.56, "learning_rate": 8.565233519515157e-06, "loss": 0.6932, "step": 133500 }, { "epoch": 7.57, "learning_rate": 8.472359073874304e-06, "loss": 0.693, "step": 133750 }, { "epoch": 7.59, "learning_rate": 8.379888065294575e-06, "loss": 0.6934, "step": 134000 }, { "epoch": 7.6, "learning_rate": 8.287822750978453e-06, "loss": 0.6932, "step": 134250 }, { "epoch": 7.61, "learning_rate": 8.196531192128304e-06, "loss": 0.6932, "step": 134500 }, { "epoch": 7.63, "learning_rate": 8.105282353121044e-06, "loss": 0.6931, "step": 134750 }, { "epoch": 7.64, "learning_rate": 8.014445911458718e-06, "loss": 0.6934, "step": 135000 }, { "epoch": 7.66, "learning_rate": 7.924024084444284e-06, "loss": 0.6932, "step": 135250 }, { "epoch": 7.67, "learning_rate": 7.834019079260019e-06, "loss": 0.6931, "step": 135500 }, { "epoch": 7.69, "learning_rate": 7.744433092913655e-06, "loss": 0.6931, "step": 135750 }, { "epoch": 7.7, "learning_rate": 7.655268312184721e-06, "loss": 0.693, "step": 136000 }, { "epoch": 7.71, "learning_rate": 7.5665269135712034e-06, "loss": 0.6934, "step": 136250 }, { "epoch": 7.73, "learning_rate": 7.478563476084949e-06, "loss": 0.6932, "step": 136500 }, { "epoch": 7.74, "learning_rate": 7.390673614707866e-06, "loss": 0.6931, "step": 136750 }, { "epoch": 7.76, "learning_rate": 7.3032135941602745e-06, "loss": 0.6932, "step": 137000 }, { "epoch": 7.77, "learning_rate": 7.216185549327245e-06, "loss": 0.6931, "step": 137250 }, { "epoch": 7.78, "learning_rate": 7.129591604549363e-06, "loss": 0.6931, "step": 137500 }, { "epoch": 7.8, "learning_rate": 7.043433873570918e-06, "loss": 0.6932, "step": 137750 }, { "epoch": 7.81, "learning_rate": 6.957714459488293e-06, "loss": 0.6934, "step": 138000 }, { "epoch": 7.83, "learning_rate": 6.87243545469862e-06, "loss": 0.693, "step": 138250 }, { "epoch": 7.84, "learning_rate": 6.787937402713973e-06, "loss": 0.6934, "step": 138500 }, { "epoch": 7.86, "learning_rate": 6.703543668291617e-06, "loss": 0.6932, "step": 138750 }, { "epoch": 7.87, "learning_rate": 6.619596547430409e-06, "loss": 0.693, "step": 139000 }, { "epoch": 7.88, "learning_rate": 6.536098089266093e-06, "loss": 0.6933, "step": 139250 }, { "epoch": 7.9, "learning_rate": 6.453050331982624e-06, "loss": 0.6933, "step": 139500 }, { "epoch": 7.91, "learning_rate": 6.370455302762429e-06, "loss": 0.6931, "step": 139750 }, { "epoch": 7.93, "learning_rate": 6.2883150177369095e-06, "loss": 0.6933, "step": 140000 }, { "epoch": 7.94, "learning_rate": 6.206631481937219e-06, "loss": 0.6933, "step": 140250 }, { "epoch": 7.95, "learning_rate": 6.125406689245361e-06, "loss": 0.693, "step": 140500 }, { "epoch": 7.97, "learning_rate": 6.04496475823072e-06, "loss": 0.6931, "step": 140750 }, { "epoch": 7.98, "learning_rate": 5.964661533859653e-06, "loss": 0.6934, "step": 141000 }, { "epoch": 8.0, "learning_rate": 5.884822959043998e-06, "loss": 0.6933, "step": 141250 }, { "epoch": 8.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.6931153535842896, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.5013, "eval_samples_per_second": 627.937, "eval_steps_per_second": 78.552, "step": 141304 }, { "epoch": 8.01, "learning_rate": 5.805450982630542e-06, "loss": 0.6931, "step": 141500 }, { "epoch": 8.03, "learning_rate": 5.726547542076485e-06, "loss": 0.6932, "step": 141750 }, { "epoch": 8.04, "learning_rate": 5.6481145634021515e-06, "loss": 0.6931, "step": 142000 }, { "epoch": 8.05, "learning_rate": 5.570153961143942e-06, "loss": 0.6932, "step": 142250 }, { "epoch": 8.07, "learning_rate": 5.492667638307647e-06, "loss": 0.6932, "step": 142500 }, { "epoch": 8.08, "learning_rate": 5.415964575902305e-06, "loss": 0.6933, "step": 142750 }, { "epoch": 8.1, "learning_rate": 5.339430558640929e-06, "loss": 0.693, "step": 143000 }, { "epoch": 8.11, "learning_rate": 5.263376452722673e-06, "loss": 0.6931, "step": 143250 }, { "epoch": 8.12, "learning_rate": 5.187804114616051e-06, "loss": 0.6933, "step": 143500 }, { "epoch": 8.14, "learning_rate": 5.112715389029707e-06, "loss": 0.6932, "step": 143750 }, { "epoch": 8.15, "learning_rate": 5.038112108867363e-06, "loss": 0.6931, "step": 144000 }, { "epoch": 8.17, "learning_rate": 4.963996095183115e-06, "loss": 0.6932, "step": 144250 }, { "epoch": 8.18, "learning_rate": 4.890369157136956e-06, "loss": 0.6931, "step": 144500 }, { "epoch": 8.2, "learning_rate": 4.817233091950621e-06, "loss": 0.6932, "step": 144750 }, { "epoch": 8.21, "learning_rate": 4.744879274763003e-06, "loss": 0.6934, "step": 145000 }, { "epoch": 8.22, "learning_rate": 4.672728317747921e-06, "loss": 0.6934, "step": 145250 }, { "epoch": 8.24, "learning_rate": 4.601073546170611e-06, "loss": 0.6932, "step": 145500 }, { "epoch": 8.25, "learning_rate": 4.529916709112531e-06, "loss": 0.693, "step": 145750 }, { "epoch": 8.27, "learning_rate": 4.459259543500649e-06, "loss": 0.6932, "step": 146000 }, { "epoch": 8.28, "learning_rate": 4.38910377406504e-06, "loss": 0.6932, "step": 146250 }, { "epoch": 8.29, "learning_rate": 4.319451113296763e-06, "loss": 0.6931, "step": 146500 }, { "epoch": 8.31, "learning_rate": 4.250303261406091e-06, "loss": 0.6933, "step": 146750 }, { "epoch": 8.32, "learning_rate": 4.1819354605351554e-06, "loss": 0.6934, "step": 147000 }, { "epoch": 8.34, "learning_rate": 4.113800241689689e-06, "loss": 0.6931, "step": 147250 }, { "epoch": 8.35, "learning_rate": 4.046174851626572e-06, "loss": 0.6932, "step": 147500 }, { "epoch": 8.36, "learning_rate": 3.979060941070722e-06, "loss": 0.6932, "step": 147750 }, { "epoch": 8.38, "learning_rate": 3.9124601482619205e-06, "loss": 0.6932, "step": 148000 }, { "epoch": 8.39, "learning_rate": 3.846374098914826e-06, "loss": 0.693, "step": 148250 }, { "epoch": 8.41, "learning_rate": 3.780804406179295e-06, "loss": 0.6931, "step": 148500 }, { "epoch": 8.42, "learning_rate": 3.715752670600986e-06, "loss": 0.6934, "step": 148750 }, { "epoch": 8.44, "learning_rate": 3.651220480082326e-06, "loss": 0.6932, "step": 149000 }, { "epoch": 8.45, "learning_rate": 3.587209409843728e-06, "loss": 0.6931, "step": 149250 }, { "epoch": 8.46, "learning_rate": 3.523721022385132e-06, "loss": 0.6932, "step": 149500 }, { "epoch": 8.48, "learning_rate": 3.4607568674478897e-06, "loss": 0.6933, "step": 149750 }, { "epoch": 8.49, "learning_rate": 3.3983184819769e-06, "loss": 0.6932, "step": 150000 }, { "epoch": 8.51, "learning_rate": 3.3366539820747027e-06, "loss": 0.6932, "step": 150250 }, { "epoch": 8.52, "learning_rate": 3.2752695767854113e-06, "loss": 0.6931, "step": 150500 }, { "epoch": 8.53, "learning_rate": 3.2144154686773565e-06, "loss": 0.6933, "step": 150750 }, { "epoch": 8.55, "learning_rate": 3.1540931431895514e-06, "loss": 0.6932, "step": 151000 }, { "epoch": 8.56, "learning_rate": 3.0943040727802797e-06, "loss": 0.6932, "step": 151250 }, { "epoch": 8.58, "learning_rate": 3.0350497168911334e-06, "loss": 0.6931, "step": 151500 }, { "epoch": 8.59, "learning_rate": 2.976331521911438e-06, "loss": 0.6931, "step": 151750 }, { "epoch": 8.61, "learning_rate": 2.918150921142901e-06, "loss": 0.6933, "step": 152000 }, { "epoch": 8.62, "learning_rate": 2.860509334764647e-06, "loss": 0.6932, "step": 152250 }, { "epoch": 8.63, "learning_rate": 2.8034081697985437e-06, "loss": 0.6931, "step": 152500 }, { "epoch": 8.65, "learning_rate": 2.746848820074854e-06, "loss": 0.6933, "step": 152750 }, { "epoch": 8.66, "learning_rate": 2.6910556469509853e-06, "loss": 0.6934, "step": 153000 }, { "epoch": 8.68, "learning_rate": 2.6355818753078243e-06, "loss": 0.6932, "step": 153250 }, { "epoch": 8.69, "learning_rate": 2.5806540155199815e-06, "loss": 0.6932, "step": 153500 }, { "epoch": 8.7, "learning_rate": 2.5262734083676936e-06, "loss": 0.6933, "step": 153750 }, { "epoch": 8.72, "learning_rate": 2.4724413812728525e-06, "loss": 0.6932, "step": 154000 }, { "epoch": 8.73, "learning_rate": 2.4191592482665893e-06, "loss": 0.693, "step": 154250 }, { "epoch": 8.75, "learning_rate": 2.3664283099572192e-06, "loss": 0.6933, "step": 154500 }, { "epoch": 8.76, "learning_rate": 2.3142498534984735e-06, "loss": 0.693, "step": 154750 }, { "epoch": 8.78, "learning_rate": 2.262625152558104e-06, "loss": 0.6932, "step": 155000 }, { "epoch": 8.79, "learning_rate": 2.211758638779948e-06, "loss": 0.6932, "step": 155250 }, { "epoch": 8.8, "learning_rate": 2.161242988266132e-06, "loss": 0.6932, "step": 155500 }, { "epoch": 8.82, "learning_rate": 2.111284828143695e-06, "loss": 0.6932, "step": 155750 }, { "epoch": 8.83, "learning_rate": 2.0618853778833075e-06, "loss": 0.6933, "step": 156000 }, { "epoch": 8.85, "learning_rate": 2.013045843317621e-06, "loss": 0.6933, "step": 156250 }, { "epoch": 8.86, "learning_rate": 1.964767416611829e-06, "loss": 0.6933, "step": 156500 }, { "epoch": 8.87, "learning_rate": 1.9170512762345825e-06, "loss": 0.6932, "step": 156750 }, { "epoch": 8.89, "learning_rate": 1.8698985869291973e-06, "loss": 0.6932, "step": 157000 }, { "epoch": 8.9, "learning_rate": 1.8234957258351364e-06, "loss": 0.6931, "step": 157250 }, { "epoch": 8.92, "learning_rate": 1.7774711126562565e-06, "loss": 0.6933, "step": 157500 }, { "epoch": 8.93, "learning_rate": 1.732013357678619e-06, "loss": 0.6933, "step": 157750 }, { "epoch": 8.95, "learning_rate": 1.6871235705187283e-06, "loss": 0.6931, "step": 158000 }, { "epoch": 8.96, "learning_rate": 1.6428028469290813e-06, "loss": 0.693, "step": 158250 }, { "epoch": 8.97, "learning_rate": 1.5990522687714172e-06, "loss": 0.693, "step": 158500 }, { "epoch": 8.99, "learning_rate": 1.5558729039903296e-06, "loss": 0.6933, "step": 158750 }, { "epoch": 9.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.6933611035346985, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.7446, "eval_samples_per_second": 615.949, "eval_steps_per_second": 77.052, "step": 158967 }, { "epoch": 9.0, "learning_rate": 1.5132658065871747e-06, "loss": 0.6932, "step": 159000 }, { "epoch": 9.02, "learning_rate": 1.471232016594365e-06, "loss": 0.6932, "step": 159250 }, { "epoch": 9.03, "learning_rate": 1.4299372524579662e-06, "loss": 0.6932, "step": 159500 }, { "epoch": 9.04, "learning_rate": 1.3890508380014433e-06, "loss": 0.6931, "step": 159750 }, { "epoch": 9.06, "learning_rate": 1.348740763022696e-06, "loss": 0.6931, "step": 160000 }, { "epoch": 9.07, "learning_rate": 1.3090080114841863e-06, "loss": 0.6934, "step": 160250 }, { "epoch": 9.09, "learning_rate": 1.269853553255998e-06, "loss": 0.6932, "step": 160500 }, { "epoch": 9.1, "learning_rate": 1.231278344092171e-06, "loss": 0.6933, "step": 160750 }, { "epoch": 9.12, "learning_rate": 1.1932833256073695e-06, "loss": 0.6931, "step": 161000 }, { "epoch": 9.13, "learning_rate": 1.155869425253886e-06, "loss": 0.6931, "step": 161250 }, { "epoch": 9.14, "learning_rate": 1.1191837231720042e-06, "loss": 0.6931, "step": 161500 }, { "epoch": 9.16, "learning_rate": 1.0829324511818617e-06, "loss": 0.6932, "step": 161750 }, { "epoch": 9.17, "learning_rate": 1.0472649909701676e-06, "loss": 0.6931, "step": 162000 }, { "epoch": 9.19, "learning_rate": 1.0121822131738956e-06, "loss": 0.6933, "step": 162250 }, { "epoch": 9.2, "learning_rate": 9.776849741580247e-07, "loss": 0.6932, "step": 162500 }, { "epoch": 9.21, "learning_rate": 9.437741159946139e-07, "loss": 0.6933, "step": 162750 }, { "epoch": 9.23, "learning_rate": 9.104504664422669e-07, "loss": 0.6932, "step": 163000 }, { "epoch": 9.24, "learning_rate": 8.777148389259194e-07, "loss": 0.6931, "step": 163250 }, { "epoch": 9.26, "learning_rate": 8.455680325169802e-07, "loss": 0.6933, "step": 163500 }, { "epoch": 9.27, "learning_rate": 8.140108319138362e-07, "loss": 0.6932, "step": 163750 }, { "epoch": 9.28, "learning_rate": 7.831666976848146e-07, "loss": 0.6932, "step": 164000 }, { "epoch": 9.3, "learning_rate": 7.527886391862193e-07, "loss": 0.6932, "step": 164250 }, { "epoch": 9.31, "learning_rate": 7.230024512235062e-07, "loss": 0.6932, "step": 164500 }, { "epoch": 9.33, "learning_rate": 6.938088608727439e-07, "loss": 0.6933, "step": 164750 }, { "epoch": 9.34, "learning_rate": 6.652085807447861e-07, "loss": 0.693, "step": 165000 }, { "epoch": 9.36, "learning_rate": 6.372023089678797e-07, "loss": 0.6932, "step": 165250 }, { "epoch": 9.37, "learning_rate": 6.097907291706201e-07, "loss": 0.693, "step": 165500 }, { "epoch": 9.38, "learning_rate": 5.829745104652673e-07, "loss": 0.6932, "step": 165750 }, { "epoch": 9.4, "learning_rate": 5.568580001272361e-07, "loss": 0.6932, "step": 166000 }, { "epoch": 9.41, "learning_rate": 5.31232064917378e-07, "loss": 0.6931, "step": 166250 }, { "epoch": 9.43, "learning_rate": 5.062034084038104e-07, "loss": 0.6932, "step": 166500 }, { "epoch": 9.44, "learning_rate": 4.817726415320162e-07, "loss": 0.6932, "step": 166750 }, { "epoch": 9.45, "learning_rate": 4.5794036065309466e-07, "loss": 0.6932, "step": 167000 }, { "epoch": 9.47, "learning_rate": 4.347071475091918e-07, "loss": 0.6932, "step": 167250 }, { "epoch": 9.48, "learning_rate": 4.120735692193151e-07, "loss": 0.6931, "step": 167500 }, { "epoch": 9.5, "learning_rate": 3.900401782654717e-07, "loss": 0.6932, "step": 167750 }, { "epoch": 9.51, "learning_rate": 3.686075124792021e-07, "loss": 0.6932, "step": 168000 }, { "epoch": 9.53, "learning_rate": 3.477760950284292e-07, "loss": 0.6932, "step": 168250 }, { "epoch": 9.54, "learning_rate": 3.2762615368826244e-07, "loss": 0.6932, "step": 168500 }, { "epoch": 9.55, "learning_rate": 3.0799633372767536e-07, "loss": 0.6931, "step": 168750 }, { "epoch": 9.57, "learning_rate": 2.889692416117279e-07, "loss": 0.6931, "step": 169000 }, { "epoch": 9.58, "learning_rate": 2.7054534178868607e-07, "loss": 0.6932, "step": 169250 }, { "epoch": 9.6, "learning_rate": 2.527250839829881e-07, "loss": 0.6932, "step": 169500 }, { "epoch": 9.61, "learning_rate": 2.3550890318425888e-07, "loss": 0.6932, "step": 169750 }, { "epoch": 9.62, "learning_rate": 2.1889721963671284e-07, "loss": 0.6931, "step": 170000 }, { "epoch": 9.64, "learning_rate": 2.0289043882887604e-07, "loss": 0.6932, "step": 170250 }, { "epoch": 9.65, "learning_rate": 1.8748895148369973e-07, "loss": 0.6933, "step": 170500 }, { "epoch": 9.67, "learning_rate": 1.727511098436996e-07, "loss": 0.6931, "step": 170750 }, { "epoch": 9.68, "learning_rate": 1.5855889766091536e-07, "loss": 0.6932, "step": 171000 }, { "epoch": 9.7, "learning_rate": 1.4497306106658893e-07, "loss": 0.693, "step": 171250 }, { "epoch": 9.71, "learning_rate": 1.3199393168881468e-07, "loss": 0.6932, "step": 171500 }, { "epoch": 9.72, "learning_rate": 1.1962182634605302e-07, "loss": 0.6932, "step": 171750 }, { "epoch": 9.74, "learning_rate": 1.0785704703941135e-07, "loss": 0.6932, "step": 172000 }, { "epoch": 9.75, "learning_rate": 9.669988094526128e-08, "loss": 0.6933, "step": 172250 }, { "epoch": 9.77, "learning_rate": 8.615060040823852e-08, "loss": 0.6931, "step": 172500 }, { "epoch": 9.78, "learning_rate": 7.620946293458442e-08, "loss": 0.6932, "step": 172750 }, { "epoch": 9.79, "learning_rate": 6.691282998163018e-08, "loss": 0.6931, "step": 173000 }, { "epoch": 9.81, "learning_rate": 5.8186256880568555e-08, "loss": 0.6933, "step": 173250 }, { "epoch": 9.82, "learning_rate": 5.006850944801467e-08, "loss": 0.6932, "step": 173500 }, { "epoch": 9.84, "learning_rate": 4.25597858368737e-08, "loss": 0.6934, "step": 173750 }, { "epoch": 9.85, "learning_rate": 3.5660269333887e-08, "loss": 0.6931, "step": 174000 }, { "epoch": 9.87, "learning_rate": 2.9370128355143966e-08, "loss": 0.6932, "step": 174250 }, { "epoch": 9.88, "learning_rate": 2.3689516441977034e-08, "loss": 0.6931, "step": 174500 }, { "epoch": 9.89, "learning_rate": 1.8618572257209087e-08, "loss": 0.6932, "step": 174750 }, { "epoch": 9.91, "learning_rate": 1.4174049338511186e-08, "loss": 0.6932, "step": 175000 }, { "epoch": 9.92, "learning_rate": 1.0320357269619218e-08, "loss": 0.6932, "step": 175250 }, { "epoch": 9.94, "learning_rate": 7.076659268151487e-09, "loss": 0.6932, "step": 175500 }, { "epoch": 9.95, "learning_rate": 4.443034512263689e-09, "loss": 0.6931, "step": 175750 }, { "epoch": 9.96, "learning_rate": 2.4195472883042914e-09, "loss": 0.6931, "step": 176000 }, { "epoch": 9.98, "learning_rate": 1.0062469892796466e-09, "loss": 0.6932, "step": 176250 }, { "epoch": 9.99, "learning_rate": 2.031681136188679e-10, "loss": 0.6932, "step": 176500 }, { "epoch": 10.0, "eval_accuracy": 0.4997452229299363, "eval_f1": 0.3332200798437102, "eval_loss": 0.693359375, "eval_precision": 0.24987261146496814, "eval_recall": 0.5, "eval_runtime": 12.7636, "eval_samples_per_second": 615.031, "eval_steps_per_second": 76.938, "step": 176630 }, { "epoch": 10.0, "step": 176630, "total_flos": 8.989707126226944e+16, "train_loss": 0.6935468676189722, "train_runtime": 13050.2295, "train_samples_per_second": 108.274, "train_steps_per_second": 13.535 } ], "max_steps": 176630, "num_train_epochs": 10, "total_flos": 8.989707126226944e+16, "trial_name": null, "trial_params": { "learning_rate": 1e-05 } }