{ "best_metric": 0.872356495468278, "best_model_checkpoint": "./Research_paper_MLM_all_CGO_Level_2_Final_Model/checkpoint-3000", "epoch": 0.24, "eval_steps": 100, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-07, "loss": 1.1475, "step": 50 }, { "epoch": 0.01, "learning_rate": 2e-07, "loss": 1.1355, "step": 100 }, { "epoch": 0.01, "eval_accuracy": 0.33534743202416917, "eval_f1": 0.28883320685557196, "eval_loss": 1.125103235244751, "eval_precision": 0.25365069358986825, "eval_recall": 0.33534743202416917, "eval_runtime": 10.8046, "eval_samples_per_second": 245.08, "eval_steps_per_second": 3.887, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.9983974358974358e-07, "loss": 1.1142, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.9967948717948717e-07, "loss": 1.0879, "step": 200 }, { "epoch": 0.02, "eval_accuracy": 0.5143504531722054, "eval_f1": 0.3499174782499206, "eval_loss": 1.0747710466384888, "eval_precision": 0.2651511602396808, "eval_recall": 0.5143504531722054, "eval_runtime": 10.7915, "eval_samples_per_second": 245.378, "eval_steps_per_second": 3.892, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.9951923076923076e-07, "loss": 1.0575, "step": 250 }, { "epoch": 0.02, "learning_rate": 1.9935897435897435e-07, "loss": 1.0245, "step": 300 }, { "epoch": 0.02, "eval_accuracy": 0.5173716012084593, "eval_f1": 0.3848590932649115, "eval_loss": 1.0068289041519165, "eval_precision": 0.5250854777959997, "eval_recall": 0.5173716012084593, "eval_runtime": 10.7938, "eval_samples_per_second": 245.326, "eval_steps_per_second": 3.891, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.9919871794871794e-07, "loss": 0.9823, "step": 350 }, { "epoch": 0.03, "learning_rate": 1.9903846153846153e-07, "loss": 0.9271, "step": 400 }, { "epoch": 0.03, "eval_accuracy": 0.5664652567975831, "eval_f1": 0.5612424944690789, "eval_loss": 0.9066060781478882, "eval_precision": 0.5664412983318924, "eval_recall": 0.5664652567975831, "eval_runtime": 10.8011, "eval_samples_per_second": 245.161, "eval_steps_per_second": 3.889, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.9887820512820513e-07, "loss": 0.8753, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.9871794871794872e-07, "loss": 0.8326, "step": 500 }, { "epoch": 0.04, "eval_accuracy": 0.597809667673716, "eval_f1": 0.5936208877467785, "eval_loss": 0.8094537854194641, "eval_precision": 0.606349562686828, "eval_recall": 0.597809667673716, "eval_runtime": 10.7899, "eval_samples_per_second": 245.415, "eval_steps_per_second": 3.893, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.985576923076923e-07, "loss": 0.7909, "step": 550 }, { "epoch": 0.05, "learning_rate": 1.983974358974359e-07, "loss": 0.7634, "step": 600 }, { "epoch": 0.05, "eval_accuracy": 0.6185800604229608, "eval_f1": 0.6127776437556779, "eval_loss": 0.7459443211555481, "eval_precision": 0.6315377091362361, "eval_recall": 0.6185800604229608, "eval_runtime": 16.0132, "eval_samples_per_second": 165.364, "eval_steps_per_second": 2.623, "step": 600 }, { "epoch": 0.05, "learning_rate": 1.982371794871795e-07, "loss": 0.7404, "step": 650 }, { "epoch": 0.06, "learning_rate": 1.9807692307692308e-07, "loss": 0.7334, "step": 700 }, { "epoch": 0.06, "eval_accuracy": 0.6578549848942599, "eval_f1": 0.6578987306609153, "eval_loss": 0.7136017680168152, "eval_precision": 0.6579792469600714, "eval_recall": 0.6578549848942599, "eval_runtime": 10.8054, "eval_samples_per_second": 245.064, "eval_steps_per_second": 3.887, "step": 700 }, { "epoch": 0.06, "learning_rate": 1.9791666666666667e-07, "loss": 0.7162, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.9775641025641026e-07, "loss": 0.7052, "step": 800 }, { "epoch": 0.06, "eval_accuracy": 0.68202416918429, "eval_f1": 0.6820909311917643, "eval_loss": 0.6918750405311584, "eval_precision": 0.6825693212542151, "eval_recall": 0.68202416918429, "eval_runtime": 10.7979, "eval_samples_per_second": 245.233, "eval_steps_per_second": 3.89, "step": 800 }, { "epoch": 0.07, "learning_rate": 1.9759615384615385e-07, "loss": 0.7014, "step": 850 }, { "epoch": 0.07, "learning_rate": 1.9743589743589744e-07, "loss": 0.6917, "step": 900 }, { "epoch": 0.07, "eval_accuracy": 0.7005287009063444, "eval_f1": 0.6970978787648872, "eval_loss": 0.6726363897323608, "eval_precision": 0.7060152626941981, "eval_recall": 0.7005287009063444, "eval_runtime": 10.7791, "eval_samples_per_second": 245.661, "eval_steps_per_second": 3.896, "step": 900 }, { "epoch": 0.08, "learning_rate": 1.9727564102564103e-07, "loss": 0.6746, "step": 950 }, { "epoch": 0.08, "learning_rate": 1.9711538461538462e-07, "loss": 0.669, "step": 1000 }, { "epoch": 0.08, "eval_accuracy": 0.722809667673716, "eval_f1": 0.7197029874640588, "eval_loss": 0.6518763303756714, "eval_precision": 0.7290416924889942, "eval_recall": 0.722809667673716, "eval_runtime": 10.7984, "eval_samples_per_second": 245.222, "eval_steps_per_second": 3.889, "step": 1000 }, { "epoch": 0.08, "learning_rate": 1.969551282051282e-07, "loss": 0.6552, "step": 1050 }, { "epoch": 0.09, "learning_rate": 1.9679487179487178e-07, "loss": 0.6476, "step": 1100 }, { "epoch": 0.09, "eval_accuracy": 0.7379154078549849, "eval_f1": 0.7351061135755582, "eval_loss": 0.6282221674919128, "eval_precision": 0.74437720980925, "eval_recall": 0.7379154078549849, "eval_runtime": 10.7935, "eval_samples_per_second": 245.333, "eval_steps_per_second": 3.891, "step": 1100 }, { "epoch": 0.09, "learning_rate": 1.9663461538461537e-07, "loss": 0.6325, "step": 1150 }, { "epoch": 0.1, "learning_rate": 1.9647435897435896e-07, "loss": 0.6252, "step": 1200 }, { "epoch": 0.1, "eval_accuracy": 0.7492447129909365, "eval_f1": 0.7452052048147401, "eval_loss": 0.5985825061798096, "eval_precision": 0.7607901002801604, "eval_recall": 0.7492447129909365, "eval_runtime": 10.8114, "eval_samples_per_second": 244.926, "eval_steps_per_second": 3.885, "step": 1200 }, { "epoch": 0.1, "learning_rate": 1.9631410256410255e-07, "loss": 0.6067, "step": 1250 }, { "epoch": 0.1, "learning_rate": 1.9615384615384614e-07, "loss": 0.5906, "step": 1300 }, { "epoch": 0.1, "eval_accuracy": 0.7775679758308157, "eval_f1": 0.7747144726300009, "eval_loss": 0.5599903464317322, "eval_precision": 0.7875765823937204, "eval_recall": 0.7775679758308157, "eval_runtime": 10.7946, "eval_samples_per_second": 245.309, "eval_steps_per_second": 3.891, "step": 1300 }, { "epoch": 0.11, "learning_rate": 1.9599358974358973e-07, "loss": 0.5622, "step": 1350 }, { "epoch": 0.11, "learning_rate": 1.9583333333333332e-07, "loss": 0.5391, "step": 1400 }, { "epoch": 0.11, "eval_accuracy": 0.8111782477341389, "eval_f1": 0.8088711339987766, "eval_loss": 0.5117343664169312, "eval_precision": 0.8220985360822193, "eval_recall": 0.8111782477341389, "eval_runtime": 10.8029, "eval_samples_per_second": 245.12, "eval_steps_per_second": 3.888, "step": 1400 }, { "epoch": 0.12, "learning_rate": 1.956730769230769e-07, "loss": 0.5259, "step": 1450 }, { "epoch": 0.12, "learning_rate": 1.955128205128205e-07, "loss": 0.5089, "step": 1500 }, { "epoch": 0.12, "eval_accuracy": 0.8308157099697885, "eval_f1": 0.8288063720795289, "eval_loss": 0.46717309951782227, "eval_precision": 0.8422125041602981, "eval_recall": 0.8308157099697885, "eval_runtime": 10.7869, "eval_samples_per_second": 245.483, "eval_steps_per_second": 3.894, "step": 1500 }, { "epoch": 0.12, "learning_rate": 1.953525641025641e-07, "loss": 0.4812, "step": 1550 }, { "epoch": 0.13, "learning_rate": 1.9519230769230768e-07, "loss": 0.4581, "step": 1600 }, { "epoch": 0.13, "eval_accuracy": 0.8466767371601208, "eval_f1": 0.8452935464575092, "eval_loss": 0.42611148953437805, "eval_precision": 0.8553191629170025, "eval_recall": 0.8466767371601208, "eval_runtime": 10.811, "eval_samples_per_second": 244.936, "eval_steps_per_second": 3.885, "step": 1600 }, { "epoch": 0.13, "learning_rate": 1.9503205128205127e-07, "loss": 0.4265, "step": 1650 }, { "epoch": 0.14, "learning_rate": 1.9487179487179486e-07, "loss": 0.4174, "step": 1700 }, { "epoch": 0.14, "eval_accuracy": 0.8591389728096677, "eval_f1": 0.8581333993450322, "eval_loss": 0.3975095748901367, "eval_precision": 0.865914594919593, "eval_recall": 0.8591389728096677, "eval_runtime": 15.8476, "eval_samples_per_second": 167.092, "eval_steps_per_second": 2.65, "step": 1700 }, { "epoch": 0.14, "learning_rate": 1.9471153846153845e-07, "loss": 0.3979, "step": 1750 }, { "epoch": 0.14, "learning_rate": 1.9455128205128204e-07, "loss": 0.3897, "step": 1800 }, { "epoch": 0.14, "eval_accuracy": 0.8557401812688822, "eval_f1": 0.854075463670531, "eval_loss": 0.3941803574562073, "eval_precision": 0.8677897892001838, "eval_recall": 0.8557401812688822, "eval_runtime": 10.7957, "eval_samples_per_second": 245.283, "eval_steps_per_second": 3.89, "step": 1800 }, { "epoch": 0.15, "learning_rate": 1.9439102564102564e-07, "loss": 0.3619, "step": 1850 }, { "epoch": 0.15, "learning_rate": 1.9423076923076923e-07, "loss": 0.3511, "step": 1900 }, { "epoch": 0.15, "eval_accuracy": 0.8576283987915407, "eval_f1": 0.8557767441533479, "eval_loss": 0.3930197060108185, "eval_precision": 0.8716354305639, "eval_recall": 0.8576283987915407, "eval_runtime": 10.8129, "eval_samples_per_second": 244.893, "eval_steps_per_second": 3.884, "step": 1900 }, { "epoch": 0.16, "learning_rate": 1.9407051282051282e-07, "loss": 0.3537, "step": 1950 }, { "epoch": 0.16, "learning_rate": 1.939102564102564e-07, "loss": 0.3466, "step": 2000 }, { "epoch": 0.16, "eval_accuracy": 0.8606495468277946, "eval_f1": 0.8590762411480044, "eval_loss": 0.3839268088340759, "eval_precision": 0.8725861729431068, "eval_recall": 0.8606495468277946, "eval_runtime": 10.8113, "eval_samples_per_second": 244.93, "eval_steps_per_second": 3.885, "step": 2000 }, { "epoch": 0.16, "learning_rate": 1.9375e-07, "loss": 0.3521, "step": 2050 }, { "epoch": 0.17, "learning_rate": 1.935897435897436e-07, "loss": 0.3389, "step": 2100 }, { "epoch": 0.17, "eval_accuracy": 0.8606495468277946, "eval_f1": 0.8588371845957175, "eval_loss": 0.39029455184936523, "eval_precision": 0.8748009772925901, "eval_recall": 0.8606495468277946, "eval_runtime": 10.802, "eval_samples_per_second": 245.14, "eval_steps_per_second": 3.888, "step": 2100 }, { "epoch": 0.17, "learning_rate": 1.9342948717948718e-07, "loss": 0.3238, "step": 2150 }, { "epoch": 0.18, "learning_rate": 1.9326923076923077e-07, "loss": 0.3075, "step": 2200 }, { "epoch": 0.18, "eval_accuracy": 0.8602719033232629, "eval_f1": 0.8583410955489438, "eval_loss": 0.3917655050754547, "eval_precision": 0.8754701190846773, "eval_recall": 0.8602719033232629, "eval_runtime": 10.8054, "eval_samples_per_second": 245.064, "eval_steps_per_second": 3.887, "step": 2200 }, { "epoch": 0.18, "learning_rate": 1.9310897435897436e-07, "loss": 0.3102, "step": 2250 }, { "epoch": 0.18, "learning_rate": 1.9294871794871795e-07, "loss": 0.3101, "step": 2300 }, { "epoch": 0.18, "eval_accuracy": 0.8666918429003021, "eval_f1": 0.8652087190619578, "eval_loss": 0.3828712999820709, "eval_precision": 0.878661956862829, "eval_recall": 0.8666918429003021, "eval_runtime": 15.99, "eval_samples_per_second": 165.604, "eval_steps_per_second": 2.627, "step": 2300 }, { "epoch": 0.19, "learning_rate": 1.9278846153846154e-07, "loss": 0.2863, "step": 2350 }, { "epoch": 0.19, "learning_rate": 1.9262820512820513e-07, "loss": 0.3009, "step": 2400 }, { "epoch": 0.19, "eval_accuracy": 0.8621601208459214, "eval_f1": 0.8602680217103188, "eval_loss": 0.4013417959213257, "eval_precision": 0.8773335989772154, "eval_recall": 0.8621601208459214, "eval_runtime": 10.8074, "eval_samples_per_second": 245.018, "eval_steps_per_second": 3.886, "step": 2400 }, { "epoch": 0.2, "learning_rate": 1.9246794871794872e-07, "loss": 0.306, "step": 2450 }, { "epoch": 0.2, "learning_rate": 1.9230769230769231e-07, "loss": 0.2866, "step": 2500 }, { "epoch": 0.2, "eval_accuracy": 0.8640483383685801, "eval_f1": 0.862268075795537, "eval_loss": 0.3997219502925873, "eval_precision": 0.8784797318905917, "eval_recall": 0.8640483383685801, "eval_runtime": 10.796, "eval_samples_per_second": 245.277, "eval_steps_per_second": 3.89, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.921474358974359e-07, "loss": 0.2747, "step": 2550 }, { "epoch": 0.21, "learning_rate": 1.919871794871795e-07, "loss": 0.2856, "step": 2600 }, { "epoch": 0.21, "eval_accuracy": 0.8587613293051359, "eval_f1": 0.8564860641576998, "eval_loss": 0.4260440468788147, "eval_precision": 0.8769254342874869, "eval_recall": 0.8587613293051359, "eval_runtime": 10.798, "eval_samples_per_second": 245.231, "eval_steps_per_second": 3.89, "step": 2600 }, { "epoch": 0.21, "learning_rate": 1.9182692307692309e-07, "loss": 0.2907, "step": 2650 }, { "epoch": 0.22, "learning_rate": 1.9166666666666668e-07, "loss": 0.2871, "step": 2700 }, { "epoch": 0.22, "eval_accuracy": 0.8632930513595166, "eval_f1": 0.8613280138625302, "eval_loss": 0.4157187342643738, "eval_precision": 0.8793867110640158, "eval_recall": 0.8632930513595166, "eval_runtime": 10.8079, "eval_samples_per_second": 245.007, "eval_steps_per_second": 3.886, "step": 2700 }, { "epoch": 0.22, "learning_rate": 1.9150641025641027e-07, "loss": 0.2585, "step": 2750 }, { "epoch": 0.22, "learning_rate": 1.9134615384615386e-07, "loss": 0.2894, "step": 2800 }, { "epoch": 0.22, "eval_accuracy": 0.8644259818731118, "eval_f1": 0.8624898933835367, "eval_loss": 0.4176577627658844, "eval_precision": 0.8804547245205405, "eval_recall": 0.8644259818731118, "eval_runtime": 10.8056, "eval_samples_per_second": 245.059, "eval_steps_per_second": 3.887, "step": 2800 }, { "epoch": 0.23, "learning_rate": 1.9118589743589745e-07, "loss": 0.2551, "step": 2850 }, { "epoch": 0.23, "learning_rate": 1.9102564102564104e-07, "loss": 0.2755, "step": 2900 }, { "epoch": 0.23, "eval_accuracy": 0.8659365558912386, "eval_f1": 0.8639206748641556, "eval_loss": 0.4247892498970032, "eval_precision": 0.8830682801454843, "eval_recall": 0.8659365558912386, "eval_runtime": 10.8115, "eval_samples_per_second": 244.924, "eval_steps_per_second": 3.885, "step": 2900 }, { "epoch": 0.24, "learning_rate": 1.9086538461538463e-07, "loss": 0.2494, "step": 2950 }, { "epoch": 0.24, "learning_rate": 1.9070512820512822e-07, "loss": 0.2508, "step": 3000 }, { "epoch": 0.24, "eval_accuracy": 0.872356495468278, "eval_f1": 0.8707077161534905, "eval_loss": 0.41548070311546326, "eval_precision": 0.8869475341602814, "eval_recall": 0.872356495468278, "eval_runtime": 10.7909, "eval_samples_per_second": 245.393, "eval_steps_per_second": 3.892, "step": 3000 } ], "logging_steps": 50, "max_steps": 62500, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 2.5209554336064e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }