|
{ |
|
"best_metric": 0.872356495468278, |
|
"best_model_checkpoint": "./Research_paper_MLM_all_CGO_Level_2_Final_Model/checkpoint-3000", |
|
"epoch": 0.24, |
|
"eval_steps": 100, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-07, |
|
"loss": 1.1475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2e-07, |
|
"loss": 1.1355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.33534743202416917, |
|
"eval_f1": 0.28883320685557196, |
|
"eval_loss": 1.125103235244751, |
|
"eval_precision": 0.25365069358986825, |
|
"eval_recall": 0.33534743202416917, |
|
"eval_runtime": 10.8046, |
|
"eval_samples_per_second": 245.08, |
|
"eval_steps_per_second": 3.887, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9983974358974358e-07, |
|
"loss": 1.1142, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9967948717948717e-07, |
|
"loss": 1.0879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5143504531722054, |
|
"eval_f1": 0.3499174782499206, |
|
"eval_loss": 1.0747710466384888, |
|
"eval_precision": 0.2651511602396808, |
|
"eval_recall": 0.5143504531722054, |
|
"eval_runtime": 10.7915, |
|
"eval_samples_per_second": 245.378, |
|
"eval_steps_per_second": 3.892, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9951923076923076e-07, |
|
"loss": 1.0575, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9935897435897435e-07, |
|
"loss": 1.0245, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5173716012084593, |
|
"eval_f1": 0.3848590932649115, |
|
"eval_loss": 1.0068289041519165, |
|
"eval_precision": 0.5250854777959997, |
|
"eval_recall": 0.5173716012084593, |
|
"eval_runtime": 10.7938, |
|
"eval_samples_per_second": 245.326, |
|
"eval_steps_per_second": 3.891, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9919871794871794e-07, |
|
"loss": 0.9823, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9903846153846153e-07, |
|
"loss": 0.9271, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.5664652567975831, |
|
"eval_f1": 0.5612424944690789, |
|
"eval_loss": 0.9066060781478882, |
|
"eval_precision": 0.5664412983318924, |
|
"eval_recall": 0.5664652567975831, |
|
"eval_runtime": 10.8011, |
|
"eval_samples_per_second": 245.161, |
|
"eval_steps_per_second": 3.889, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9887820512820513e-07, |
|
"loss": 0.8753, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9871794871794872e-07, |
|
"loss": 0.8326, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.597809667673716, |
|
"eval_f1": 0.5936208877467785, |
|
"eval_loss": 0.8094537854194641, |
|
"eval_precision": 0.606349562686828, |
|
"eval_recall": 0.597809667673716, |
|
"eval_runtime": 10.7899, |
|
"eval_samples_per_second": 245.415, |
|
"eval_steps_per_second": 3.893, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.985576923076923e-07, |
|
"loss": 0.7909, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.983974358974359e-07, |
|
"loss": 0.7634, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.6185800604229608, |
|
"eval_f1": 0.6127776437556779, |
|
"eval_loss": 0.7459443211555481, |
|
"eval_precision": 0.6315377091362361, |
|
"eval_recall": 0.6185800604229608, |
|
"eval_runtime": 16.0132, |
|
"eval_samples_per_second": 165.364, |
|
"eval_steps_per_second": 2.623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.982371794871795e-07, |
|
"loss": 0.7404, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9807692307692308e-07, |
|
"loss": 0.7334, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6578549848942599, |
|
"eval_f1": 0.6578987306609153, |
|
"eval_loss": 0.7136017680168152, |
|
"eval_precision": 0.6579792469600714, |
|
"eval_recall": 0.6578549848942599, |
|
"eval_runtime": 10.8054, |
|
"eval_samples_per_second": 245.064, |
|
"eval_steps_per_second": 3.887, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9791666666666667e-07, |
|
"loss": 0.7162, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9775641025641026e-07, |
|
"loss": 0.7052, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.68202416918429, |
|
"eval_f1": 0.6820909311917643, |
|
"eval_loss": 0.6918750405311584, |
|
"eval_precision": 0.6825693212542151, |
|
"eval_recall": 0.68202416918429, |
|
"eval_runtime": 10.7979, |
|
"eval_samples_per_second": 245.233, |
|
"eval_steps_per_second": 3.89, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9759615384615385e-07, |
|
"loss": 0.7014, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9743589743589744e-07, |
|
"loss": 0.6917, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.7005287009063444, |
|
"eval_f1": 0.6970978787648872, |
|
"eval_loss": 0.6726363897323608, |
|
"eval_precision": 0.7060152626941981, |
|
"eval_recall": 0.7005287009063444, |
|
"eval_runtime": 10.7791, |
|
"eval_samples_per_second": 245.661, |
|
"eval_steps_per_second": 3.896, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9727564102564103e-07, |
|
"loss": 0.6746, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9711538461538462e-07, |
|
"loss": 0.669, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.722809667673716, |
|
"eval_f1": 0.7197029874640588, |
|
"eval_loss": 0.6518763303756714, |
|
"eval_precision": 0.7290416924889942, |
|
"eval_recall": 0.722809667673716, |
|
"eval_runtime": 10.7984, |
|
"eval_samples_per_second": 245.222, |
|
"eval_steps_per_second": 3.889, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.969551282051282e-07, |
|
"loss": 0.6552, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9679487179487178e-07, |
|
"loss": 0.6476, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.7379154078549849, |
|
"eval_f1": 0.7351061135755582, |
|
"eval_loss": 0.6282221674919128, |
|
"eval_precision": 0.74437720980925, |
|
"eval_recall": 0.7379154078549849, |
|
"eval_runtime": 10.7935, |
|
"eval_samples_per_second": 245.333, |
|
"eval_steps_per_second": 3.891, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9663461538461537e-07, |
|
"loss": 0.6325, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9647435897435896e-07, |
|
"loss": 0.6252, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7492447129909365, |
|
"eval_f1": 0.7452052048147401, |
|
"eval_loss": 0.5985825061798096, |
|
"eval_precision": 0.7607901002801604, |
|
"eval_recall": 0.7492447129909365, |
|
"eval_runtime": 10.8114, |
|
"eval_samples_per_second": 244.926, |
|
"eval_steps_per_second": 3.885, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9631410256410255e-07, |
|
"loss": 0.6067, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9615384615384614e-07, |
|
"loss": 0.5906, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7775679758308157, |
|
"eval_f1": 0.7747144726300009, |
|
"eval_loss": 0.5599903464317322, |
|
"eval_precision": 0.7875765823937204, |
|
"eval_recall": 0.7775679758308157, |
|
"eval_runtime": 10.7946, |
|
"eval_samples_per_second": 245.309, |
|
"eval_steps_per_second": 3.891, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9599358974358973e-07, |
|
"loss": 0.5622, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9583333333333332e-07, |
|
"loss": 0.5391, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.8111782477341389, |
|
"eval_f1": 0.8088711339987766, |
|
"eval_loss": 0.5117343664169312, |
|
"eval_precision": 0.8220985360822193, |
|
"eval_recall": 0.8111782477341389, |
|
"eval_runtime": 10.8029, |
|
"eval_samples_per_second": 245.12, |
|
"eval_steps_per_second": 3.888, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.956730769230769e-07, |
|
"loss": 0.5259, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.955128205128205e-07, |
|
"loss": 0.5089, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.8308157099697885, |
|
"eval_f1": 0.8288063720795289, |
|
"eval_loss": 0.46717309951782227, |
|
"eval_precision": 0.8422125041602981, |
|
"eval_recall": 0.8308157099697885, |
|
"eval_runtime": 10.7869, |
|
"eval_samples_per_second": 245.483, |
|
"eval_steps_per_second": 3.894, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.953525641025641e-07, |
|
"loss": 0.4812, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9519230769230768e-07, |
|
"loss": 0.4581, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.8466767371601208, |
|
"eval_f1": 0.8452935464575092, |
|
"eval_loss": 0.42611148953437805, |
|
"eval_precision": 0.8553191629170025, |
|
"eval_recall": 0.8466767371601208, |
|
"eval_runtime": 10.811, |
|
"eval_samples_per_second": 244.936, |
|
"eval_steps_per_second": 3.885, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9503205128205127e-07, |
|
"loss": 0.4265, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9487179487179486e-07, |
|
"loss": 0.4174, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.8591389728096677, |
|
"eval_f1": 0.8581333993450322, |
|
"eval_loss": 0.3975095748901367, |
|
"eval_precision": 0.865914594919593, |
|
"eval_recall": 0.8591389728096677, |
|
"eval_runtime": 15.8476, |
|
"eval_samples_per_second": 167.092, |
|
"eval_steps_per_second": 2.65, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9471153846153845e-07, |
|
"loss": 0.3979, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9455128205128204e-07, |
|
"loss": 0.3897, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.8557401812688822, |
|
"eval_f1": 0.854075463670531, |
|
"eval_loss": 0.3941803574562073, |
|
"eval_precision": 0.8677897892001838, |
|
"eval_recall": 0.8557401812688822, |
|
"eval_runtime": 10.7957, |
|
"eval_samples_per_second": 245.283, |
|
"eval_steps_per_second": 3.89, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9439102564102564e-07, |
|
"loss": 0.3619, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9423076923076923e-07, |
|
"loss": 0.3511, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.8576283987915407, |
|
"eval_f1": 0.8557767441533479, |
|
"eval_loss": 0.3930197060108185, |
|
"eval_precision": 0.8716354305639, |
|
"eval_recall": 0.8576283987915407, |
|
"eval_runtime": 10.8129, |
|
"eval_samples_per_second": 244.893, |
|
"eval_steps_per_second": 3.884, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9407051282051282e-07, |
|
"loss": 0.3537, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.939102564102564e-07, |
|
"loss": 0.3466, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.8606495468277946, |
|
"eval_f1": 0.8590762411480044, |
|
"eval_loss": 0.3839268088340759, |
|
"eval_precision": 0.8725861729431068, |
|
"eval_recall": 0.8606495468277946, |
|
"eval_runtime": 10.8113, |
|
"eval_samples_per_second": 244.93, |
|
"eval_steps_per_second": 3.885, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9375e-07, |
|
"loss": 0.3521, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.935897435897436e-07, |
|
"loss": 0.3389, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.8606495468277946, |
|
"eval_f1": 0.8588371845957175, |
|
"eval_loss": 0.39029455184936523, |
|
"eval_precision": 0.8748009772925901, |
|
"eval_recall": 0.8606495468277946, |
|
"eval_runtime": 10.802, |
|
"eval_samples_per_second": 245.14, |
|
"eval_steps_per_second": 3.888, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9342948717948718e-07, |
|
"loss": 0.3238, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9326923076923077e-07, |
|
"loss": 0.3075, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.8602719033232629, |
|
"eval_f1": 0.8583410955489438, |
|
"eval_loss": 0.3917655050754547, |
|
"eval_precision": 0.8754701190846773, |
|
"eval_recall": 0.8602719033232629, |
|
"eval_runtime": 10.8054, |
|
"eval_samples_per_second": 245.064, |
|
"eval_steps_per_second": 3.887, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9310897435897436e-07, |
|
"loss": 0.3102, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9294871794871795e-07, |
|
"loss": 0.3101, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.8666918429003021, |
|
"eval_f1": 0.8652087190619578, |
|
"eval_loss": 0.3828712999820709, |
|
"eval_precision": 0.878661956862829, |
|
"eval_recall": 0.8666918429003021, |
|
"eval_runtime": 15.99, |
|
"eval_samples_per_second": 165.604, |
|
"eval_steps_per_second": 2.627, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9278846153846154e-07, |
|
"loss": 0.2863, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9262820512820513e-07, |
|
"loss": 0.3009, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8621601208459214, |
|
"eval_f1": 0.8602680217103188, |
|
"eval_loss": 0.4013417959213257, |
|
"eval_precision": 0.8773335989772154, |
|
"eval_recall": 0.8621601208459214, |
|
"eval_runtime": 10.8074, |
|
"eval_samples_per_second": 245.018, |
|
"eval_steps_per_second": 3.886, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9246794871794872e-07, |
|
"loss": 0.306, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9230769230769231e-07, |
|
"loss": 0.2866, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.8640483383685801, |
|
"eval_f1": 0.862268075795537, |
|
"eval_loss": 0.3997219502925873, |
|
"eval_precision": 0.8784797318905917, |
|
"eval_recall": 0.8640483383685801, |
|
"eval_runtime": 10.796, |
|
"eval_samples_per_second": 245.277, |
|
"eval_steps_per_second": 3.89, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.921474358974359e-07, |
|
"loss": 0.2747, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.919871794871795e-07, |
|
"loss": 0.2856, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.8587613293051359, |
|
"eval_f1": 0.8564860641576998, |
|
"eval_loss": 0.4260440468788147, |
|
"eval_precision": 0.8769254342874869, |
|
"eval_recall": 0.8587613293051359, |
|
"eval_runtime": 10.798, |
|
"eval_samples_per_second": 245.231, |
|
"eval_steps_per_second": 3.89, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9182692307692309e-07, |
|
"loss": 0.2907, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9166666666666668e-07, |
|
"loss": 0.2871, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.8632930513595166, |
|
"eval_f1": 0.8613280138625302, |
|
"eval_loss": 0.4157187342643738, |
|
"eval_precision": 0.8793867110640158, |
|
"eval_recall": 0.8632930513595166, |
|
"eval_runtime": 10.8079, |
|
"eval_samples_per_second": 245.007, |
|
"eval_steps_per_second": 3.886, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9150641025641027e-07, |
|
"loss": 0.2585, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9134615384615386e-07, |
|
"loss": 0.2894, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.8644259818731118, |
|
"eval_f1": 0.8624898933835367, |
|
"eval_loss": 0.4176577627658844, |
|
"eval_precision": 0.8804547245205405, |
|
"eval_recall": 0.8644259818731118, |
|
"eval_runtime": 10.8056, |
|
"eval_samples_per_second": 245.059, |
|
"eval_steps_per_second": 3.887, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9118589743589745e-07, |
|
"loss": 0.2551, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9102564102564104e-07, |
|
"loss": 0.2755, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.8659365558912386, |
|
"eval_f1": 0.8639206748641556, |
|
"eval_loss": 0.4247892498970032, |
|
"eval_precision": 0.8830682801454843, |
|
"eval_recall": 0.8659365558912386, |
|
"eval_runtime": 10.8115, |
|
"eval_samples_per_second": 244.924, |
|
"eval_steps_per_second": 3.885, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9086538461538463e-07, |
|
"loss": 0.2494, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9070512820512822e-07, |
|
"loss": 0.2508, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.872356495468278, |
|
"eval_f1": 0.8707077161534905, |
|
"eval_loss": 0.41548070311546326, |
|
"eval_precision": 0.8869475341602814, |
|
"eval_recall": 0.872356495468278, |
|
"eval_runtime": 10.7909, |
|
"eval_samples_per_second": 245.393, |
|
"eval_steps_per_second": 3.892, |
|
"step": 3000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 62500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"total_flos": 2.5209554336064e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|