Simba-SLID-49 / trainer_state.json
elmadany's picture
Initial model upload
4701d9d verified
{
"best_metric": 0.4363306793570824,
"best_model_checkpoint": "./outputs_slid/ajesujoba/AfriHuBERT/checkpoint-1830",
"epoch": 29.99591836734694,
"eval_steps": 500,
"global_step": 5490,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9959183673469387,
"grad_norm": 0.7721740007400513,
"learning_rate": 1.6666666666666667e-05,
"loss": 3.7874,
"step": 183
},
{
"epoch": 0.9959183673469387,
"eval_accuracy": 0.04349865165904561,
"eval_f1": 0.004599781789327515,
"eval_loss": 3.9047515392303467,
"eval_runtime": 28.6696,
"eval_samples_per_second": 297.493,
"eval_steps_per_second": 0.593,
"step": 183
},
{
"epoch": 1.9959183673469387,
"grad_norm": 0.8705180883407593,
"learning_rate": 3.3333333333333335e-05,
"loss": 3.0785,
"step": 366
},
{
"epoch": 1.9959183673469387,
"eval_accuracy": 0.16649079610739828,
"eval_f1": 0.07176290896776823,
"eval_loss": 3.3783769607543945,
"eval_runtime": 17.2721,
"eval_samples_per_second": 493.804,
"eval_steps_per_second": 0.984,
"step": 366
},
{
"epoch": 2.9959183673469387,
"grad_norm": 1.0882235765457153,
"learning_rate": 5e-05,
"loss": 1.9687,
"step": 549
},
{
"epoch": 2.9959183673469387,
"eval_accuracy": 0.41739946066361827,
"eval_f1": 0.23146127598121502,
"eval_loss": 2.4746670722961426,
"eval_runtime": 18.0262,
"eval_samples_per_second": 473.145,
"eval_steps_per_second": 0.943,
"step": 549
},
{
"epoch": 3.9959183673469387,
"grad_norm": 0.830756425857544,
"learning_rate": 4.983095894354858e-05,
"loss": 1.0019,
"step": 732
},
{
"epoch": 3.9959183673469387,
"eval_accuracy": 0.5312463360300153,
"eval_f1": 0.33343763170872565,
"eval_loss": 2.056602954864502,
"eval_runtime": 17.7176,
"eval_samples_per_second": 481.386,
"eval_steps_per_second": 0.959,
"step": 732
},
{
"epoch": 4.995918367346938,
"grad_norm": 1.341150164604187,
"learning_rate": 4.9326121764495596e-05,
"loss": 0.4955,
"step": 915
},
{
"epoch": 4.995918367346938,
"eval_accuracy": 0.5872904209168719,
"eval_f1": 0.3966908687854425,
"eval_loss": 2.070507526397705,
"eval_runtime": 17.7204,
"eval_samples_per_second": 481.309,
"eval_steps_per_second": 0.959,
"step": 915
},
{
"epoch": 5.995918367346938,
"grad_norm": 1.4914641380310059,
"learning_rate": 4.849231551964771e-05,
"loss": 0.3149,
"step": 1098
},
{
"epoch": 5.995918367346938,
"eval_accuracy": 0.608277640989565,
"eval_f1": 0.41221796485256534,
"eval_loss": 2.174699544906616,
"eval_runtime": 18.7633,
"eval_samples_per_second": 454.558,
"eval_steps_per_second": 0.906,
"step": 1098
},
{
"epoch": 6.995918367346938,
"grad_norm": 1.016514539718628,
"learning_rate": 4.734081600808531e-05,
"loss": 0.2324,
"step": 1281
},
{
"epoch": 6.995918367346938,
"eval_accuracy": 0.6051119709227342,
"eval_f1": 0.42029644401424293,
"eval_loss": 2.536925792694092,
"eval_runtime": 19.0396,
"eval_samples_per_second": 447.961,
"eval_steps_per_second": 0.893,
"step": 1281
},
{
"epoch": 7.995918367346938,
"grad_norm": 0.6603855490684509,
"learning_rate": 4.588719528532342e-05,
"loss": 0.1825,
"step": 1464
},
{
"epoch": 7.995918367346938,
"eval_accuracy": 0.5930355258529723,
"eval_f1": 0.37922494807809526,
"eval_loss": 2.6477608680725098,
"eval_runtime": 18.8796,
"eval_samples_per_second": 451.757,
"eval_steps_per_second": 0.9,
"step": 1464
},
{
"epoch": 8.995918367346938,
"grad_norm": 0.9515678286552429,
"learning_rate": 4.415111107797445e-05,
"loss": 0.1581,
"step": 1647
},
{
"epoch": 8.995918367346938,
"eval_accuracy": 0.5848282330871145,
"eval_f1": 0.3902253760074279,
"eval_loss": 2.7652101516723633,
"eval_runtime": 28.9433,
"eval_samples_per_second": 294.68,
"eval_steps_per_second": 0.587,
"step": 1647
},
{
"epoch": 9.995918367346938,
"grad_norm": 0.5628945827484131,
"learning_rate": 4.215604094671835e-05,
"loss": 0.1386,
"step": 1830
},
{
"epoch": 9.995918367346938,
"eval_accuracy": 0.6253957087583538,
"eval_f1": 0.4363306793570824,
"eval_loss": 2.5493264198303223,
"eval_runtime": 17.9843,
"eval_samples_per_second": 474.247,
"eval_steps_per_second": 0.945,
"step": 1830
},
{
"epoch": 10.995918367346938,
"grad_norm": 0.5759875178337097,
"learning_rate": 3.9928964792569655e-05,
"loss": 0.13,
"step": 2013
},
{
"epoch": 10.995918367346938,
"eval_accuracy": 0.6325477781686012,
"eval_f1": 0.42658322719917263,
"eval_loss": 2.668961763381958,
"eval_runtime": 17.9422,
"eval_samples_per_second": 475.359,
"eval_steps_per_second": 0.947,
"step": 2013
},
{
"epoch": 11.995918367346938,
"grad_norm": 0.7909059524536133,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.1134,
"step": 2196
},
{
"epoch": 11.995918367346938,
"eval_accuracy": 0.5902215969046781,
"eval_f1": 0.40717895597633597,
"eval_loss": 2.847268581390381,
"eval_runtime": 18.1922,
"eval_samples_per_second": 468.828,
"eval_steps_per_second": 0.934,
"step": 2196
},
{
"epoch": 12.995918367346938,
"grad_norm": 0.6743366718292236,
"learning_rate": 3.490199415097892e-05,
"loss": 0.1078,
"step": 2379
},
{
"epoch": 12.995918367346938,
"eval_accuracy": 0.6048774768437097,
"eval_f1": 0.40486374255791757,
"eval_loss": 2.909079074859619,
"eval_runtime": 17.3197,
"eval_samples_per_second": 492.446,
"eval_steps_per_second": 0.982,
"step": 2379
},
{
"epoch": 13.995918367346938,
"grad_norm": 0.6435021758079529,
"learning_rate": 3.217008081777726e-05,
"loss": 0.0929,
"step": 2562
},
{
"epoch": 13.995918367346938,
"eval_accuracy": 0.6124985344120061,
"eval_f1": 0.402051577315403,
"eval_loss": 2.901214599609375,
"eval_runtime": 18.278,
"eval_samples_per_second": 466.625,
"eval_steps_per_second": 0.93,
"step": 2562
},
{
"epoch": 14.995918367346938,
"grad_norm": 0.7225833535194397,
"learning_rate": 2.9341204441673266e-05,
"loss": 0.0879,
"step": 2745
},
{
"epoch": 14.995918367346938,
"eval_accuracy": 0.5815453159807715,
"eval_f1": 0.3787146481538575,
"eval_loss": 2.927959442138672,
"eval_runtime": 19.3124,
"eval_samples_per_second": 441.634,
"eval_steps_per_second": 0.88,
"step": 2745
},
{
"epoch": 15.995918367346938,
"grad_norm": 0.519130527973175,
"learning_rate": 2.6453620722761896e-05,
"loss": 0.0875,
"step": 2928
},
{
"epoch": 15.995918367346938,
"eval_accuracy": 0.6116778051354204,
"eval_f1": 0.42421911178450894,
"eval_loss": 2.8714120388031006,
"eval_runtime": 18.6944,
"eval_samples_per_second": 456.233,
"eval_steps_per_second": 0.909,
"step": 2928
},
{
"epoch": 16.99591836734694,
"grad_norm": 0.5847667455673218,
"learning_rate": 2.3546379277238107e-05,
"loss": 0.083,
"step": 3111
},
{
"epoch": 16.99591836734694,
"eval_accuracy": 0.604994723883222,
"eval_f1": 0.40283444897722465,
"eval_loss": 2.9251325130462646,
"eval_runtime": 19.0241,
"eval_samples_per_second": 448.325,
"eval_steps_per_second": 0.894,
"step": 3111
},
{
"epoch": 17.99591836734694,
"grad_norm": 0.5335302948951721,
"learning_rate": 2.0658795558326743e-05,
"loss": 0.0743,
"step": 3294
},
{
"epoch": 17.99591836734694,
"eval_accuracy": 0.6085121350685895,
"eval_f1": 0.3982368535619314,
"eval_loss": 2.907853364944458,
"eval_runtime": 18.6799,
"eval_samples_per_second": 456.587,
"eval_steps_per_second": 0.91,
"step": 3294
},
{
"epoch": 18.99591836734694,
"grad_norm": 0.6082349419593811,
"learning_rate": 1.7829919182222752e-05,
"loss": 0.0743,
"step": 3477
},
{
"epoch": 18.99591836734694,
"eval_accuracy": 0.6140227459256654,
"eval_f1": 0.40722488778058297,
"eval_loss": 2.9568777084350586,
"eval_runtime": 18.2131,
"eval_samples_per_second": 468.288,
"eval_steps_per_second": 0.933,
"step": 3477
},
{
"epoch": 19.99591836734694,
"grad_norm": 0.5372836589813232,
"learning_rate": 1.5112603381728762e-05,
"loss": 0.0745,
"step": 3660
},
{
"epoch": 19.99591836734694,
"eval_accuracy": 0.6022980419744401,
"eval_f1": 0.3888247133789473,
"eval_loss": 3.133009910583496,
"eval_runtime": 19.5015,
"eval_samples_per_second": 437.351,
"eval_steps_per_second": 0.872,
"step": 3660
},
{
"epoch": 20.99591836734694,
"grad_norm": 0.4080846905708313,
"learning_rate": 1.2513768458995337e-05,
"loss": 0.0641,
"step": 3843
},
{
"epoch": 20.99591836734694,
"eval_accuracy": 0.6041739946066362,
"eval_f1": 0.4024604989707059,
"eval_loss": 3.086355447769165,
"eval_runtime": 18.9488,
"eval_samples_per_second": 450.109,
"eval_steps_per_second": 0.897,
"step": 3843
},
{
"epoch": 21.99591836734694,
"grad_norm": 0.6301392316818237,
"learning_rate": 1.0083788397924998e-05,
"loss": 0.0611,
"step": 4026
},
{
"epoch": 21.99591836734694,
"eval_accuracy": 0.611560558095908,
"eval_f1": 0.4250797125355288,
"eval_loss": 3.1089813709259033,
"eval_runtime": 19.3666,
"eval_samples_per_second": 440.398,
"eval_steps_per_second": 0.878,
"step": 4026
},
{
"epoch": 22.99591836734694,
"grad_norm": 0.7403397560119629,
"learning_rate": 7.855524510252082e-06,
"loss": 0.0618,
"step": 4209
},
{
"epoch": 22.99591836734694,
"eval_accuracy": 0.6095673584241997,
"eval_f1": 0.38478101379896623,
"eval_loss": 3.165566921234131,
"eval_runtime": 18.268,
"eval_samples_per_second": 466.882,
"eval_steps_per_second": 0.931,
"step": 4209
},
{
"epoch": 23.99591836734694,
"grad_norm": 0.6018996238708496,
"learning_rate": 5.8591102425065766e-06,
"loss": 0.0595,
"step": 4392
},
{
"epoch": 23.99591836734694,
"eval_accuracy": 0.6026497830929769,
"eval_f1": 0.4033953887201948,
"eval_loss": 3.182464122772217,
"eval_runtime": 18.8509,
"eval_samples_per_second": 452.446,
"eval_steps_per_second": 0.902,
"step": 4392
},
{
"epoch": 24.99591836734694,
"grad_norm": 0.7152003049850464,
"learning_rate": 4.1215436728432114e-06,
"loss": 0.0549,
"step": 4575
},
{
"epoch": 24.99591836734694,
"eval_accuracy": 0.6062844413178567,
"eval_f1": 0.3998774411315016,
"eval_loss": 3.2211174964904785,
"eval_runtime": 18.4161,
"eval_samples_per_second": 463.128,
"eval_steps_per_second": 0.923,
"step": 4575
},
{
"epoch": 25.99591836734694,
"grad_norm": 0.655457615852356,
"learning_rate": 2.6663224083492645e-06,
"loss": 0.0578,
"step": 4758
},
{
"epoch": 25.99591836734694,
"eval_accuracy": 0.6093328643451753,
"eval_f1": 0.40241682477511076,
"eval_loss": 3.154259204864502,
"eval_runtime": 19.0328,
"eval_samples_per_second": 448.122,
"eval_steps_per_second": 0.893,
"step": 4758
},
{
"epoch": 26.99591836734694,
"grad_norm": 0.8799217939376831,
"learning_rate": 1.5131258202183586e-06,
"loss": 0.0531,
"step": 4941
},
{
"epoch": 26.99591836734694,
"eval_accuracy": 0.611560558095908,
"eval_f1": 0.4136571965633068,
"eval_loss": 3.1584064960479736,
"eval_runtime": 19.4229,
"eval_samples_per_second": 439.121,
"eval_steps_per_second": 0.875,
"step": 4941
},
{
"epoch": 27.99591836734694,
"grad_norm": 0.5971439480781555,
"learning_rate": 6.775489140148194e-07,
"loss": 0.0556,
"step": 5124
},
{
"epoch": 27.99591836734694,
"eval_accuracy": 0.6054637120412709,
"eval_f1": 0.4107652565512037,
"eval_loss": 3.177584171295166,
"eval_runtime": 18.7393,
"eval_samples_per_second": 455.14,
"eval_steps_per_second": 0.907,
"step": 5124
},
{
"epoch": 28.99591836734694,
"grad_norm": 0.5378488898277283,
"learning_rate": 1.7089143397631958e-07,
"loss": 0.0592,
"step": 5307
},
{
"epoch": 28.99591836734694,
"eval_accuracy": 0.604994723883222,
"eval_f1": 0.41074234435939105,
"eval_loss": 3.1705150604248047,
"eval_runtime": 19.1621,
"eval_samples_per_second": 445.096,
"eval_steps_per_second": 0.887,
"step": 5307
},
{
"epoch": 29.99591836734694,
"grad_norm": 0.7799643278121948,
"learning_rate": 5.053357646223056e-12,
"loss": 0.0511,
"step": 5490
},
{
"epoch": 29.99591836734694,
"eval_accuracy": 0.6051119709227342,
"eval_f1": 0.41072097568738997,
"eval_loss": 3.1688835620880127,
"eval_runtime": 18.8369,
"eval_samples_per_second": 452.78,
"eval_steps_per_second": 0.902,
"step": 5490
},
{
"epoch": 29.99591836734694,
"step": 5490,
"total_flos": 5.117922821239409e+20,
"train_loss": 0.060013725892225034,
"train_runtime": 5236.9953,
"train_samples_per_second": 2153.277,
"train_steps_per_second": 1.048
}
],
"logging_steps": 500,
"max_steps": 5490,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.117922821239409e+20,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}