{
"best_metric": 7.317912578582764,
"best_model_checkpoint": "bert_tiny_lda_5_v1/checkpoint-30000",
"epoch": 25.0,
"eval_steps": 10000,
"global_step": 35725,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.34989503149055284,
"grad_norm": 2.1514995098114014,
"learning_rate": 5e-06,
"loss": 11.3174,
"step": 500
},
{
"epoch": 0.6997900629811057,
"grad_norm": 0.9805640578269958,
"learning_rate": 1e-05,
"loss": 9.6408,
"step": 1000
},
{
"epoch": 1.0496850944716585,
"grad_norm": 0.583168625831604,
"learning_rate": 1.5e-05,
"loss": 8.6938,
"step": 1500
},
{
"epoch": 1.3995801259622114,
"grad_norm": 0.581084132194519,
"learning_rate": 2e-05,
"loss": 8.503,
"step": 2000
},
{
"epoch": 1.749475157452764,
"grad_norm": 0.6091183423995972,
"learning_rate": 2.5e-05,
"loss": 8.3913,
"step": 2500
},
{
"epoch": 2.099370188943317,
"grad_norm": 0.9693360924720764,
"learning_rate": 3e-05,
"loss": 8.2919,
"step": 3000
},
{
"epoch": 2.44926522043387,
"grad_norm": 0.822096049785614,
"learning_rate": 3.5e-05,
"loss": 8.2003,
"step": 3500
},
{
"epoch": 2.7991602519244227,
"grad_norm": 0.9631165266036987,
"learning_rate": 4e-05,
"loss": 8.1238,
"step": 4000
},
{
"epoch": 3.1490552834149756,
"grad_norm": 0.9502329230308533,
"learning_rate": 4.5e-05,
"loss": 8.0682,
"step": 4500
},
{
"epoch": 3.498950314905528,
"grad_norm": 0.8137854933738708,
"learning_rate": 5e-05,
"loss": 8.0245,
"step": 5000
},
{
"epoch": 3.8488453463960814,
"grad_norm": 0.6269208788871765,
"learning_rate": 5.500000000000001e-05,
"loss": 7.9804,
"step": 5500
},
{
"epoch": 4.198740377886634,
"grad_norm": 0.7888797521591187,
"learning_rate": 6e-05,
"loss": 7.9445,
"step": 6000
},
{
"epoch": 4.548635409377187,
"grad_norm": 0.6673307418823242,
"learning_rate": 6.500000000000001e-05,
"loss": 7.909,
"step": 6500
},
{
"epoch": 4.89853044086774,
"grad_norm": 0.6339680552482605,
"learning_rate": 7e-05,
"loss": 7.8781,
"step": 7000
},
{
"epoch": 5.248425472358292,
"grad_norm": 0.7484241724014282,
"learning_rate": 7.500000000000001e-05,
"loss": 7.8479,
"step": 7500
},
{
"epoch": 5.5983205038488455,
"grad_norm": 0.705053448677063,
"learning_rate": 8e-05,
"loss": 7.8187,
"step": 8000
},
{
"epoch": 5.948215535339398,
"grad_norm": 0.7108335494995117,
"learning_rate": 8.5e-05,
"loss": 7.798,
"step": 8500
},
{
"epoch": 6.298110566829951,
"grad_norm": 0.9193771481513977,
"learning_rate": 9e-05,
"loss": 7.7733,
"step": 9000
},
{
"epoch": 6.648005598320504,
"grad_norm": 0.7022958993911743,
"learning_rate": 9.5e-05,
"loss": 7.7537,
"step": 9500
},
{
"epoch": 6.997900629811056,
"grad_norm": 0.5968444347381592,
"learning_rate": 0.0001,
"loss": 7.735,
"step": 10000
},
{
"epoch": 6.997900629811056,
"eval_accuracy": 0.1533309145711814,
"eval_loss": 7.668768882751465,
"eval_runtime": 0.9954,
"eval_samples_per_second": 481.211,
"eval_steps_per_second": 3.014,
"step": 10000
},
{
"epoch": 7.3477956613016095,
"grad_norm": 0.5823611617088318,
"learning_rate": 9.805636540330418e-05,
"loss": 7.7113,
"step": 10500
},
{
"epoch": 7.697690692792162,
"grad_norm": 0.838610827922821,
"learning_rate": 9.611273080660836e-05,
"loss": 7.6992,
"step": 11000
},
{
"epoch": 8.047585724282715,
"grad_norm": 0.6564937829971313,
"learning_rate": 9.416909620991254e-05,
"loss": 7.6777,
"step": 11500
},
{
"epoch": 8.397480755773268,
"grad_norm": 0.8518803119659424,
"learning_rate": 9.222546161321672e-05,
"loss": 7.6481,
"step": 12000
},
{
"epoch": 8.74737578726382,
"grad_norm": 0.8578371405601501,
"learning_rate": 9.02818270165209e-05,
"loss": 7.6016,
"step": 12500
},
{
"epoch": 9.097270818754374,
"grad_norm": 0.8438256978988647,
"learning_rate": 8.833819241982508e-05,
"loss": 7.5425,
"step": 13000
},
{
"epoch": 9.447165850244927,
"grad_norm": 1.0309187173843384,
"learning_rate": 8.639455782312925e-05,
"loss": 7.5037,
"step": 13500
},
{
"epoch": 9.79706088173548,
"grad_norm": 0.640387773513794,
"learning_rate": 8.445092322643343e-05,
"loss": 7.4909,
"step": 14000
},
{
"epoch": 10.146955913226032,
"grad_norm": 0.6895781755447388,
"learning_rate": 8.250728862973761e-05,
"loss": 7.4791,
"step": 14500
},
{
"epoch": 10.496850944716584,
"grad_norm": 0.6688145995140076,
"learning_rate": 8.056365403304179e-05,
"loss": 7.4638,
"step": 15000
},
{
"epoch": 10.846745976207139,
"grad_norm": 0.6284395456314087,
"learning_rate": 7.862001943634597e-05,
"loss": 7.4597,
"step": 15500
},
{
"epoch": 11.196641007697691,
"grad_norm": 0.685321569442749,
"learning_rate": 7.667638483965015e-05,
"loss": 7.4469,
"step": 16000
},
{
"epoch": 11.546536039188243,
"grad_norm": 0.6994627714157104,
"learning_rate": 7.473275024295433e-05,
"loss": 7.4372,
"step": 16500
},
{
"epoch": 11.896431070678796,
"grad_norm": 0.7122758030891418,
"learning_rate": 7.27891156462585e-05,
"loss": 7.4293,
"step": 17000
},
{
"epoch": 12.246326102169348,
"grad_norm": 0.6678953170776367,
"learning_rate": 7.08454810495627e-05,
"loss": 7.4228,
"step": 17500
},
{
"epoch": 12.596221133659903,
"grad_norm": 0.6669552326202393,
"learning_rate": 6.890184645286687e-05,
"loss": 7.4132,
"step": 18000
},
{
"epoch": 12.946116165150455,
"grad_norm": 0.8246549367904663,
"learning_rate": 6.695821185617104e-05,
"loss": 7.4135,
"step": 18500
},
{
"epoch": 13.296011196641008,
"grad_norm": 0.6137415170669556,
"learning_rate": 6.501457725947522e-05,
"loss": 7.3995,
"step": 19000
},
{
"epoch": 13.64590622813156,
"grad_norm": 0.6450156569480896,
"learning_rate": 6.30709426627794e-05,
"loss": 7.3942,
"step": 19500
},
{
"epoch": 13.995801259622114,
"grad_norm": 0.712520956993103,
"learning_rate": 6.112730806608357e-05,
"loss": 7.3886,
"step": 20000
},
{
"epoch": 13.995801259622114,
"eval_accuracy": 0.15518117168980697,
"eval_loss": 7.352290630340576,
"eval_runtime": 0.9817,
"eval_samples_per_second": 487.916,
"eval_steps_per_second": 3.056,
"step": 20000
},
{
"epoch": 14.345696291112667,
"grad_norm": 0.7041005492210388,
"learning_rate": 5.918367346938776e-05,
"loss": 7.3793,
"step": 20500
},
{
"epoch": 14.695591322603219,
"grad_norm": 0.7062017917633057,
"learning_rate": 5.724003887269194e-05,
"loss": 7.3775,
"step": 21000
},
{
"epoch": 15.045486354093772,
"grad_norm": 0.6913180351257324,
"learning_rate": 5.529640427599612e-05,
"loss": 7.3728,
"step": 21500
},
{
"epoch": 15.395381385584324,
"grad_norm": 0.6736568212509155,
"learning_rate": 5.3352769679300295e-05,
"loss": 7.3656,
"step": 22000
},
{
"epoch": 15.745276417074878,
"grad_norm": 0.788135826587677,
"learning_rate": 5.1409135082604474e-05,
"loss": 7.3602,
"step": 22500
},
{
"epoch": 16.09517144856543,
"grad_norm": 0.6803423762321472,
"learning_rate": 4.946550048590865e-05,
"loss": 7.3586,
"step": 23000
},
{
"epoch": 16.445066480055985,
"grad_norm": 0.7166891694068909,
"learning_rate": 4.752186588921283e-05,
"loss": 7.351,
"step": 23500
},
{
"epoch": 16.794961511546536,
"grad_norm": 0.629996120929718,
"learning_rate": 4.557823129251701e-05,
"loss": 7.3483,
"step": 24000
},
{
"epoch": 17.14485654303709,
"grad_norm": 0.7230992913246155,
"learning_rate": 4.363459669582119e-05,
"loss": 7.3422,
"step": 24500
},
{
"epoch": 17.49475157452764,
"grad_norm": 0.669651985168457,
"learning_rate": 4.1690962099125366e-05,
"loss": 7.3404,
"step": 25000
},
{
"epoch": 17.844646606018195,
"grad_norm": 0.6792581677436829,
"learning_rate": 3.9747327502429545e-05,
"loss": 7.3377,
"step": 25500
},
{
"epoch": 18.19454163750875,
"grad_norm": 0.6232681274414062,
"learning_rate": 3.780369290573372e-05,
"loss": 7.3318,
"step": 26000
},
{
"epoch": 18.5444366689993,
"grad_norm": 0.7704149484634399,
"learning_rate": 3.58600583090379e-05,
"loss": 7.3276,
"step": 26500
},
{
"epoch": 18.894331700489854,
"grad_norm": 0.6254093050956726,
"learning_rate": 3.391642371234208e-05,
"loss": 7.3284,
"step": 27000
},
{
"epoch": 19.244226731980405,
"grad_norm": 0.683228075504303,
"learning_rate": 3.1972789115646265e-05,
"loss": 7.3249,
"step": 27500
},
{
"epoch": 19.59412176347096,
"grad_norm": 0.7169561982154846,
"learning_rate": 3.0029154518950437e-05,
"loss": 7.3192,
"step": 28000
},
{
"epoch": 19.944016794961513,
"grad_norm": 0.7218673825263977,
"learning_rate": 2.8085519922254615e-05,
"loss": 7.3195,
"step": 28500
},
{
"epoch": 20.293911826452064,
"grad_norm": 0.629692554473877,
"learning_rate": 2.6141885325558797e-05,
"loss": 7.3133,
"step": 29000
},
{
"epoch": 20.643806857942618,
"grad_norm": 0.6370089054107666,
"learning_rate": 2.4198250728862976e-05,
"loss": 7.312,
"step": 29500
},
{
"epoch": 20.99370188943317,
"grad_norm": 0.686647891998291,
"learning_rate": 2.225461613216715e-05,
"loss": 7.31,
"step": 30000
},
{
"epoch": 20.99370188943317,
"eval_accuracy": 0.15583133549569878,
"eval_loss": 7.317912578582764,
"eval_runtime": 0.9857,
"eval_samples_per_second": 485.95,
"eval_steps_per_second": 3.044,
"step": 30000
},
{
"epoch": 21.343596920923723,
"grad_norm": 0.713712215423584,
"learning_rate": 2.0310981535471333e-05,
"loss": 7.3014,
"step": 30500
},
{
"epoch": 21.693491952414277,
"grad_norm": 0.6833124756813049,
"learning_rate": 1.836734693877551e-05,
"loss": 7.2863,
"step": 31000
},
{
"epoch": 22.043386983904828,
"grad_norm": 0.8253185153007507,
"learning_rate": 1.642371234207969e-05,
"loss": 7.264,
"step": 31500
},
{
"epoch": 22.393282015395382,
"grad_norm": 0.8029682636260986,
"learning_rate": 1.4480077745383868e-05,
"loss": 7.2417,
"step": 32000
},
{
"epoch": 22.743177046885933,
"grad_norm": 0.7757593393325806,
"learning_rate": 1.2536443148688048e-05,
"loss": 7.2277,
"step": 32500
},
{
"epoch": 23.093072078376487,
"grad_norm": 0.8712273240089417,
"learning_rate": 1.0592808551992225e-05,
"loss": 7.2148,
"step": 33000
},
{
"epoch": 23.44296710986704,
"grad_norm": 0.7989441752433777,
"learning_rate": 8.649173955296405e-06,
"loss": 7.2004,
"step": 33500
},
{
"epoch": 23.792862141357592,
"grad_norm": 0.870869517326355,
"learning_rate": 6.705539358600584e-06,
"loss": 7.1886,
"step": 34000
},
{
"epoch": 24.142757172848146,
"grad_norm": 0.7762761116027832,
"learning_rate": 4.7619047619047615e-06,
"loss": 7.1794,
"step": 34500
},
{
"epoch": 24.492652204338697,
"grad_norm": 0.7940042614936829,
"learning_rate": 2.818270165208941e-06,
"loss": 7.1725,
"step": 35000
},
{
"epoch": 24.84254723582925,
"grad_norm": 0.7887886166572571,
"learning_rate": 8.746355685131196e-07,
"loss": 7.1711,
"step": 35500
},
{
"epoch": 25.0,
"step": 35725,
"total_flos": 3.049796905526016e+17,
"train_loss": 7.630663348358774,
"train_runtime": 20886.7559,
"train_samples_per_second": 273.665,
"train_steps_per_second": 1.71
}
],
"logging_steps": 500,
"max_steps": 35725,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.049796905526016e+17,
"train_batch_size": 160,
"trial_name": null,
"trial_params": null
}