toxic-hubert / trainer_state.json
RabidUmarell's picture
moved
65c542c
{
"best_metric": 0.2997977137565613,
"best_model_checkpoint": "models/toxic-bert-hubert/checkpoint-150",
"epoch": 0.23088023088023088,
"eval_steps": 10,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.206,
"step": 10
},
{
"epoch": 0.01,
"eval_f1": 0.7730744144170906,
"eval_loss": 0.6420192122459412,
"eval_runtime": 22.3453,
"eval_samples_per_second": 247.837,
"eval_steps_per_second": 7.787,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.5054,
"step": 20
},
{
"epoch": 0.03,
"eval_f1": 0.8048163482788003,
"eval_loss": 0.5228331685066223,
"eval_runtime": 22.8204,
"eval_samples_per_second": 242.677,
"eval_steps_per_second": 7.625,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 1.88e-05,
"loss": 0.5716,
"step": 30
},
{
"epoch": 0.04,
"eval_f1": 0.8136074625725318,
"eval_loss": 0.4328407943248749,
"eval_runtime": 23.1876,
"eval_samples_per_second": 238.834,
"eval_steps_per_second": 7.504,
"step": 30
},
{
"epoch": 0.06,
"learning_rate": 1.8400000000000003e-05,
"loss": 0.4257,
"step": 40
},
{
"epoch": 0.06,
"eval_f1": 0.8259426500452947,
"eval_loss": 0.4285435080528259,
"eval_runtime": 23.2092,
"eval_samples_per_second": 238.613,
"eval_steps_per_second": 7.497,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 1.8e-05,
"loss": 0.4937,
"step": 50
},
{
"epoch": 0.07,
"eval_f1": 0.8317824244449398,
"eval_loss": 0.39296483993530273,
"eval_runtime": 23.2217,
"eval_samples_per_second": 238.484,
"eval_steps_per_second": 7.493,
"step": 50
},
{
"epoch": 0.09,
"learning_rate": 1.76e-05,
"loss": 0.4411,
"step": 60
},
{
"epoch": 0.09,
"eval_f1": 0.8356587695730946,
"eval_loss": 0.3781413733959198,
"eval_runtime": 23.4641,
"eval_samples_per_second": 236.021,
"eval_steps_per_second": 7.416,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 1.72e-05,
"loss": 0.4197,
"step": 70
},
{
"epoch": 0.1,
"eval_f1": 0.8497564694972085,
"eval_loss": 0.3615292012691498,
"eval_runtime": 23.1269,
"eval_samples_per_second": 239.462,
"eval_steps_per_second": 7.524,
"step": 70
},
{
"epoch": 0.12,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.3702,
"step": 80
},
{
"epoch": 0.12,
"eval_f1": 0.8725646676802798,
"eval_loss": 0.3257770836353302,
"eval_runtime": 22.9073,
"eval_samples_per_second": 241.757,
"eval_steps_per_second": 7.596,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 1.64e-05,
"loss": 0.3357,
"step": 90
},
{
"epoch": 0.13,
"eval_f1": 0.8735307760659007,
"eval_loss": 0.31817400455474854,
"eval_runtime": 23.2006,
"eval_samples_per_second": 238.701,
"eval_steps_per_second": 7.5,
"step": 90
},
{
"epoch": 0.14,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.4082,
"step": 100
},
{
"epoch": 0.14,
"eval_f1": 0.8404628911456754,
"eval_loss": 0.35047250986099243,
"eval_runtime": 22.7192,
"eval_samples_per_second": 243.758,
"eval_steps_per_second": 7.659,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 1.5600000000000003e-05,
"loss": 0.4107,
"step": 110
},
{
"epoch": 0.16,
"eval_f1": 0.8586341129440507,
"eval_loss": 0.3375680148601532,
"eval_runtime": 23.371,
"eval_samples_per_second": 236.96,
"eval_steps_per_second": 7.445,
"step": 110
},
{
"epoch": 0.17,
"learning_rate": 1.5200000000000002e-05,
"loss": 0.2864,
"step": 120
},
{
"epoch": 0.17,
"eval_f1": 0.8561894871027176,
"eval_loss": 0.32658323645591736,
"eval_runtime": 23.2113,
"eval_samples_per_second": 238.591,
"eval_steps_per_second": 7.496,
"step": 120
},
{
"epoch": 0.19,
"learning_rate": 1.48e-05,
"loss": 0.3483,
"step": 130
},
{
"epoch": 0.19,
"eval_f1": 0.8645368802572093,
"eval_loss": 0.3245397210121155,
"eval_runtime": 23.0332,
"eval_samples_per_second": 240.436,
"eval_steps_per_second": 7.554,
"step": 130
},
{
"epoch": 0.2,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.317,
"step": 140
},
{
"epoch": 0.2,
"eval_f1": 0.8735823993344022,
"eval_loss": 0.32588475942611694,
"eval_runtime": 23.3446,
"eval_samples_per_second": 237.228,
"eval_steps_per_second": 7.454,
"step": 140
},
{
"epoch": 0.22,
"learning_rate": 1.4e-05,
"loss": 0.3114,
"step": 150
},
{
"epoch": 0.22,
"eval_f1": 0.8704014314485559,
"eval_loss": 0.2997977137565613,
"eval_runtime": 22.6714,
"eval_samples_per_second": 244.273,
"eval_steps_per_second": 7.675,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 1.3600000000000002e-05,
"loss": 0.3323,
"step": 160
},
{
"epoch": 0.23,
"eval_f1": 0.8661724356256255,
"eval_loss": 0.3152276575565338,
"eval_runtime": 23.046,
"eval_samples_per_second": 240.302,
"eval_steps_per_second": 7.55,
"step": 160
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 359241714253440.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}