|
{
  "best_metric": 0.2997977137565613,
  "best_model_checkpoint": "models/toxic-bert-hubert/checkpoint-150",
  "epoch": 0.23088023088023088,
  "eval_steps": 10,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.206,
      "step": 10
    },
    {
      "epoch": 0.01,
      "eval_f1": 0.7730744144170906,
      "eval_loss": 0.6420192122459412,
      "eval_runtime": 22.3453,
      "eval_samples_per_second": 247.837,
      "eval_steps_per_second": 7.787,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.5054,
      "step": 20
    },
    {
      "epoch": 0.03,
      "eval_f1": 0.8048163482788003,
      "eval_loss": 0.5228331685066223,
      "eval_runtime": 22.8204,
      "eval_samples_per_second": 242.677,
      "eval_steps_per_second": 7.625,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.88e-05,
      "loss": 0.5716,
      "step": 30
    },
    {
      "epoch": 0.04,
      "eval_f1": 0.8136074625725318,
      "eval_loss": 0.4328407943248749,
      "eval_runtime": 23.1876,
      "eval_samples_per_second": 238.834,
      "eval_steps_per_second": 7.504,
      "step": 30
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.4257,
      "step": 40
    },
    {
      "epoch": 0.06,
      "eval_f1": 0.8259426500452947,
      "eval_loss": 0.4285435080528259,
      "eval_runtime": 23.2092,
      "eval_samples_per_second": 238.613,
      "eval_steps_per_second": 7.497,
      "step": 40
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.8e-05,
      "loss": 0.4937,
      "step": 50
    },
    {
      "epoch": 0.07,
      "eval_f1": 0.8317824244449398,
      "eval_loss": 0.39296483993530273,
      "eval_runtime": 23.2217,
      "eval_samples_per_second": 238.484,
      "eval_steps_per_second": 7.493,
      "step": 50
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.76e-05,
      "loss": 0.4411,
      "step": 60
    },
    {
      "epoch": 0.09,
      "eval_f1": 0.8356587695730946,
      "eval_loss": 0.3781413733959198,
      "eval_runtime": 23.4641,
      "eval_samples_per_second": 236.021,
      "eval_steps_per_second": 7.416,
      "step": 60
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.72e-05,
      "loss": 0.4197,
      "step": 70
    },
    {
      "epoch": 0.1,
      "eval_f1": 0.8497564694972085,
      "eval_loss": 0.3615292012691498,
      "eval_runtime": 23.1269,
      "eval_samples_per_second": 239.462,
      "eval_steps_per_second": 7.524,
      "step": 70
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.3702,
      "step": 80
    },
    {
      "epoch": 0.12,
      "eval_f1": 0.8725646676802798,
      "eval_loss": 0.3257770836353302,
      "eval_runtime": 22.9073,
      "eval_samples_per_second": 241.757,
      "eval_steps_per_second": 7.596,
      "step": 80
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.64e-05,
      "loss": 0.3357,
      "step": 90
    },
    {
      "epoch": 0.13,
      "eval_f1": 0.8735307760659007,
      "eval_loss": 0.31817400455474854,
      "eval_runtime": 23.2006,
      "eval_samples_per_second": 238.701,
      "eval_steps_per_second": 7.5,
      "step": 90
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.4082,
      "step": 100
    },
    {
      "epoch": 0.14,
      "eval_f1": 0.8404628911456754,
      "eval_loss": 0.35047250986099243,
      "eval_runtime": 22.7192,
      "eval_samples_per_second": 243.758,
      "eval_steps_per_second": 7.659,
      "step": 100
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.4107,
      "step": 110
    },
    {
      "epoch": 0.16,
      "eval_f1": 0.8586341129440507,
      "eval_loss": 0.3375680148601532,
      "eval_runtime": 23.371,
      "eval_samples_per_second": 236.96,
      "eval_steps_per_second": 7.445,
      "step": 110
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.2864,
      "step": 120
    },
    {
      "epoch": 0.17,
      "eval_f1": 0.8561894871027176,
      "eval_loss": 0.32658323645591736,
      "eval_runtime": 23.2113,
      "eval_samples_per_second": 238.591,
      "eval_steps_per_second": 7.496,
      "step": 120
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.48e-05,
      "loss": 0.3483,
      "step": 130
    },
    {
      "epoch": 0.19,
      "eval_f1": 0.8645368802572093,
      "eval_loss": 0.3245397210121155,
      "eval_runtime": 23.0332,
      "eval_samples_per_second": 240.436,
      "eval_steps_per_second": 7.554,
      "step": 130
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.317,
      "step": 140
    },
    {
      "epoch": 0.2,
      "eval_f1": 0.8735823993344022,
      "eval_loss": 0.32588475942611694,
      "eval_runtime": 23.3446,
      "eval_samples_per_second": 237.228,
      "eval_steps_per_second": 7.454,
      "step": 140
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.4e-05,
      "loss": 0.3114,
      "step": 150
    },
    {
      "epoch": 0.22,
      "eval_f1": 0.8704014314485559,
      "eval_loss": 0.2997977137565613,
      "eval_runtime": 22.6714,
      "eval_samples_per_second": 244.273,
      "eval_steps_per_second": 7.675,
      "step": 150
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.3323,
      "step": 160
    },
    {
      "epoch": 0.23,
      "eval_f1": 0.8661724356256255,
      "eval_loss": 0.3152276575565338,
      "eval_runtime": 23.046,
      "eval_samples_per_second": 240.302,
      "eval_steps_per_second": 7.55,
      "step": 160
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "total_flos": 359241714253440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
|
|