|
{ |
|
"best_metric": 0.6268890955591723, |
|
"best_model_checkpoint": "modernbert-medical-classifier/checkpoint-644", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 736, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.2555762427123016, |
|
"eval_loss": 1.1453216075897217, |
|
"eval_runtime": 7.2906, |
|
"eval_samples_per_second": 12.619, |
|
"eval_steps_per_second": 3.155, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 27.595176696777344, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 1.2432, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.22693092661056924, |
|
"eval_loss": 1.1348090171813965, |
|
"eval_runtime": 7.2682, |
|
"eval_samples_per_second": 12.658, |
|
"eval_steps_per_second": 3.164, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 10.296772003173828, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 1.1447, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.2623751468860165, |
|
"eval_loss": 1.194385051727295, |
|
"eval_runtime": 7.2397, |
|
"eval_samples_per_second": 12.708, |
|
"eval_steps_per_second": 3.177, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.260869565217391, |
|
"grad_norm": 8.205018997192383, |
|
"learning_rate": 1.932367149758454e-05, |
|
"loss": 1.0924, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.46725195094760313, |
|
"eval_loss": 0.9543380737304688, |
|
"eval_runtime": 7.257, |
|
"eval_samples_per_second": 12.677, |
|
"eval_steps_per_second": 3.169, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 4.3478260869565215, |
|
"grad_norm": 11.217053413391113, |
|
"learning_rate": 1.8357487922705315e-05, |
|
"loss": 0.9918, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.48917610522469857, |
|
"eval_loss": 1.0969688892364502, |
|
"eval_runtime": 7.2942, |
|
"eval_samples_per_second": 12.613, |
|
"eval_steps_per_second": 3.153, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.434782608695652, |
|
"grad_norm": 17.887250900268555, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.8981, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.5668988939449298, |
|
"eval_loss": 1.2105615139007568, |
|
"eval_runtime": 7.3182, |
|
"eval_samples_per_second": 12.571, |
|
"eval_steps_per_second": 3.143, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 6.521739130434782, |
|
"grad_norm": 26.001848220825195, |
|
"learning_rate": 1.6425120772946863e-05, |
|
"loss": 0.9261, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.6268890955591723, |
|
"eval_loss": 0.8952301740646362, |
|
"eval_runtime": 7.2679, |
|
"eval_samples_per_second": 12.658, |
|
"eval_steps_per_second": 3.165, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 7.608695652173913, |
|
"grad_norm": 21.24846649169922, |
|
"learning_rate": 1.5458937198067633e-05, |
|
"loss": 0.8208, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.5480383816552855, |
|
"eval_loss": 1.1408698558807373, |
|
"eval_runtime": 7.2908, |
|
"eval_samples_per_second": 12.619, |
|
"eval_steps_per_second": 3.155, |
|
"step": 736 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5052834964944000.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|