|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9920318725099602, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.398406374501992, |
|
"grad_norm": 8.759617805480957, |
|
"learning_rate": 1.7343957503320053e-05, |
|
"loss": 0.8369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.796812749003984, |
|
"grad_norm": 34.44209671020508, |
|
"learning_rate": 1.4687915006640108e-05, |
|
"loss": 0.1261, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1952191235059761, |
|
"grad_norm": 0.7429738640785217, |
|
"learning_rate": 1.2031872509960161e-05, |
|
"loss": 0.056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"grad_norm": 0.08053913712501526, |
|
"learning_rate": 9.375830013280214e-06, |
|
"loss": 0.0606, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.9920318725099602, |
|
"grad_norm": 0.04419610649347305, |
|
"learning_rate": 6.719787516600266e-06, |
|
"loss": 0.0523, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9920318725099602, |
|
"eval_accuracy": 0.9820627802690582, |
|
"eval_f1": 0.9819921426216374, |
|
"eval_loss": 0.126918762922287, |
|
"eval_precision": 0.9824181570038463, |
|
"eval_recall": 0.9820627802690582, |
|
"eval_runtime": 97.4831, |
|
"eval_samples_per_second": 2.288, |
|
"eval_steps_per_second": 0.287, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 753, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 262723465250304.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|