|
{ |
|
"best_metric": 0.6931407942238267, |
|
"best_model_checkpoint": "output/fine_tuned/t5-base/RTE/checkpoint-780", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 780, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.49458483754512633, |
|
"eval_loss": 0.6981508135795593, |
|
"eval_runtime": 0.6368, |
|
"eval_samples_per_second": 434.966, |
|
"eval_steps_per_second": 54.96, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5631768953068592, |
|
"eval_loss": 0.6822376251220703, |
|
"eval_runtime": 0.6345, |
|
"eval_samples_per_second": 436.546, |
|
"eval_steps_per_second": 55.159, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.592057761732852, |
|
"eval_loss": 0.664194643497467, |
|
"eval_runtime": 0.627, |
|
"eval_samples_per_second": 441.797, |
|
"eval_steps_per_second": 55.823, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6101083032490975, |
|
"eval_loss": 0.6544933915138245, |
|
"eval_runtime": 0.632, |
|
"eval_samples_per_second": 438.261, |
|
"eval_steps_per_second": 55.376, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6389891696750902, |
|
"eval_loss": 0.6432966589927673, |
|
"eval_runtime": 0.6232, |
|
"eval_samples_per_second": 444.454, |
|
"eval_steps_per_second": 56.158, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6606498194945848, |
|
"eval_loss": 0.6844415068626404, |
|
"eval_runtime": 0.6263, |
|
"eval_samples_per_second": 442.269, |
|
"eval_steps_per_second": 55.882, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 6.410256410256411, |
|
"grad_norm": 4.859732627868652, |
|
"learning_rate": 1.794871794871795e-05, |
|
"loss": 0.5942, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6462093862815884, |
|
"eval_loss": 0.7054325342178345, |
|
"eval_runtime": 0.6316, |
|
"eval_samples_per_second": 438.543, |
|
"eval_steps_per_second": 55.412, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6642599277978339, |
|
"eval_loss": 0.7449424266815186, |
|
"eval_runtime": 0.6256, |
|
"eval_samples_per_second": 442.773, |
|
"eval_steps_per_second": 55.946, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6714801444043321, |
|
"eval_loss": 0.7661617994308472, |
|
"eval_runtime": 0.6292, |
|
"eval_samples_per_second": 440.229, |
|
"eval_steps_per_second": 55.625, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6931407942238267, |
|
"eval_loss": 0.7698411345481873, |
|
"eval_runtime": 0.636, |
|
"eval_samples_per_second": 435.503, |
|
"eval_steps_per_second": 55.028, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 780, |
|
"total_flos": 3802086289152000.0, |
|
"train_loss": 0.5113652938451523, |
|
"train_runtime": 165.5156, |
|
"train_samples_per_second": 150.439, |
|
"train_steps_per_second": 4.713 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3802086289152000.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|