|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.998535871156662, |
|
"eval_steps": 500, |
|
"global_step": 1024, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4842, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4055869579315186, |
|
"eval_runtime": 4.1688, |
|
"eval_samples_per_second": 119.938, |
|
"eval_steps_per_second": 15.112, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.09711501224106496, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3608, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3499, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4576923076923077, |
|
"eval_loss": 3.453113317489624, |
|
"eval_runtime": 4.4031, |
|
"eval_samples_per_second": 113.556, |
|
"eval_steps_per_second": 14.308, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1225593574040313, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2486, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4584102564102564, |
|
"eval_loss": 3.671137809753418, |
|
"eval_runtime": 4.4094, |
|
"eval_samples_per_second": 113.393, |
|
"eval_steps_per_second": 14.288, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.13165889774889974, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.0298630319151514e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|