|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 3415, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4842, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4055869579315186, |
|
"eval_runtime": 4.1688, |
|
"eval_samples_per_second": 119.938, |
|
"eval_steps_per_second": 15.112, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.09711501224106496, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3608, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3499, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4576923076923077, |
|
"eval_loss": 3.453113317489624, |
|
"eval_runtime": 4.4031, |
|
"eval_samples_per_second": 113.556, |
|
"eval_steps_per_second": 14.308, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1225593574040313, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2486, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4584102564102564, |
|
"eval_loss": 3.671137809753418, |
|
"eval_runtime": 4.4094, |
|
"eval_samples_per_second": 113.393, |
|
"eval_steps_per_second": 14.288, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.13165889774889974, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1507, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1218, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1231, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45697435897435895, |
|
"eval_loss": 3.7999510765075684, |
|
"eval_runtime": 4.0886, |
|
"eval_samples_per_second": 122.29, |
|
"eval_steps_per_second": 15.409, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.15413978034957934, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0823, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.997, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0024, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.995, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4552307692307692, |
|
"eval_loss": 3.953216314315796, |
|
"eval_runtime": 4.9824, |
|
"eval_samples_per_second": 100.354, |
|
"eval_steps_per_second": 12.645, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.1611344512465791, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.046, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8495, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.854, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45261538461538464, |
|
"eval_loss": 4.0766072273254395, |
|
"eval_runtime": 4.8095, |
|
"eval_samples_per_second": 103.961, |
|
"eval_steps_per_second": 13.099, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.1489904300924264, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.044, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7917, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7061, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7302, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4501025641025641, |
|
"eval_loss": 4.171727180480957, |
|
"eval_runtime": 4.2169, |
|
"eval_samples_per_second": 118.57, |
|
"eval_steps_per_second": 14.94, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7227, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.573, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6036, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6033, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.448, |
|
"eval_loss": 4.277780055999756, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 122.11, |
|
"eval_steps_per_second": 15.386, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4718, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4825, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4462051282051282, |
|
"eval_loss": 4.34151554107666, |
|
"eval_runtime": 4.8113, |
|
"eval_samples_per_second": 103.921, |
|
"eval_steps_per_second": 13.094, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4538, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3609, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3769, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.387, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4463076923076923, |
|
"eval_loss": 4.413115978240967, |
|
"eval_runtime": 4.2874, |
|
"eval_samples_per_second": 116.62, |
|
"eval_steps_per_second": 14.694, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3415 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6.763068176298148e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|