|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 44.0, |
|
"eval_steps": 500, |
|
"global_step": 15026, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4842, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4055869579315186, |
|
"eval_runtime": 4.1688, |
|
"eval_samples_per_second": 119.938, |
|
"eval_steps_per_second": 15.112, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.09711501224106496, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3608, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3499, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4576923076923077, |
|
"eval_loss": 3.453113317489624, |
|
"eval_runtime": 4.4031, |
|
"eval_samples_per_second": 113.556, |
|
"eval_steps_per_second": 14.308, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1225593574040313, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2486, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4584102564102564, |
|
"eval_loss": 3.671137809753418, |
|
"eval_runtime": 4.4094, |
|
"eval_samples_per_second": 113.393, |
|
"eval_steps_per_second": 14.288, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.13165889774889974, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1507, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1218, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1231, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45697435897435895, |
|
"eval_loss": 3.7999510765075684, |
|
"eval_runtime": 4.0886, |
|
"eval_samples_per_second": 122.29, |
|
"eval_steps_per_second": 15.409, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.15413978034957934, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0823, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.997, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0024, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.995, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4552307692307692, |
|
"eval_loss": 3.953216314315796, |
|
"eval_runtime": 4.9824, |
|
"eval_samples_per_second": 100.354, |
|
"eval_steps_per_second": 12.645, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.1611344512465791, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.046, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8495, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.854, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45261538461538464, |
|
"eval_loss": 4.0766072273254395, |
|
"eval_runtime": 4.8095, |
|
"eval_samples_per_second": 103.961, |
|
"eval_steps_per_second": 13.099, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.1489904300924264, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.044, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7917, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7061, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7302, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4501025641025641, |
|
"eval_loss": 4.171727180480957, |
|
"eval_runtime": 4.2169, |
|
"eval_samples_per_second": 118.57, |
|
"eval_steps_per_second": 14.94, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7227, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.573, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6036, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6033, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.448, |
|
"eval_loss": 4.277780055999756, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 122.11, |
|
"eval_steps_per_second": 15.386, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4718, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4825, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4462051282051282, |
|
"eval_loss": 4.34151554107666, |
|
"eval_runtime": 4.8113, |
|
"eval_samples_per_second": 103.921, |
|
"eval_steps_per_second": 13.094, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4538, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3609, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3769, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.387, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4463076923076923, |
|
"eval_loss": 4.413115978240967, |
|
"eval_runtime": 4.2874, |
|
"eval_samples_per_second": 116.62, |
|
"eval_steps_per_second": 14.694, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2911, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2881, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2933, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.44343589743589745, |
|
"eval_loss": 4.490556716918945, |
|
"eval_runtime": 4.2824, |
|
"eval_samples_per_second": 116.757, |
|
"eval_steps_per_second": 14.711, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2653, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2181, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.44251282051282054, |
|
"eval_loss": 4.651719570159912, |
|
"eval_runtime": 4.0941, |
|
"eval_samples_per_second": 122.126, |
|
"eval_steps_per_second": 15.388, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2414, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.173, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.183, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1919, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.44076923076923075, |
|
"eval_loss": 4.751525402069092, |
|
"eval_runtime": 4.0902, |
|
"eval_samples_per_second": 122.244, |
|
"eval_steps_per_second": 15.403, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1661, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1501, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1581, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.442051282051282, |
|
"eval_loss": 4.7323455810546875, |
|
"eval_runtime": 4.8128, |
|
"eval_samples_per_second": 103.89, |
|
"eval_steps_per_second": 13.09, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.01991302915032707, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1563, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.13, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1388, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1429, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.44066666666666665, |
|
"eval_loss": 4.810059547424316, |
|
"eval_runtime": 5.0086, |
|
"eval_samples_per_second": 99.829, |
|
"eval_steps_per_second": 12.578, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.007355658423274405, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.002, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1228, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1233, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1279, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.44061538461538463, |
|
"eval_loss": 4.84822940826416, |
|
"eval_runtime": 4.1962, |
|
"eval_samples_per_second": 119.156, |
|
"eval_steps_per_second": 15.014, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.020518239891916933, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1248, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1118, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1172, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1231, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4410769230769231, |
|
"eval_loss": 4.973475933074951, |
|
"eval_runtime": 4.4042, |
|
"eval_samples_per_second": 113.529, |
|
"eval_steps_per_second": 14.305, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.025070041424787165, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1068, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.111, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1145, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.44153846153846155, |
|
"eval_loss": 5.012055397033691, |
|
"eval_runtime": 4.1955, |
|
"eval_samples_per_second": 119.174, |
|
"eval_steps_per_second": 15.016, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.0018720973379033757, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1093, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1051, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1087, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.43943589743589745, |
|
"eval_loss": 4.983563423156738, |
|
"eval_runtime": 4.0932, |
|
"eval_samples_per_second": 122.154, |
|
"eval_steps_per_second": 15.391, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.0014273377715875865, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1115, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1001, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1049, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1084, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4388205128205128, |
|
"eval_loss": 5.117064952850342, |
|
"eval_runtime": 4.0908, |
|
"eval_samples_per_second": 122.227, |
|
"eval_steps_per_second": 15.401, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 0.004914730802266865, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.102, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1014, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 20.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1069, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.44051282051282054, |
|
"eval_loss": 5.011991500854492, |
|
"eval_runtime": 4.1961, |
|
"eval_samples_per_second": 119.159, |
|
"eval_steps_per_second": 15.014, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1075, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0985, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1032, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1075, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 5.234306812286377, |
|
"eval_runtime": 4.4185, |
|
"eval_samples_per_second": 113.162, |
|
"eval_steps_per_second": 14.258, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0959, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0998, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1024, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4408717948717949, |
|
"eval_loss": 5.150094985961914, |
|
"eval_runtime": 4.2006, |
|
"eval_samples_per_second": 119.032, |
|
"eval_steps_per_second": 14.998, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 0.008507706871345696, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.002, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0997, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0946, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0981, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.44025641025641027, |
|
"eval_loss": 5.080117225646973, |
|
"eval_runtime": 4.1942, |
|
"eval_samples_per_second": 119.212, |
|
"eval_steps_per_second": 15.021, |
|
"step": 8196 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"step": 8196 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.101, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0923, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.095, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 0.0001, |
|
"loss": 0.097, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.44158974358974357, |
|
"eval_loss": 5.103724956512451, |
|
"eval_runtime": 4.307, |
|
"eval_samples_per_second": 116.09, |
|
"eval_steps_per_second": 14.627, |
|
"step": 8537 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 8537 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0925, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0926, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0963, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4397948717948718, |
|
"eval_loss": 5.206360340118408, |
|
"eval_runtime": 4.8198, |
|
"eval_samples_per_second": 103.739, |
|
"eval_steps_per_second": 13.071, |
|
"step": 8879 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"step": 8879 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0966, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 26.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0909, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0951, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0983, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4413846153846154, |
|
"eval_loss": 5.066434383392334, |
|
"eval_runtime": 4.1912, |
|
"eval_samples_per_second": 119.298, |
|
"eval_steps_per_second": 15.032, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.092, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 27.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0935, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0969, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.440974358974359, |
|
"eval_loss": 5.255864143371582, |
|
"eval_runtime": 4.1948, |
|
"eval_samples_per_second": 119.194, |
|
"eval_steps_per_second": 15.018, |
|
"step": 9562 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"step": 9562 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0947, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0915, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0948, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0966, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.44035897435897436, |
|
"eval_loss": 5.196049690246582, |
|
"eval_runtime": 5.1571, |
|
"eval_samples_per_second": 96.953, |
|
"eval_steps_per_second": 12.216, |
|
"step": 9903 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"step": 9903 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0884, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0913, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0954, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.43958974358974356, |
|
"eval_loss": 5.223780632019043, |
|
"eval_runtime": 4.8025, |
|
"eval_samples_per_second": 104.112, |
|
"eval_steps_per_second": 13.118, |
|
"step": 10245 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 10245 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0913, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0901, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0931, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4402051282051282, |
|
"eval_loss": 5.219459056854248, |
|
"eval_runtime": 4.2447, |
|
"eval_samples_per_second": 117.794, |
|
"eval_steps_per_second": 14.842, |
|
"step": 10586 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"step": 10586 |
|
}, |
|
{ |
|
"epoch": 31.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0939, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0879, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0902, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 31.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0923, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.44066666666666665, |
|
"eval_loss": 5.287062644958496, |
|
"eval_runtime": 4.1005, |
|
"eval_samples_per_second": 121.936, |
|
"eval_steps_per_second": 15.364, |
|
"step": 10928 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 10928 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0881, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0891, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0911, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4391794871794872, |
|
"eval_loss": 5.320067405700684, |
|
"eval_runtime": 4.1991, |
|
"eval_samples_per_second": 119.074, |
|
"eval_steps_per_second": 15.003, |
|
"step": 11269 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"step": 11269 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0915, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 33.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0873, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 33.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0898, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0934, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4387179487179487, |
|
"eval_loss": 5.362847328186035, |
|
"eval_runtime": 4.1975, |
|
"eval_samples_per_second": 119.119, |
|
"eval_steps_per_second": 15.009, |
|
"step": 11611 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"step": 11611 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"learning_rate": 0.0001, |
|
"loss": 0.086, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0883, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 34.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.091, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.438974358974359, |
|
"eval_loss": 5.319708347320557, |
|
"eval_runtime": 4.201, |
|
"eval_samples_per_second": 119.019, |
|
"eval_steps_per_second": 14.996, |
|
"step": 11952 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"step": 11952 |
|
}, |
|
{ |
|
"epoch": 35.14, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0897, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 35.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0876, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 35.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0902, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4391282051282051, |
|
"eval_loss": 5.186809539794922, |
|
"eval_runtime": 4.2, |
|
"eval_samples_per_second": 119.046, |
|
"eval_steps_per_second": 15.0, |
|
"step": 12294 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"step": 12294 |
|
}, |
|
{ |
|
"epoch": 36.02, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0931, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 36.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0857, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 36.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0894, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0916, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.44235897435897437, |
|
"eval_loss": 5.122660160064697, |
|
"eval_runtime": 4.4215, |
|
"eval_samples_per_second": 113.083, |
|
"eval_steps_per_second": 14.248, |
|
"step": 12635 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"step": 12635 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0894, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 37.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0888, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 37.77, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0905, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.43666666666666665, |
|
"eval_loss": 5.221393585205078, |
|
"eval_runtime": 4.6657, |
|
"eval_samples_per_second": 107.164, |
|
"eval_steps_per_second": 13.503, |
|
"step": 12977 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"step": 12977 |
|
}, |
|
{ |
|
"epoch": 38.07, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0906, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 38.36, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0857, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 38.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0884, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0907, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.4411794871794872, |
|
"eval_loss": 5.241151332855225, |
|
"eval_runtime": 4.1927, |
|
"eval_samples_per_second": 119.255, |
|
"eval_steps_per_second": 15.026, |
|
"step": 13318 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_bleu": 0.05134364405270662, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.006, |
|
"step": 13318 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0862, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 39.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0864, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0883, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.43948717948717947, |
|
"eval_loss": 5.301468372344971, |
|
"eval_runtime": 4.0986, |
|
"eval_samples_per_second": 121.992, |
|
"eval_steps_per_second": 15.371, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 40.12, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0864, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 40.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0838, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 40.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0863, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0892, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.43923076923076926, |
|
"eval_loss": 5.2815632820129395, |
|
"eval_runtime": 4.4744, |
|
"eval_samples_per_second": 111.747, |
|
"eval_steps_per_second": 14.08, |
|
"step": 14001 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"step": 14001 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0835, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 41.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0854, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 41.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0881, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.4350769230769231, |
|
"eval_loss": 5.358331203460693, |
|
"eval_runtime": 4.3999, |
|
"eval_samples_per_second": 113.64, |
|
"eval_steps_per_second": 14.319, |
|
"step": 14343 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"step": 14343 |
|
}, |
|
{ |
|
"epoch": 42.17, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0855, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 42.46, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0855, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 42.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0881, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.43651282051282053, |
|
"eval_loss": 5.267802715301514, |
|
"eval_runtime": 4.9237, |
|
"eval_samples_per_second": 101.549, |
|
"eval_steps_per_second": 12.795, |
|
"step": 14684 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"step": 14684 |
|
}, |
|
{ |
|
"epoch": 43.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0887, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 43.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0841, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 43.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0867, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0898, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4371794871794872, |
|
"eval_loss": 5.385359287261963, |
|
"eval_runtime": 4.4074, |
|
"eval_samples_per_second": 113.445, |
|
"eval_steps_per_second": 14.294, |
|
"step": 15026 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"step": 15026 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.975669879526392e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|