|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 22.998535871156662, |
|
"eval_steps": 500, |
|
"global_step": 7854, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4842, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4016, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3892, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4055869579315186, |
|
"eval_runtime": 4.1688, |
|
"eval_samples_per_second": 119.938, |
|
"eval_steps_per_second": 15.112, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.09711501224106496, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3608, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3499, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4576923076923077, |
|
"eval_loss": 3.453113317489624, |
|
"eval_runtime": 4.4031, |
|
"eval_samples_per_second": 113.556, |
|
"eval_steps_per_second": 14.308, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.1225593574040313, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2486, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2427, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4584102564102564, |
|
"eval_loss": 3.671137809753418, |
|
"eval_runtime": 4.4094, |
|
"eval_samples_per_second": 113.393, |
|
"eval_steps_per_second": 14.288, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.13165889774889974, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1507, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1218, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1231, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45697435897435895, |
|
"eval_loss": 3.7999510765075684, |
|
"eval_runtime": 4.0886, |
|
"eval_samples_per_second": 122.29, |
|
"eval_steps_per_second": 15.409, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.15413978034957934, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0823, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.997, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0024, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.995, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.4552307692307692, |
|
"eval_loss": 3.953216314315796, |
|
"eval_runtime": 4.9824, |
|
"eval_samples_per_second": 100.354, |
|
"eval_steps_per_second": 12.645, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.1611344512465791, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.046, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8495, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.854, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45261538461538464, |
|
"eval_loss": 4.0766072273254395, |
|
"eval_runtime": 4.8095, |
|
"eval_samples_per_second": 103.961, |
|
"eval_steps_per_second": 13.099, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.1489904300924264, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.044, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7917, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7061, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7302, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.4501025641025641, |
|
"eval_loss": 4.171727180480957, |
|
"eval_runtime": 4.2169, |
|
"eval_samples_per_second": 118.57, |
|
"eval_steps_per_second": 14.94, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7227, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.573, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6036, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6033, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.448, |
|
"eval_loss": 4.277780055999756, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 122.11, |
|
"eval_steps_per_second": 15.386, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4945, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4718, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4825, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4462051282051282, |
|
"eval_loss": 4.34151554107666, |
|
"eval_runtime": 4.8113, |
|
"eval_samples_per_second": 103.921, |
|
"eval_steps_per_second": 13.094, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4538, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3609, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3769, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.387, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4463076923076923, |
|
"eval_loss": 4.413115978240967, |
|
"eval_runtime": 4.2874, |
|
"eval_samples_per_second": 116.62, |
|
"eval_steps_per_second": 14.694, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2911, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2881, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2933, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.44343589743589745, |
|
"eval_loss": 4.490556716918945, |
|
"eval_runtime": 4.2824, |
|
"eval_samples_per_second": 116.757, |
|
"eval_steps_per_second": 14.711, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2653, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2181, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.44251282051282054, |
|
"eval_loss": 4.651719570159912, |
|
"eval_runtime": 4.0941, |
|
"eval_samples_per_second": 122.126, |
|
"eval_steps_per_second": 15.388, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2414, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.173, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.183, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1919, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.44076923076923075, |
|
"eval_loss": 4.751525402069092, |
|
"eval_runtime": 4.0902, |
|
"eval_samples_per_second": 122.244, |
|
"eval_steps_per_second": 15.403, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1661, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1501, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1581, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.442051282051282, |
|
"eval_loss": 4.7323455810546875, |
|
"eval_runtime": 4.8128, |
|
"eval_samples_per_second": 103.89, |
|
"eval_steps_per_second": 13.09, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.01991302915032707, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1563, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.13, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1388, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1429, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.44066666666666665, |
|
"eval_loss": 4.810059547424316, |
|
"eval_runtime": 5.0086, |
|
"eval_samples_per_second": 99.829, |
|
"eval_steps_per_second": 12.578, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.007355658423274405, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.002, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1228, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1233, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1279, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.44061538461538463, |
|
"eval_loss": 4.84822940826416, |
|
"eval_runtime": 4.1962, |
|
"eval_samples_per_second": 119.156, |
|
"eval_steps_per_second": 15.014, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.020518239891916933, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1248, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1118, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1172, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1231, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4410769230769231, |
|
"eval_loss": 4.973475933074951, |
|
"eval_runtime": 4.4042, |
|
"eval_samples_per_second": 113.529, |
|
"eval_steps_per_second": 14.305, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.025070041424787165, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1068, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.111, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1145, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.44153846153846155, |
|
"eval_loss": 5.012055397033691, |
|
"eval_runtime": 4.1955, |
|
"eval_samples_per_second": 119.174, |
|
"eval_steps_per_second": 15.016, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.0018720973379033757, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1093, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1051, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1087, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.43943589743589745, |
|
"eval_loss": 4.983563423156738, |
|
"eval_runtime": 4.0932, |
|
"eval_samples_per_second": 122.154, |
|
"eval_steps_per_second": 15.391, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.0014273377715875865, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1115, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1001, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1049, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1084, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4388205128205128, |
|
"eval_loss": 5.117064952850342, |
|
"eval_runtime": 4.0908, |
|
"eval_samples_per_second": 122.227, |
|
"eval_steps_per_second": 15.401, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 0.004914730802266865, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.102, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1014, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 20.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1069, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.44051282051282054, |
|
"eval_loss": 5.011991500854492, |
|
"eval_runtime": 4.1961, |
|
"eval_samples_per_second": 119.159, |
|
"eval_steps_per_second": 15.014, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1075, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0985, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1032, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1075, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 5.234306812286377, |
|
"eval_runtime": 4.4185, |
|
"eval_samples_per_second": 113.162, |
|
"eval_steps_per_second": 14.258, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0959, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0998, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1024, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4408717948717949, |
|
"eval_loss": 5.150094985961914, |
|
"eval_runtime": 4.2006, |
|
"eval_samples_per_second": 119.032, |
|
"eval_steps_per_second": 14.998, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 0.008507706871345696, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.002, |
|
"step": 7854 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.555328255161008e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|