|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 26.0, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0005, |
|
"loss": 2.3712, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0005, |
|
"loss": 1.8732, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.593873417721519, |
|
"eval_loss": 2.011091947555542, |
|
"eval_runtime": 4.861, |
|
"eval_samples_per_second": 102.859, |
|
"eval_steps_per_second": 12.96, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.32835692091264185, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0005, |
|
"loss": 1.7329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0005, |
|
"loss": 1.6233, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0005, |
|
"loss": 1.6142, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6051139240506329, |
|
"eval_loss": 1.844284176826477, |
|
"eval_runtime": 4.8205, |
|
"eval_samples_per_second": 103.724, |
|
"eval_steps_per_second": 13.069, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.7215873015873017, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0005, |
|
"loss": 1.1625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0005, |
|
"loss": 1.206, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6007341772151898, |
|
"eval_loss": 1.981817603111267, |
|
"eval_runtime": 4.671, |
|
"eval_samples_per_second": 107.044, |
|
"eval_steps_per_second": 13.487, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.4163636363636364, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0005, |
|
"loss": 1.0064, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.8141, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.8693, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.594126582278481, |
|
"eval_loss": 2.2100281715393066, |
|
"eval_runtime": 5.2488, |
|
"eval_samples_per_second": 95.26, |
|
"eval_steps_per_second": 12.003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 0.2, |
|
"eval_f1": 1.559097642686839, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.5477, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.6023, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5910126582278481, |
|
"eval_loss": 2.375615119934082, |
|
"eval_runtime": 4.4156, |
|
"eval_samples_per_second": 113.235, |
|
"eval_steps_per_second": 14.268, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 0.4, |
|
"eval_f1": 2.41971862073413, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.514, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.4479, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.4717, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5895696202531645, |
|
"eval_loss": 2.542104482650757, |
|
"eval_runtime": 5.1447, |
|
"eval_samples_per_second": 97.188, |
|
"eval_steps_per_second": 12.246, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 9.8, |
|
"eval_f1": 17.19683736589267, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3723, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3938, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5891139240506329, |
|
"eval_loss": 2.658656597137451, |
|
"eval_runtime": 4.7175, |
|
"eval_samples_per_second": 105.987, |
|
"eval_steps_per_second": 13.354, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 3.6, |
|
"eval_f1": 8.743843476498272, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3727, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3538, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3697, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5872911392405064, |
|
"eval_loss": 2.7531518936157227, |
|
"eval_runtime": 4.7213, |
|
"eval_samples_per_second": 105.902, |
|
"eval_steps_per_second": 13.344, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.3733369257948794, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3431, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3617, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5870126582278481, |
|
"eval_loss": 2.7664403915405273, |
|
"eval_runtime": 4.7203, |
|
"eval_samples_per_second": 105.925, |
|
"eval_steps_per_second": 13.347, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.18, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3424, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3418, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3607, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5866835443037974, |
|
"eval_loss": 2.8513779640197754, |
|
"eval_runtime": 4.5658, |
|
"eval_samples_per_second": 109.511, |
|
"eval_steps_per_second": 13.798, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 15.8, |
|
"eval_f1": 24.03880563880564, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.339, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3414, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5860759493670886, |
|
"eval_loss": 2.8931641578674316, |
|
"eval_runtime": 5.1473, |
|
"eval_samples_per_second": 97.139, |
|
"eval_steps_per_second": 12.24, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 1.8004313814840132, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3302, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.329, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3439, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5854683544303797, |
|
"eval_loss": 2.9544754028320312, |
|
"eval_runtime": 4.5673, |
|
"eval_samples_per_second": 109.473, |
|
"eval_steps_per_second": 13.794, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 0.8, |
|
"eval_f1": 3.4374976657329603, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3182, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.335, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5842784810126582, |
|
"eval_loss": 2.899094343185425, |
|
"eval_runtime": 4.7188, |
|
"eval_samples_per_second": 105.96, |
|
"eval_steps_per_second": 13.351, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 14.4, |
|
"eval_f1": 23.30256743256743, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.327, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3223, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3391, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5839746835443038, |
|
"eval_loss": 2.879316806793213, |
|
"eval_runtime": 4.415, |
|
"eval_samples_per_second": 113.251, |
|
"eval_steps_per_second": 14.27, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 17.2, |
|
"eval_f1": 25.861362134597435, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3128, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.328, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5851392405063292, |
|
"eval_loss": 2.8953680992126465, |
|
"eval_runtime": 4.7507, |
|
"eval_samples_per_second": 105.249, |
|
"eval_steps_per_second": 13.261, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 15.8, |
|
"eval_f1": 23.802868797868808, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3233, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3216, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3351, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5838481012658228, |
|
"eval_loss": 2.913999080657959, |
|
"eval_runtime": 4.5211, |
|
"eval_samples_per_second": 110.593, |
|
"eval_steps_per_second": 13.935, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 15.8, |
|
"eval_f1": 23.843056551291856, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3087, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3252, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5825063291139241, |
|
"eval_loss": 2.929701089859009, |
|
"eval_runtime": 5.1507, |
|
"eval_samples_per_second": 97.074, |
|
"eval_steps_per_second": 12.231, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 17.6, |
|
"eval_f1": 25.533542568542583, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3152, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3161, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.332, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5833924050632912, |
|
"eval_loss": 2.9811997413635254, |
|
"eval_runtime": 4.5707, |
|
"eval_samples_per_second": 109.392, |
|
"eval_steps_per_second": 13.783, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 14.8, |
|
"eval_f1": 23.04023215346745, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3089, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.324, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5807848101265822, |
|
"eval_loss": 2.982343912124634, |
|
"eval_runtime": 4.5156, |
|
"eval_samples_per_second": 110.728, |
|
"eval_steps_per_second": 13.952, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 17.2, |
|
"eval_f1": 24.75057253204313, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3169, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3116, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3329, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5813164556962025, |
|
"eval_loss": 2.9419918060302734, |
|
"eval_runtime": 5.1584, |
|
"eval_samples_per_second": 96.929, |
|
"eval_steps_per_second": 12.213, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 17.0, |
|
"eval_f1": 24.681692914928213, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3052, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3242, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5806075949367089, |
|
"eval_loss": 3.0366506576538086, |
|
"eval_runtime": 4.4135, |
|
"eval_samples_per_second": 113.288, |
|
"eval_steps_per_second": 14.274, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_exact_match": 15.8, |
|
"eval_f1": 24.507380005615303, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3169, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3104, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3271, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5807594936708861, |
|
"eval_loss": 3.0456035137176514, |
|
"eval_runtime": 4.6241, |
|
"eval_samples_per_second": 108.13, |
|
"eval_steps_per_second": 13.624, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 16.8, |
|
"eval_f1": 24.95916155739686, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.4232, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 0.0005, |
|
"loss": 2.1778, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.3285822784810127, |
|
"eval_loss": 10.320929527282715, |
|
"eval_runtime": 4.5651, |
|
"eval_samples_per_second": 109.526, |
|
"eval_steps_per_second": 13.8, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 0.0005, |
|
"loss": 10.0061, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 0.0005, |
|
"loss": 8.9834, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0005, |
|
"loss": 9.433, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.3188607594936709, |
|
"eval_loss": 11.153959274291992, |
|
"eval_runtime": 5.1388, |
|
"eval_samples_per_second": 97.298, |
|
"eval_steps_per_second": 12.26, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 0.0005, |
|
"loss": 8.4619, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.0005, |
|
"loss": 7.8728, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.32336708860759494, |
|
"eval_loss": 10.043231964111328, |
|
"eval_runtime": 4.5666, |
|
"eval_samples_per_second": 109.491, |
|
"eval_steps_per_second": 13.796, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.0005, |
|
"loss": 9.8186, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.0005, |
|
"loss": 7.7736, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 0.0005, |
|
"loss": 8.6557, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.329873417721519, |
|
"eval_loss": 10.657929420471191, |
|
"eval_runtime": 4.5712, |
|
"eval_samples_per_second": 109.38, |
|
"eval_steps_per_second": 13.782, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0, |
|
"step": 6500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 12500, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 4.155661298769592e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|