|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 31.0, |
|
"eval_steps": 500, |
|
"global_step": 7750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0003, |
|
"loss": 1.9296, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0003, |
|
"loss": 1.7554, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6093164556962025, |
|
"eval_loss": 1.7939746379852295, |
|
"eval_runtime": 5.0177, |
|
"eval_samples_per_second": 99.648, |
|
"eval_steps_per_second": 12.556, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 26.0, |
|
"eval_f1": 36.71136363636365, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003, |
|
"loss": 1.6394, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0003, |
|
"loss": 1.5315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0003, |
|
"loss": 1.5248, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6085063291139241, |
|
"eval_loss": 1.8273799419403076, |
|
"eval_runtime": 4.4023, |
|
"eval_samples_per_second": 113.576, |
|
"eval_steps_per_second": 14.311, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 24.4, |
|
"eval_f1": 33.57723554223556, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0003, |
|
"loss": 1.1692, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2054, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6027088607594937, |
|
"eval_loss": 1.9717934131622314, |
|
"eval_runtime": 5.1273, |
|
"eval_samples_per_second": 97.518, |
|
"eval_steps_per_second": 12.287, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 30.584243697478996, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0003, |
|
"loss": 1.0252, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8608, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8989, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5986835443037974, |
|
"eval_loss": 2.151914596557617, |
|
"eval_runtime": 5.3325, |
|
"eval_samples_per_second": 93.765, |
|
"eval_steps_per_second": 11.814, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 30.363229548229558, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5842, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6306, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5960506329113924, |
|
"eval_loss": 2.329284191131592, |
|
"eval_runtime": 4.5585, |
|
"eval_samples_per_second": 109.686, |
|
"eval_steps_per_second": 13.82, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 30.02542624042625, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.5368, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4495, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4712, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5935696202531645, |
|
"eval_loss": 2.5598793029785156, |
|
"eval_runtime": 4.4094, |
|
"eval_samples_per_second": 113.395, |
|
"eval_steps_per_second": 14.288, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 30.108149073149082, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3625, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3797, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5935696202531645, |
|
"eval_loss": 2.732860803604126, |
|
"eval_runtime": 5.2701, |
|
"eval_samples_per_second": 94.876, |
|
"eval_steps_per_second": 11.954, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 29.002709512709526, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3582, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3399, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5912911392405064, |
|
"eval_loss": 2.8185083866119385, |
|
"eval_runtime": 4.4105, |
|
"eval_samples_per_second": 113.366, |
|
"eval_steps_per_second": 14.284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 29.191525696525694, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3202, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3314, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.592, |
|
"eval_loss": 2.824962854385376, |
|
"eval_runtime": 4.5596, |
|
"eval_samples_per_second": 109.66, |
|
"eval_steps_per_second": 13.817, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 29.434571539571547, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3174, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3157, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3265, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5911392405063292, |
|
"eval_loss": 2.9242382049560547, |
|
"eval_runtime": 4.7857, |
|
"eval_samples_per_second": 104.477, |
|
"eval_steps_per_second": 13.164, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 29.524657009657016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2989, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3148, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.591240506329114, |
|
"eval_loss": 3.0012593269348145, |
|
"eval_runtime": 4.5582, |
|
"eval_samples_per_second": 109.691, |
|
"eval_steps_per_second": 13.821, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 28.128880727116037, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3028, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3047, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.590632911392405, |
|
"eval_loss": 2.931525707244873, |
|
"eval_runtime": 4.4989, |
|
"eval_samples_per_second": 111.138, |
|
"eval_steps_per_second": 14.003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 30.12248362748364, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3101, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5896962025316456, |
|
"eval_loss": 2.9116382598876953, |
|
"eval_runtime": 4.6155, |
|
"eval_samples_per_second": 108.331, |
|
"eval_steps_per_second": 13.65, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 19.2, |
|
"eval_f1": 27.90991785991786, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3063, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3041, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3164, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5902025316455696, |
|
"eval_loss": 2.920793056488037, |
|
"eval_runtime": 4.4066, |
|
"eval_samples_per_second": 113.467, |
|
"eval_steps_per_second": 14.297, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 18.0, |
|
"eval_f1": 27.415170385170395, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2957, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3074, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5908607594936709, |
|
"eval_loss": 2.9385440349578857, |
|
"eval_runtime": 4.8175, |
|
"eval_samples_per_second": 103.788, |
|
"eval_steps_per_second": 13.077, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 29.687630702630706, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3013, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3002, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3107, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5891645569620253, |
|
"eval_loss": 2.9519243240356445, |
|
"eval_runtime": 4.5619, |
|
"eval_samples_per_second": 109.603, |
|
"eval_steps_per_second": 13.81, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 28.670117105117104, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2892, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3054, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5898227848101266, |
|
"eval_loss": 3.010847568511963, |
|
"eval_runtime": 4.718, |
|
"eval_samples_per_second": 105.977, |
|
"eval_steps_per_second": 13.353, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 27.744197632432932, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2959, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.297, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.309, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5903544303797469, |
|
"eval_loss": 3.003683567047119, |
|
"eval_runtime": 4.4085, |
|
"eval_samples_per_second": 113.418, |
|
"eval_steps_per_second": 14.291, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 28.64154694978225, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2883, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3005, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5898481012658228, |
|
"eval_loss": 3.0279438495635986, |
|
"eval_runtime": 4.7099, |
|
"eval_samples_per_second": 106.16, |
|
"eval_steps_per_second": 13.376, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 28.707790706026003, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2959, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2911, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3127, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5883291139240506, |
|
"eval_loss": 2.9650285243988037, |
|
"eval_runtime": 4.711, |
|
"eval_samples_per_second": 106.135, |
|
"eval_steps_per_second": 13.373, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 11.4, |
|
"eval_f1": 20.32415197623728, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2868, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2989, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5895696202531645, |
|
"eval_loss": 3.013432502746582, |
|
"eval_runtime": 4.8101, |
|
"eval_samples_per_second": 103.947, |
|
"eval_steps_per_second": 13.097, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_exact_match": 10.8, |
|
"eval_f1": 17.55567536105352, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2948, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2921, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3072, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5885822784810126, |
|
"eval_loss": 3.0298521518707275, |
|
"eval_runtime": 4.7113, |
|
"eval_samples_per_second": 106.128, |
|
"eval_steps_per_second": 13.372, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.7666666666666667, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2914, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2982, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5870126582278481, |
|
"eval_loss": 3.0277092456817627, |
|
"eval_runtime": 4.4065, |
|
"eval_samples_per_second": 113.468, |
|
"eval_steps_per_second": 14.297, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_exact_match": 7.6, |
|
"eval_f1": 9.586507936507935, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3014, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2907, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3029, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5877215189873418, |
|
"eval_loss": 3.02402925491333, |
|
"eval_runtime": 4.7089, |
|
"eval_samples_per_second": 106.182, |
|
"eval_steps_per_second": 13.379, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 4.2, |
|
"eval_f1": 7.583745822888591, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2853, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2989, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5874430379746836, |
|
"eval_loss": 3.0006086826324463, |
|
"eval_runtime": 4.5617, |
|
"eval_samples_per_second": 109.609, |
|
"eval_steps_per_second": 13.811, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.5121500721500722, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2904, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.29, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3009, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5873417721518988, |
|
"eval_loss": 3.058345317840576, |
|
"eval_runtime": 4.708, |
|
"eval_samples_per_second": 106.202, |
|
"eval_steps_per_second": 13.381, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 2.2, |
|
"eval_f1": 4.8883016983016985, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2793, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2905, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5867848101265822, |
|
"eval_loss": 3.1007750034332275, |
|
"eval_runtime": 4.5013, |
|
"eval_samples_per_second": 111.079, |
|
"eval_steps_per_second": 13.996, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_exact_match": 1.2, |
|
"eval_f1": 2.0656084656084652, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.2964, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3021, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3237, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5851139240506329, |
|
"eval_loss": 3.192138671875, |
|
"eval_runtime": 4.5595, |
|
"eval_samples_per_second": 109.661, |
|
"eval_steps_per_second": 13.817, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_exact_match": 0.8, |
|
"eval_f1": 1.5059791467236656, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3584, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.0003, |
|
"loss": 0.4738, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5789873417721519, |
|
"eval_loss": 3.32121205329895, |
|
"eval_runtime": 4.4073, |
|
"eval_samples_per_second": 113.447, |
|
"eval_steps_per_second": 14.294, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_exact_match": 3.8, |
|
"eval_f1": 6.689863653841047, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8318, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 0.0003, |
|
"loss": 1.646, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.0003, |
|
"loss": 2.5106, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5371645569620254, |
|
"eval_loss": 3.9834704399108887, |
|
"eval_runtime": 5.1142, |
|
"eval_samples_per_second": 97.766, |
|
"eval_steps_per_second": 12.319, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_exact_match": 3.8, |
|
"eval_f1": 5.247779866332498, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 0.0003, |
|
"loss": 2.5833, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 0.0003, |
|
"loss": 2.4442, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5435443037974683, |
|
"eval_loss": 3.8099365234375, |
|
"eval_runtime": 5.1354, |
|
"eval_samples_per_second": 97.364, |
|
"eval_steps_per_second": 12.268, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 11.652945717183004, |
|
"step": 7750 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 12500, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 4.955087674005258e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|