|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.86666666666667, |
|
"eval_steps": 500, |
|
"global_step": 9350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6128205128205129, |
|
"eval_loss": 1.2953312397003174, |
|
"eval_runtime": 4.1114, |
|
"eval_samples_per_second": 121.614, |
|
"eval_steps_per_second": 15.323, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 27.2, |
|
"eval_f1": 37.44218938149973, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2821, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6146153846153846, |
|
"eval_loss": 1.2741013765335083, |
|
"eval_runtime": 4.8302, |
|
"eval_samples_per_second": 103.516, |
|
"eval_steps_per_second": 13.043, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.6, |
|
"eval_f1": 38.026666666666664, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6161538461538462, |
|
"eval_loss": 1.2714948654174805, |
|
"eval_runtime": 4.868, |
|
"eval_samples_per_second": 102.712, |
|
"eval_steps_per_second": 12.942, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 28.0, |
|
"eval_f1": 40.6962091503268, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1537, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3e-05, |
|
"loss": 1.066, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6151282051282051, |
|
"eval_loss": 1.3011157512664795, |
|
"eval_runtime": 4.407, |
|
"eval_samples_per_second": 113.455, |
|
"eval_steps_per_second": 14.295, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 26.2, |
|
"eval_f1": 38.19190476190477, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3e-05, |
|
"loss": 1.001, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9381, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6125641025641025, |
|
"eval_loss": 1.3728467226028442, |
|
"eval_runtime": 5.153, |
|
"eval_samples_per_second": 97.03, |
|
"eval_steps_per_second": 12.226, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 24.6, |
|
"eval_f1": 37.08666666666668, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8667, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8238, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6090769230769231, |
|
"eval_loss": 1.4598820209503174, |
|
"eval_runtime": 4.0994, |
|
"eval_samples_per_second": 121.97, |
|
"eval_steps_per_second": 15.368, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 23.0, |
|
"eval_f1": 35.324444444444445, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7461, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7289, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6064102564102564, |
|
"eval_loss": 1.5455493927001953, |
|
"eval_runtime": 4.1683, |
|
"eval_samples_per_second": 119.952, |
|
"eval_steps_per_second": 15.114, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 21.8, |
|
"eval_f1": 34.57222222222222, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6464, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6559, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6025641025641025, |
|
"eval_loss": 1.6359158754348755, |
|
"eval_runtime": 4.3161, |
|
"eval_samples_per_second": 115.844, |
|
"eval_steps_per_second": 14.596, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 33.70507936507938, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5733, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6006153846153847, |
|
"eval_loss": 1.7149169445037842, |
|
"eval_runtime": 4.1084, |
|
"eval_samples_per_second": 121.702, |
|
"eval_steps_per_second": 15.334, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 22.0, |
|
"eval_f1": 33.980476190476196, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5859, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5336, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5989230769230769, |
|
"eval_loss": 1.8006359338760376, |
|
"eval_runtime": 4.4039, |
|
"eval_samples_per_second": 113.535, |
|
"eval_steps_per_second": 14.305, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.68793650793652, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5379, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5116, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5982051282051282, |
|
"eval_loss": 1.8851306438446045, |
|
"eval_runtime": 4.8059, |
|
"eval_samples_per_second": 104.038, |
|
"eval_steps_per_second": 13.109, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 31.627460317460333, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5025, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4934, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5982051282051282, |
|
"eval_loss": 1.9262274503707886, |
|
"eval_runtime": 4.2024, |
|
"eval_samples_per_second": 118.98, |
|
"eval_steps_per_second": 14.992, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 19.2, |
|
"eval_f1": 30.531746031746042, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4827, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4823, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5974358974358974, |
|
"eval_loss": 1.9412546157836914, |
|
"eval_runtime": 4.4921, |
|
"eval_samples_per_second": 111.306, |
|
"eval_steps_per_second": 14.025, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 30.277619047619062, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.467, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.47, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5967179487179487, |
|
"eval_loss": 2.01212739944458, |
|
"eval_runtime": 4.0908, |
|
"eval_samples_per_second": 122.226, |
|
"eval_steps_per_second": 15.4, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 30.478571428571446, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.454, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4661, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5968205128205128, |
|
"eval_loss": 2.0249853134155273, |
|
"eval_runtime": 4.0875, |
|
"eval_samples_per_second": 122.323, |
|
"eval_steps_per_second": 15.413, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.526031746031755, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4423, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.462, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.598974358974359, |
|
"eval_loss": 1.9804810285568237, |
|
"eval_runtime": 4.0893, |
|
"eval_samples_per_second": 122.269, |
|
"eval_steps_per_second": 15.406, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 31.533809523809534, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4357, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5975897435897436, |
|
"eval_loss": 2.0656166076660156, |
|
"eval_runtime": 4.1949, |
|
"eval_samples_per_second": 119.193, |
|
"eval_steps_per_second": 15.018, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 31.295238095238105, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4511, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4348, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5978974358974359, |
|
"eval_loss": 2.030846118927002, |
|
"eval_runtime": 4.1974, |
|
"eval_samples_per_second": 119.122, |
|
"eval_steps_per_second": 15.009, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 31.137619047619065, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4436, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4331, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.598974358974359, |
|
"eval_loss": 2.062859058380127, |
|
"eval_runtime": 4.7948, |
|
"eval_samples_per_second": 104.279, |
|
"eval_steps_per_second": 13.139, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.42380952380953, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4338, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4341, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5983076923076923, |
|
"eval_loss": 2.0815415382385254, |
|
"eval_runtime": 4.4041, |
|
"eval_samples_per_second": 113.531, |
|
"eval_steps_per_second": 14.305, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 21.6, |
|
"eval_f1": 33.345238095238116, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4316, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.434, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5968205128205128, |
|
"eval_loss": 2.1252710819244385, |
|
"eval_runtime": 4.3531, |
|
"eval_samples_per_second": 114.862, |
|
"eval_steps_per_second": 14.473, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 32.32285714285715, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4255, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4335, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5971282051282051, |
|
"eval_loss": 2.178868293762207, |
|
"eval_runtime": 4.1999, |
|
"eval_samples_per_second": 119.05, |
|
"eval_steps_per_second": 15.0, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 31.35571428571428, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4201, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4346, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5952307692307692, |
|
"eval_loss": 2.145519971847534, |
|
"eval_runtime": 4.0955, |
|
"eval_samples_per_second": 122.084, |
|
"eval_steps_per_second": 15.383, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 32.04714285714286, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4177, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4326, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5971282051282051, |
|
"eval_loss": 2.1990349292755127, |
|
"eval_runtime": 4.0925, |
|
"eval_samples_per_second": 122.176, |
|
"eval_steps_per_second": 15.394, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 30.60428571428572, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4139, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5976410256410256, |
|
"eval_loss": 2.1889517307281494, |
|
"eval_runtime": 4.1957, |
|
"eval_samples_per_second": 119.17, |
|
"eval_steps_per_second": 15.015, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 31.052554112554116, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4268, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4139, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5967692307692307, |
|
"eval_loss": 2.1938648223876953, |
|
"eval_runtime": 4.417, |
|
"eval_samples_per_second": 113.198, |
|
"eval_steps_per_second": 14.263, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.143030303030315, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4222, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4162, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5964615384615385, |
|
"eval_loss": 2.218991279602051, |
|
"eval_runtime": 4.2318, |
|
"eval_samples_per_second": 118.152, |
|
"eval_steps_per_second": 14.887, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 31.969523809523817, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.419, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4177, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5954871794871794, |
|
"eval_loss": 2.2781052589416504, |
|
"eval_runtime": 4.1923, |
|
"eval_samples_per_second": 119.265, |
|
"eval_steps_per_second": 15.027, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 30.879220779220788, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4143, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4173, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5975897435897436, |
|
"eval_loss": 2.2680609226226807, |
|
"eval_runtime": 4.401, |
|
"eval_samples_per_second": 113.61, |
|
"eval_steps_per_second": 14.315, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 31.83809523809525, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4117, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4187, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5958974358974359, |
|
"eval_loss": 2.2996103763580322, |
|
"eval_runtime": 4.199, |
|
"eval_samples_per_second": 119.077, |
|
"eval_steps_per_second": 15.004, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.61430014430016, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.408, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4199, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5981025641025641, |
|
"eval_loss": 2.239492177963257, |
|
"eval_runtime": 4.8298, |
|
"eval_samples_per_second": 103.523, |
|
"eval_steps_per_second": 13.044, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 31.465887445887457, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4055, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4213, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5957435897435898, |
|
"eval_loss": 2.299053907394409, |
|
"eval_runtime": 4.0871, |
|
"eval_samples_per_second": 122.335, |
|
"eval_steps_per_second": 15.414, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 31.56428571428572, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4015, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.5952307692307692, |
|
"eval_loss": 2.3223156929016113, |
|
"eval_runtime": 4.1951, |
|
"eval_samples_per_second": 119.187, |
|
"eval_steps_per_second": 15.018, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 32.325714285714284, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4161, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4058, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5956923076923077, |
|
"eval_loss": 2.3266167640686035, |
|
"eval_runtime": 4.0888, |
|
"eval_samples_per_second": 122.286, |
|
"eval_steps_per_second": 15.408, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 31.896666666666672, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4118, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4056, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.5946153846153847, |
|
"eval_loss": 2.3779330253601074, |
|
"eval_runtime": 4.1964, |
|
"eval_samples_per_second": 119.15, |
|
"eval_steps_per_second": 15.013, |
|
"step": 6562 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.23207792207793, |
|
"step": 6562 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4103, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4078, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5950769230769231, |
|
"eval_loss": 2.345280408859253, |
|
"eval_runtime": 5.1209, |
|
"eval_samples_per_second": 97.64, |
|
"eval_steps_per_second": 12.303, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 32.03683982683983, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4048, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4097, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.5965128205128205, |
|
"eval_loss": 2.3379180431365967, |
|
"eval_runtime": 4.1922, |
|
"eval_samples_per_second": 119.268, |
|
"eval_steps_per_second": 15.028, |
|
"step": 6937 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.22937950937952, |
|
"step": 6937 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4059, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4105, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.5969230769230769, |
|
"eval_loss": 2.362399101257324, |
|
"eval_runtime": 4.1963, |
|
"eval_samples_per_second": 119.153, |
|
"eval_steps_per_second": 15.013, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 31.836363636363643, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4013, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4116, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.5961538461538461, |
|
"eval_loss": 2.3846352100372314, |
|
"eval_runtime": 4.0932, |
|
"eval_samples_per_second": 122.154, |
|
"eval_steps_per_second": 15.391, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 31.287619047619053, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3993, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4121, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5944615384615385, |
|
"eval_loss": 2.3748481273651123, |
|
"eval_runtime": 4.9211, |
|
"eval_samples_per_second": 101.603, |
|
"eval_steps_per_second": 12.802, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.21826839826841, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3973, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.5955897435897436, |
|
"eval_loss": 2.3797402381896973, |
|
"eval_runtime": 4.0885, |
|
"eval_samples_per_second": 122.293, |
|
"eval_steps_per_second": 15.409, |
|
"step": 7687 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_exact_match": 21.8, |
|
"eval_f1": 33.78445887445889, |
|
"step": 7687 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4092, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3985, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5966666666666667, |
|
"eval_loss": 2.359941005706787, |
|
"eval_runtime": 4.319, |
|
"eval_samples_per_second": 115.769, |
|
"eval_steps_per_second": 14.587, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_exact_match": 22.6, |
|
"eval_f1": 34.189696969696975, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4075, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4014, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.5970769230769231, |
|
"eval_loss": 2.347489356994629, |
|
"eval_runtime": 4.0917, |
|
"eval_samples_per_second": 122.197, |
|
"eval_steps_per_second": 15.397, |
|
"step": 8062 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 33.01255411255412, |
|
"step": 8062 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4032, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4032, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5987179487179487, |
|
"eval_loss": 2.3937387466430664, |
|
"eval_runtime": 4.3012, |
|
"eval_samples_per_second": 116.246, |
|
"eval_steps_per_second": 14.647, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 31.90666666666667, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4028, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.5967179487179487, |
|
"eval_loss": 2.386258363723755, |
|
"eval_runtime": 4.1018, |
|
"eval_samples_per_second": 121.897, |
|
"eval_steps_per_second": 15.359, |
|
"step": 8437 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 31.19238095238096, |
|
"step": 8437 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3992, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4027, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5956410256410256, |
|
"eval_loss": 2.4195497035980225, |
|
"eval_runtime": 4.0913, |
|
"eval_samples_per_second": 122.211, |
|
"eval_steps_per_second": 15.399, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 32.3282683982684, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3976, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 46.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4046, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.597025641025641, |
|
"eval_loss": 2.3832449913024902, |
|
"eval_runtime": 4.1919, |
|
"eval_samples_per_second": 119.277, |
|
"eval_steps_per_second": 15.029, |
|
"step": 8812 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 32.23841269841271, |
|
"step": 8812 |
|
}, |
|
{ |
|
"epoch": 47.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3948, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4067, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5973333333333334, |
|
"eval_loss": 2.380531072616577, |
|
"eval_runtime": 4.1918, |
|
"eval_samples_per_second": 119.282, |
|
"eval_steps_per_second": 15.03, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_exact_match": 22.4, |
|
"eval_f1": 32.9035064935065, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3923, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.5956923076923077, |
|
"eval_loss": 2.4460361003875732, |
|
"eval_runtime": 4.8374, |
|
"eval_samples_per_second": 103.362, |
|
"eval_steps_per_second": 13.024, |
|
"step": 9187 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_exact_match": 21.6, |
|
"eval_f1": 32.555714285714295, |
|
"step": 9187 |
|
}, |
|
{ |
|
"epoch": 49.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4045, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3949, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 49.87, |
|
"eval_accuracy": 0.5965641025641025, |
|
"eval_loss": 2.4443256855010986, |
|
"eval_runtime": 4.0946, |
|
"eval_samples_per_second": 122.113, |
|
"eval_steps_per_second": 15.386, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 49.87, |
|
"eval_exact_match": 21.8, |
|
"eval_f1": 32.88666666666667, |
|
"step": 9350 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9350, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 3.489165488087368e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|