|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 32.0, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6128205128205129, |
|
"eval_loss": 1.2953312397003174, |
|
"eval_runtime": 4.1114, |
|
"eval_samples_per_second": 121.614, |
|
"eval_steps_per_second": 15.323, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 27.2, |
|
"eval_f1": 37.44218938149973, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2821, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6146153846153846, |
|
"eval_loss": 1.2741013765335083, |
|
"eval_runtime": 4.8302, |
|
"eval_samples_per_second": 103.516, |
|
"eval_steps_per_second": 13.043, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.6, |
|
"eval_f1": 38.026666666666664, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6161538461538462, |
|
"eval_loss": 1.2714948654174805, |
|
"eval_runtime": 4.868, |
|
"eval_samples_per_second": 102.712, |
|
"eval_steps_per_second": 12.942, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 28.0, |
|
"eval_f1": 40.6962091503268, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1537, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3e-05, |
|
"loss": 1.066, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6151282051282051, |
|
"eval_loss": 1.3011157512664795, |
|
"eval_runtime": 4.407, |
|
"eval_samples_per_second": 113.455, |
|
"eval_steps_per_second": 14.295, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 26.2, |
|
"eval_f1": 38.19190476190477, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3e-05, |
|
"loss": 1.001, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9381, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6125641025641025, |
|
"eval_loss": 1.3728467226028442, |
|
"eval_runtime": 5.153, |
|
"eval_samples_per_second": 97.03, |
|
"eval_steps_per_second": 12.226, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 24.6, |
|
"eval_f1": 37.08666666666668, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8667, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8238, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6090769230769231, |
|
"eval_loss": 1.4598820209503174, |
|
"eval_runtime": 4.0994, |
|
"eval_samples_per_second": 121.97, |
|
"eval_steps_per_second": 15.368, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 23.0, |
|
"eval_f1": 35.324444444444445, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7461, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7289, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6064102564102564, |
|
"eval_loss": 1.5455493927001953, |
|
"eval_runtime": 4.1683, |
|
"eval_samples_per_second": 119.952, |
|
"eval_steps_per_second": 15.114, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 21.8, |
|
"eval_f1": 34.57222222222222, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6464, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6559, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6025641025641025, |
|
"eval_loss": 1.6359158754348755, |
|
"eval_runtime": 4.3161, |
|
"eval_samples_per_second": 115.844, |
|
"eval_steps_per_second": 14.596, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 33.70507936507938, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5733, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6006153846153847, |
|
"eval_loss": 1.7149169445037842, |
|
"eval_runtime": 4.1084, |
|
"eval_samples_per_second": 121.702, |
|
"eval_steps_per_second": 15.334, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 22.0, |
|
"eval_f1": 33.980476190476196, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5859, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5336, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5989230769230769, |
|
"eval_loss": 1.8006359338760376, |
|
"eval_runtime": 4.4039, |
|
"eval_samples_per_second": 113.535, |
|
"eval_steps_per_second": 14.305, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.68793650793652, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5379, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5116, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5982051282051282, |
|
"eval_loss": 1.8851306438446045, |
|
"eval_runtime": 4.8059, |
|
"eval_samples_per_second": 104.038, |
|
"eval_steps_per_second": 13.109, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 31.627460317460333, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5025, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4934, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5982051282051282, |
|
"eval_loss": 1.9262274503707886, |
|
"eval_runtime": 4.2024, |
|
"eval_samples_per_second": 118.98, |
|
"eval_steps_per_second": 14.992, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 19.2, |
|
"eval_f1": 30.531746031746042, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4827, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4823, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5974358974358974, |
|
"eval_loss": 1.9412546157836914, |
|
"eval_runtime": 4.4921, |
|
"eval_samples_per_second": 111.306, |
|
"eval_steps_per_second": 14.025, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 30.277619047619062, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.467, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.47, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5967179487179487, |
|
"eval_loss": 2.01212739944458, |
|
"eval_runtime": 4.0908, |
|
"eval_samples_per_second": 122.226, |
|
"eval_steps_per_second": 15.4, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 30.478571428571446, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.454, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4661, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5968205128205128, |
|
"eval_loss": 2.0249853134155273, |
|
"eval_runtime": 4.0875, |
|
"eval_samples_per_second": 122.323, |
|
"eval_steps_per_second": 15.413, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.526031746031755, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4423, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.462, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.598974358974359, |
|
"eval_loss": 1.9804810285568237, |
|
"eval_runtime": 4.0893, |
|
"eval_samples_per_second": 122.269, |
|
"eval_steps_per_second": 15.406, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 31.533809523809534, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4357, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5975897435897436, |
|
"eval_loss": 2.0656166076660156, |
|
"eval_runtime": 4.1949, |
|
"eval_samples_per_second": 119.193, |
|
"eval_steps_per_second": 15.018, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 18.8, |
|
"eval_f1": 31.295238095238105, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4511, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4348, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5978974358974359, |
|
"eval_loss": 2.030846118927002, |
|
"eval_runtime": 4.1974, |
|
"eval_samples_per_second": 119.122, |
|
"eval_steps_per_second": 15.009, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 31.137619047619065, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4436, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4331, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.598974358974359, |
|
"eval_loss": 2.062859058380127, |
|
"eval_runtime": 4.7948, |
|
"eval_samples_per_second": 104.279, |
|
"eval_steps_per_second": 13.139, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 32.42380952380953, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4338, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4341, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5983076923076923, |
|
"eval_loss": 2.0815415382385254, |
|
"eval_runtime": 4.4041, |
|
"eval_samples_per_second": 113.531, |
|
"eval_steps_per_second": 14.305, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 21.6, |
|
"eval_f1": 33.345238095238116, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4316, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.434, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.5968205128205128, |
|
"eval_loss": 2.1252710819244385, |
|
"eval_runtime": 4.3531, |
|
"eval_samples_per_second": 114.862, |
|
"eval_steps_per_second": 14.473, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 32.32285714285715, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4255, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4335, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5971282051282051, |
|
"eval_loss": 2.178868293762207, |
|
"eval_runtime": 4.1999, |
|
"eval_samples_per_second": 119.05, |
|
"eval_steps_per_second": 15.0, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 31.35571428571428, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4201, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4346, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5952307692307692, |
|
"eval_loss": 2.145519971847534, |
|
"eval_runtime": 4.0955, |
|
"eval_samples_per_second": 122.084, |
|
"eval_steps_per_second": 15.383, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 32.04714285714286, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4177, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4326, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5971282051282051, |
|
"eval_loss": 2.1990349292755127, |
|
"eval_runtime": 4.0925, |
|
"eval_samples_per_second": 122.176, |
|
"eval_steps_per_second": 15.394, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 30.60428571428572, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4139, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5976410256410256, |
|
"eval_loss": 2.1889517307281494, |
|
"eval_runtime": 4.1957, |
|
"eval_samples_per_second": 119.17, |
|
"eval_steps_per_second": 15.015, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 31.052554112554116, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4268, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4139, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.5967692307692307, |
|
"eval_loss": 2.1938648223876953, |
|
"eval_runtime": 4.417, |
|
"eval_samples_per_second": 113.198, |
|
"eval_steps_per_second": 14.263, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.143030303030315, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4222, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4162, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5964615384615385, |
|
"eval_loss": 2.218991279602051, |
|
"eval_runtime": 4.2318, |
|
"eval_samples_per_second": 118.152, |
|
"eval_steps_per_second": 14.887, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 31.969523809523817, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.419, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4177, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5954871794871794, |
|
"eval_loss": 2.2781052589416504, |
|
"eval_runtime": 4.1923, |
|
"eval_samples_per_second": 119.265, |
|
"eval_steps_per_second": 15.027, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 30.879220779220788, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4143, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4173, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5975897435897436, |
|
"eval_loss": 2.2680609226226807, |
|
"eval_runtime": 4.401, |
|
"eval_samples_per_second": 113.61, |
|
"eval_steps_per_second": 14.315, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 31.83809523809525, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4117, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4187, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5958974358974359, |
|
"eval_loss": 2.2996103763580322, |
|
"eval_runtime": 4.199, |
|
"eval_samples_per_second": 119.077, |
|
"eval_steps_per_second": 15.004, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 32.61430014430016, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.408, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4199, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5981025641025641, |
|
"eval_loss": 2.239492177963257, |
|
"eval_runtime": 4.8298, |
|
"eval_samples_per_second": 103.523, |
|
"eval_steps_per_second": 13.044, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 31.465887445887457, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4055, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4213, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5957435897435898, |
|
"eval_loss": 2.299053907394409, |
|
"eval_runtime": 4.0871, |
|
"eval_samples_per_second": 122.335, |
|
"eval_steps_per_second": 15.414, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 31.56428571428572, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9350, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.2387994473057485e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|