|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 32.0, |
|
"eval_steps": 500, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.798, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6067341772151899, |
|
"eval_loss": 1.8213441371917725, |
|
"eval_runtime": 4.8116, |
|
"eval_samples_per_second": 103.916, |
|
"eval_steps_per_second": 13.093, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 25.4, |
|
"eval_f1": 35.50992875545509, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7534, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7163, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6077215189873417, |
|
"eval_loss": 1.8046445846557617, |
|
"eval_runtime": 5.1228, |
|
"eval_samples_per_second": 97.603, |
|
"eval_steps_per_second": 12.298, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 25.8, |
|
"eval_f1": 35.76454372424963, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5854, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5869, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6071392405063291, |
|
"eval_loss": 1.829284906387329, |
|
"eval_runtime": 4.5692, |
|
"eval_samples_per_second": 109.429, |
|
"eval_steps_per_second": 13.788, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 24.4, |
|
"eval_f1": 34.46831890331891, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5039, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4165, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4349, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6042784810126582, |
|
"eval_loss": 1.89736008644104, |
|
"eval_runtime": 4.5698, |
|
"eval_samples_per_second": 109.415, |
|
"eval_steps_per_second": 13.786, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 22.8, |
|
"eval_f1": 32.991320346320364, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2624, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3111, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.601493670886076, |
|
"eval_loss": 1.9769378900527954, |
|
"eval_runtime": 4.7168, |
|
"eval_samples_per_second": 106.005, |
|
"eval_steps_per_second": 13.357, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 22.6, |
|
"eval_f1": 32.46891774891776, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.226, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1608, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.599240506329114, |
|
"eval_loss": 2.0634872913360596, |
|
"eval_runtime": 4.4047, |
|
"eval_samples_per_second": 113.515, |
|
"eval_steps_per_second": 14.303, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 29.995800865800874, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0336, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0729, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5975443037974684, |
|
"eval_loss": 2.1523237228393555, |
|
"eval_runtime": 4.4025, |
|
"eval_samples_per_second": 113.571, |
|
"eval_steps_per_second": 14.31, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 29.054994058229354, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0005, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9437, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9833, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5947341772151898, |
|
"eval_loss": 2.2639756202697754, |
|
"eval_runtime": 4.7031, |
|
"eval_samples_per_second": 106.312, |
|
"eval_steps_per_second": 13.395, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 20.8, |
|
"eval_f1": 28.71228732052262, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.823, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8672, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5924050632911393, |
|
"eval_loss": 2.3642578125, |
|
"eval_runtime": 4.4051, |
|
"eval_samples_per_second": 113.505, |
|
"eval_steps_per_second": 14.302, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 22.4, |
|
"eval_f1": 30.14020120686362, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8032, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7566, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7883, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5908101265822785, |
|
"eval_loss": 2.4598007202148438, |
|
"eval_runtime": 4.5544, |
|
"eval_samples_per_second": 109.783, |
|
"eval_steps_per_second": 13.833, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 21.4, |
|
"eval_f1": 29.271013039248334, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6488, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6879, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.588987341772152, |
|
"eval_loss": 2.5668857097625732, |
|
"eval_runtime": 4.6769, |
|
"eval_samples_per_second": 106.908, |
|
"eval_steps_per_second": 13.47, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 29.08202869026399, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6502, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.602, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5885316455696202, |
|
"eval_loss": 2.700011730194092, |
|
"eval_runtime": 5.1309, |
|
"eval_samples_per_second": 97.45, |
|
"eval_steps_per_second": 12.279, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 27.678461864932462, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5228, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5545, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5850886075949367, |
|
"eval_loss": 2.8281185626983643, |
|
"eval_runtime": 4.5558, |
|
"eval_samples_per_second": 109.749, |
|
"eval_steps_per_second": 13.828, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 28.273127941889552, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5244, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4924, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5208, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5852911392405064, |
|
"eval_loss": 2.879397392272949, |
|
"eval_runtime": 5.1301, |
|
"eval_samples_per_second": 97.464, |
|
"eval_steps_per_second": 12.28, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 28.16958851605911, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4405, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4679, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.586253164556962, |
|
"eval_loss": 2.9183950424194336, |
|
"eval_runtime": 4.8449, |
|
"eval_samples_per_second": 103.201, |
|
"eval_steps_per_second": 13.003, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 27.632893381128678, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4443, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4235, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4464, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5852151898734177, |
|
"eval_loss": 3.0790698528289795, |
|
"eval_runtime": 4.7018, |
|
"eval_samples_per_second": 106.343, |
|
"eval_steps_per_second": 13.399, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 29.560525356995953, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3919, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4136, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5855696202531645, |
|
"eval_loss": 3.0832109451293945, |
|
"eval_runtime": 4.7575, |
|
"eval_samples_per_second": 105.096, |
|
"eval_steps_per_second": 13.242, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 28.69772782772784, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3902, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3841, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5846835443037974, |
|
"eval_loss": 3.0944228172302246, |
|
"eval_runtime": 4.7087, |
|
"eval_samples_per_second": 106.186, |
|
"eval_steps_per_second": 13.379, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 28.558940281940288, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3577, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3776, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5828354430379746, |
|
"eval_loss": 3.2120306491851807, |
|
"eval_runtime": 4.5614, |
|
"eval_samples_per_second": 109.616, |
|
"eval_steps_per_second": 13.812, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 28.91690789886443, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3617, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3575, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.373, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5839240506329114, |
|
"eval_loss": 3.229841947555542, |
|
"eval_runtime": 4.5542, |
|
"eval_samples_per_second": 109.788, |
|
"eval_steps_per_second": 13.833, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 29.50454927425517, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3333, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3572, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.584126582278481, |
|
"eval_loss": 3.2433719635009766, |
|
"eval_runtime": 5.128, |
|
"eval_samples_per_second": 97.503, |
|
"eval_steps_per_second": 12.285, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 28.610755910755913, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3415, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3365, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3517, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.5846835443037974, |
|
"eval_loss": 3.2606234550476074, |
|
"eval_runtime": 4.5466, |
|
"eval_samples_per_second": 109.971, |
|
"eval_steps_per_second": 13.856, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 30.38587926693191, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3228, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3374, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.5845063291139241, |
|
"eval_loss": 3.339247465133667, |
|
"eval_runtime": 4.5344, |
|
"eval_samples_per_second": 110.267, |
|
"eval_steps_per_second": 13.894, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 28.687225552225563, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3256, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3242, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3338, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5841012658227848, |
|
"eval_loss": 3.348928689956665, |
|
"eval_runtime": 5.042, |
|
"eval_samples_per_second": 99.167, |
|
"eval_steps_per_second": 12.495, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 28.578326118326117, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3099, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3286, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5845822784810126, |
|
"eval_loss": 3.4035861492156982, |
|
"eval_runtime": 5.141, |
|
"eval_samples_per_second": 97.258, |
|
"eval_steps_per_second": 12.255, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_exact_match": 21.2, |
|
"eval_f1": 29.869212454212466, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3144, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3147, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3259, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.584886075949367, |
|
"eval_loss": 3.3877618312835693, |
|
"eval_runtime": 4.5792, |
|
"eval_samples_per_second": 109.19, |
|
"eval_steps_per_second": 13.758, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_exact_match": 19.6, |
|
"eval_f1": 28.681592185592194, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3023, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3175, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.5852911392405064, |
|
"eval_loss": 3.4960334300994873, |
|
"eval_runtime": 4.5521, |
|
"eval_samples_per_second": 109.838, |
|
"eval_steps_per_second": 13.84, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_exact_match": 21.0, |
|
"eval_f1": 28.89975690975692, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3096, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3048, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3185, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5851898734177216, |
|
"eval_loss": 3.487301826477051, |
|
"eval_runtime": 4.5552, |
|
"eval_samples_per_second": 109.764, |
|
"eval_steps_per_second": 13.83, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_exact_match": 20.4, |
|
"eval_f1": 28.829249639249642, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.296, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3117, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.5839746835443038, |
|
"eval_loss": 3.4780404567718506, |
|
"eval_runtime": 4.5544, |
|
"eval_samples_per_second": 109.784, |
|
"eval_steps_per_second": 13.833, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_exact_match": 20.0, |
|
"eval_f1": 27.875814185814193, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3015, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3017, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3125, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.5835696202531646, |
|
"eval_loss": 3.5383405685424805, |
|
"eval_runtime": 4.8879, |
|
"eval_samples_per_second": 102.293, |
|
"eval_steps_per_second": 12.889, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_exact_match": 19.2, |
|
"eval_f1": 27.704913974913982, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2933, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3041, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.5840506329113924, |
|
"eval_loss": 3.525256395339966, |
|
"eval_runtime": 4.6104, |
|
"eval_samples_per_second": 108.45, |
|
"eval_steps_per_second": 13.665, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_exact_match": 19.8, |
|
"eval_f1": 28.185307265833586, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2966, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2984, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3047, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5853164556962025, |
|
"eval_loss": 3.528280258178711, |
|
"eval_runtime": 4.7023, |
|
"eval_samples_per_second": 106.332, |
|
"eval_steps_per_second": 13.398, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_exact_match": 20.2, |
|
"eval_f1": 28.492784992785, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 12500, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 5.115434673069097e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|