{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.0, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 0.0003, "loss": 1.9296, "step": 100 }, { "epoch": 0.8, "learning_rate": 0.0003, "loss": 1.7554, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.6093164556962025, "eval_loss": 1.7939746379852295, "eval_runtime": 5.0177, "eval_samples_per_second": 99.648, "eval_steps_per_second": 12.556, "step": 250 }, { "epoch": 1.0, "eval_exact_match": 26.0, "eval_f1": 36.71136363636365, "step": 250 }, { "epoch": 1.2, "learning_rate": 0.0003, "loss": 1.6394, "step": 300 }, { "epoch": 1.6, "learning_rate": 0.0003, "loss": 1.5315, "step": 400 }, { "epoch": 2.0, "learning_rate": 0.0003, "loss": 1.5248, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6085063291139241, "eval_loss": 1.8273799419403076, "eval_runtime": 4.4023, "eval_samples_per_second": 113.576, "eval_steps_per_second": 14.311, "step": 500 }, { "epoch": 2.0, "eval_exact_match": 24.4, "eval_f1": 33.57723554223556, "step": 500 }, { "epoch": 2.4, "learning_rate": 0.0003, "loss": 1.1692, "step": 600 }, { "epoch": 2.8, "learning_rate": 0.0003, "loss": 1.2054, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.6027088607594937, "eval_loss": 1.9717934131622314, "eval_runtime": 5.1273, "eval_samples_per_second": 97.518, "eval_steps_per_second": 12.287, "step": 750 }, { "epoch": 3.0, "eval_exact_match": 20.8, "eval_f1": 30.584243697478996, "step": 750 }, { "epoch": 3.2, "learning_rate": 0.0003, "loss": 1.0252, "step": 800 }, { "epoch": 3.6, "learning_rate": 0.0003, "loss": 0.8608, "step": 900 }, { "epoch": 4.0, "learning_rate": 0.0003, "loss": 0.8989, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5986835443037974, "eval_loss": 2.151914596557617, "eval_runtime": 5.3325, "eval_samples_per_second": 93.765, "eval_steps_per_second": 11.814, "step": 1000 }, { "epoch": 4.0, "eval_exact_match": 21.0, "eval_f1": 30.363229548229558, "step": 1000 }, { "epoch": 4.4, "learning_rate": 0.0003, "loss": 0.5842, "step": 1100 }, { "epoch": 4.8, "learning_rate": 0.0003, "loss": 0.6306, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.5960506329113924, "eval_loss": 2.329284191131592, "eval_runtime": 4.5585, "eval_samples_per_second": 109.686, "eval_steps_per_second": 13.82, "step": 1250 }, { "epoch": 5.0, "eval_exact_match": 20.4, "eval_f1": 30.02542624042625, "step": 1250 }, { "epoch": 5.2, "learning_rate": 0.0003, "loss": 0.5368, "step": 1300 }, { "epoch": 5.6, "learning_rate": 0.0003, "loss": 0.4495, "step": 1400 }, { "epoch": 6.0, "learning_rate": 0.0003, "loss": 0.4712, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.5935696202531645, "eval_loss": 2.5598793029785156, "eval_runtime": 4.4094, "eval_samples_per_second": 113.395, "eval_steps_per_second": 14.288, "step": 1500 }, { "epoch": 6.0, "eval_exact_match": 20.6, "eval_f1": 30.108149073149082, "step": 1500 }, { "epoch": 6.4, "learning_rate": 0.0003, "loss": 0.3625, "step": 1600 }, { "epoch": 6.8, "learning_rate": 0.0003, "loss": 0.3797, "step": 1700 }, { "epoch": 7.0, "eval_accuracy": 0.5935696202531645, "eval_loss": 2.732860803604126, "eval_runtime": 5.2701, "eval_samples_per_second": 94.876, "eval_steps_per_second": 11.954, "step": 1750 }, { "epoch": 7.0, "eval_exact_match": 19.6, "eval_f1": 29.002709512709526, "step": 1750 }, { "epoch": 7.2, "learning_rate": 0.0003, "loss": 0.3582, "step": 1800 }, { "epoch": 7.6, "learning_rate": 0.0003, "loss": 0.3399, "step": 1900 }, { "epoch": 8.0, "learning_rate": 0.0003, "loss": 0.3527, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5912911392405064, "eval_loss": 2.8185083866119385, "eval_runtime": 4.4105, "eval_samples_per_second": 113.366, "eval_steps_per_second": 14.284, "step": 2000 }, { "epoch": 8.0, "eval_exact_match": 20.8, "eval_f1": 29.191525696525694, "step": 2000 }, { "epoch": 8.4, "learning_rate": 0.0003, "loss": 0.3202, "step": 2100 }, { "epoch": 8.8, "learning_rate": 0.0003, "loss": 0.3314, "step": 2200 }, { "epoch": 9.0, "eval_accuracy": 0.592, "eval_loss": 2.824962854385376, "eval_runtime": 4.5596, "eval_samples_per_second": 109.66, "eval_steps_per_second": 13.817, "step": 2250 }, { "epoch": 9.0, "eval_exact_match": 21.0, "eval_f1": 29.434571539571547, "step": 2250 }, { "epoch": 9.2, "learning_rate": 0.0003, "loss": 0.3174, "step": 2300 }, { "epoch": 9.6, "learning_rate": 0.0003, "loss": 0.3157, "step": 2400 }, { "epoch": 10.0, "learning_rate": 0.0003, "loss": 0.3265, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.5911392405063292, "eval_loss": 2.9242382049560547, "eval_runtime": 4.7857, "eval_samples_per_second": 104.477, "eval_steps_per_second": 13.164, "step": 2500 }, { "epoch": 10.0, "eval_exact_match": 20.8, "eval_f1": 29.524657009657016, "step": 2500 }, { "epoch": 10.4, "learning_rate": 0.0003, "loss": 0.2989, "step": 2600 }, { "epoch": 10.8, "learning_rate": 0.0003, "loss": 0.3148, "step": 2700 }, { "epoch": 11.0, "eval_accuracy": 0.591240506329114, "eval_loss": 3.0012593269348145, "eval_runtime": 4.5582, "eval_samples_per_second": 109.691, "eval_steps_per_second": 13.821, "step": 2750 }, { "epoch": 11.0, "eval_exact_match": 20.0, "eval_f1": 28.128880727116037, "step": 2750 }, { "epoch": 11.2, "learning_rate": 0.0003, "loss": 0.3028, "step": 2800 }, { "epoch": 11.6, "learning_rate": 0.0003, "loss": 0.3047, "step": 2900 }, { "epoch": 12.0, "learning_rate": 0.0003, "loss": 0.3184, "step": 3000 }, { "epoch": 12.0, "eval_accuracy": 0.590632911392405, "eval_loss": 2.931525707244873, "eval_runtime": 4.4989, "eval_samples_per_second": 111.138, "eval_steps_per_second": 14.003, "step": 3000 }, { "epoch": 12.0, "eval_exact_match": 21.2, "eval_f1": 30.12248362748364, "step": 3000 }, { "epoch": 12.4, "learning_rate": 0.0003, "loss": 0.3, "step": 3100 }, { "epoch": 12.8, "learning_rate": 0.0003, "loss": 0.3101, "step": 3200 }, { "epoch": 13.0, "eval_accuracy": 0.5896962025316456, "eval_loss": 2.9116382598876953, "eval_runtime": 4.6155, "eval_samples_per_second": 108.331, "eval_steps_per_second": 13.65, "step": 3250 }, { "epoch": 13.0, "eval_exact_match": 19.2, "eval_f1": 27.90991785991786, "step": 3250 }, { "epoch": 13.2, "learning_rate": 0.0003, "loss": 0.3063, "step": 3300 }, { "epoch": 13.6, "learning_rate": 0.0003, "loss": 0.3041, "step": 3400 }, { "epoch": 14.0, "learning_rate": 0.0003, "loss": 0.3164, "step": 3500 }, { "epoch": 14.0, "eval_accuracy": 0.5902025316455696, "eval_loss": 2.920793056488037, "eval_runtime": 4.4066, "eval_samples_per_second": 113.467, "eval_steps_per_second": 14.297, "step": 3500 }, { "epoch": 14.0, "eval_exact_match": 18.0, "eval_f1": 27.415170385170395, "step": 3500 }, { "epoch": 14.4, "learning_rate": 0.0003, "loss": 0.2957, "step": 3600 }, { "epoch": 14.8, "learning_rate": 0.0003, "loss": 0.3074, "step": 3700 }, { "epoch": 15.0, "eval_accuracy": 0.5908607594936709, "eval_loss": 2.9385440349578857, "eval_runtime": 4.8175, "eval_samples_per_second": 103.788, "eval_steps_per_second": 13.077, "step": 3750 }, { "epoch": 15.0, "eval_exact_match": 21.2, "eval_f1": 29.687630702630706, "step": 3750 }, { "epoch": 15.2, "learning_rate": 0.0003, "loss": 0.3013, "step": 3800 }, { "epoch": 15.6, "learning_rate": 0.0003, "loss": 0.3002, "step": 3900 }, { "epoch": 16.0, "learning_rate": 0.0003, "loss": 0.3107, "step": 4000 }, { "epoch": 16.0, "eval_accuracy": 0.5891645569620253, "eval_loss": 2.9519243240356445, "eval_runtime": 4.5619, "eval_samples_per_second": 109.603, "eval_steps_per_second": 13.81, "step": 4000 }, { "epoch": 16.0, "eval_exact_match": 19.8, "eval_f1": 28.670117105117104, "step": 4000 }, { "epoch": 16.4, "learning_rate": 0.0003, "loss": 0.2892, "step": 4100 }, { "epoch": 16.8, "learning_rate": 0.0003, "loss": 0.3054, "step": 4200 }, { "epoch": 17.0, "eval_accuracy": 0.5898227848101266, "eval_loss": 3.010847568511963, "eval_runtime": 4.718, "eval_samples_per_second": 105.977, "eval_steps_per_second": 13.353, "step": 4250 }, { "epoch": 17.0, "eval_exact_match": 19.6, "eval_f1": 27.744197632432932, "step": 4250 }, { "epoch": 17.2, "learning_rate": 0.0003, "loss": 0.2959, "step": 4300 }, { "epoch": 17.6, "learning_rate": 0.0003, "loss": 0.297, "step": 4400 }, { "epoch": 18.0, "learning_rate": 0.0003, "loss": 0.309, "step": 4500 }, { "epoch": 18.0, "eval_accuracy": 0.5903544303797469, "eval_loss": 3.003683567047119, "eval_runtime": 4.4085, "eval_samples_per_second": 113.418, "eval_steps_per_second": 14.291, "step": 4500 }, { "epoch": 18.0, "eval_exact_match": 20.6, "eval_f1": 28.64154694978225, "step": 4500 }, { "epoch": 18.4, "learning_rate": 0.0003, "loss": 0.2883, "step": 4600 }, { "epoch": 18.8, "learning_rate": 0.0003, "loss": 0.3005, "step": 4700 }, { "epoch": 19.0, "eval_accuracy": 0.5898481012658228, "eval_loss": 3.0279438495635986, "eval_runtime": 4.7099, "eval_samples_per_second": 106.16, "eval_steps_per_second": 13.376, "step": 4750 }, { "epoch": 19.0, "eval_exact_match": 21.0, "eval_f1": 28.707790706026003, "step": 4750 }, { "epoch": 19.2, "learning_rate": 0.0003, "loss": 0.2959, "step": 4800 }, { "epoch": 19.6, "learning_rate": 0.0003, "loss": 0.2911, "step": 4900 }, { "epoch": 20.0, "learning_rate": 0.0003, "loss": 0.3127, "step": 5000 }, { "epoch": 20.0, "eval_accuracy": 0.5883291139240506, "eval_loss": 2.9650285243988037, "eval_runtime": 4.711, "eval_samples_per_second": 106.135, "eval_steps_per_second": 13.373, "step": 5000 }, { "epoch": 20.0, "eval_exact_match": 11.4, "eval_f1": 20.32415197623728, "step": 5000 }, { "epoch": 20.4, "learning_rate": 0.0003, "loss": 0.2868, "step": 5100 }, { "epoch": 20.8, "learning_rate": 0.0003, "loss": 0.2989, "step": 5200 }, { "epoch": 21.0, "eval_accuracy": 0.5895696202531645, "eval_loss": 3.013432502746582, "eval_runtime": 4.8101, "eval_samples_per_second": 103.947, "eval_steps_per_second": 13.097, "step": 5250 }, { "epoch": 21.0, "eval_exact_match": 10.8, "eval_f1": 17.55567536105352, "step": 5250 }, { "epoch": 21.2, "learning_rate": 0.0003, "loss": 0.2948, "step": 5300 }, { "epoch": 21.6, "learning_rate": 0.0003, "loss": 0.2921, "step": 5400 }, { "epoch": 22.0, "learning_rate": 0.0003, "loss": 0.3072, "step": 5500 }, { "epoch": 22.0, "eval_accuracy": 0.5885822784810126, "eval_loss": 3.0298521518707275, "eval_runtime": 4.7113, "eval_samples_per_second": 106.128, "eval_steps_per_second": 13.372, "step": 5500 }, { "epoch": 22.0, "eval_exact_match": 0.0, "eval_f1": 0.7666666666666667, "step": 5500 }, { "epoch": 22.4, "learning_rate": 0.0003, "loss": 0.2914, "step": 5600 }, { "epoch": 22.8, "learning_rate": 0.0003, "loss": 0.2982, "step": 5700 }, { "epoch": 23.0, "eval_accuracy": 0.5870126582278481, "eval_loss": 3.0277092456817627, "eval_runtime": 4.4065, "eval_samples_per_second": 113.468, "eval_steps_per_second": 14.297, "step": 5750 }, { "epoch": 23.0, "eval_exact_match": 7.6, "eval_f1": 9.586507936507935, "step": 5750 }, { "epoch": 23.2, "learning_rate": 0.0003, "loss": 0.3014, "step": 5800 }, { "epoch": 23.6, "learning_rate": 0.0003, "loss": 0.2907, "step": 5900 }, { "epoch": 24.0, "learning_rate": 0.0003, "loss": 0.3029, "step": 6000 }, { "epoch": 24.0, "eval_accuracy": 0.5877215189873418, "eval_loss": 3.02402925491333, "eval_runtime": 4.7089, "eval_samples_per_second": 106.182, "eval_steps_per_second": 13.379, "step": 6000 }, { "epoch": 24.0, "eval_exact_match": 4.2, "eval_f1": 7.583745822888591, "step": 6000 }, { "epoch": 24.4, "learning_rate": 0.0003, "loss": 0.2853, "step": 6100 }, { "epoch": 24.8, "learning_rate": 0.0003, "loss": 0.2989, "step": 6200 }, { "epoch": 25.0, "eval_accuracy": 0.5874430379746836, "eval_loss": 3.0006086826324463, "eval_runtime": 4.5617, "eval_samples_per_second": 109.609, "eval_steps_per_second": 13.811, "step": 6250 }, { "epoch": 25.0, "eval_exact_match": 0.0, "eval_f1": 0.5121500721500722, "step": 6250 }, { "epoch": 25.2, "learning_rate": 0.0003, "loss": 0.2904, "step": 6300 }, { "epoch": 25.6, "learning_rate": 0.0003, "loss": 0.29, "step": 6400 }, { "epoch": 26.0, "learning_rate": 0.0003, "loss": 0.3009, "step": 6500 }, { "epoch": 26.0, "eval_accuracy": 0.5873417721518988, "eval_loss": 3.058345317840576, "eval_runtime": 4.708, "eval_samples_per_second": 106.202, "eval_steps_per_second": 13.381, "step": 6500 }, { "epoch": 26.0, "eval_exact_match": 2.2, "eval_f1": 4.8883016983016985, "step": 6500 }, { "epoch": 26.4, "learning_rate": 0.0003, "loss": 0.2793, "step": 6600 }, { "epoch": 26.8, "learning_rate": 0.0003, "loss": 0.2905, "step": 6700 }, { "epoch": 27.0, "eval_accuracy": 0.5867848101265822, "eval_loss": 3.1007750034332275, "eval_runtime": 4.5013, "eval_samples_per_second": 111.079, "eval_steps_per_second": 13.996, "step": 6750 }, { "epoch": 27.0, "eval_exact_match": 1.2, "eval_f1": 2.0656084656084652, "step": 6750 }, { "epoch": 27.2, "learning_rate": 0.0003, "loss": 0.2964, "step": 6800 }, { "epoch": 27.6, "learning_rate": 0.0003, "loss": 0.3021, "step": 6900 }, { "epoch": 28.0, "learning_rate": 0.0003, "loss": 0.3237, "step": 7000 }, { "epoch": 28.0, "eval_accuracy": 0.5851139240506329, "eval_loss": 3.192138671875, "eval_runtime": 4.5595, "eval_samples_per_second": 109.661, "eval_steps_per_second": 13.817, "step": 7000 }, { "epoch": 28.0, "eval_exact_match": 0.8, "eval_f1": 1.5059791467236656, "step": 7000 }, { "epoch": 28.4, "learning_rate": 0.0003, "loss": 0.3584, "step": 7100 }, { "epoch": 28.8, "learning_rate": 0.0003, "loss": 0.4738, "step": 7200 }, { "epoch": 29.0, "eval_accuracy": 0.5789873417721519, "eval_loss": 3.32121205329895, "eval_runtime": 4.4073, "eval_samples_per_second": 113.447, "eval_steps_per_second": 14.294, "step": 7250 }, { "epoch": 29.0, "eval_exact_match": 3.8, "eval_f1": 6.689863653841047, "step": 7250 }, { "epoch": 29.2, "learning_rate": 0.0003, "loss": 0.8318, "step": 7300 }, { "epoch": 29.6, "learning_rate": 0.0003, "loss": 1.646, "step": 7400 }, { "epoch": 30.0, "learning_rate": 0.0003, "loss": 2.5106, "step": 7500 }, { "epoch": 30.0, "eval_accuracy": 0.5371645569620254, "eval_loss": 3.9834704399108887, "eval_runtime": 5.1142, "eval_samples_per_second": 97.766, "eval_steps_per_second": 12.319, "step": 7500 }, { "epoch": 30.0, "eval_exact_match": 3.8, "eval_f1": 5.247779866332498, "step": 7500 }, { "epoch": 30.4, "learning_rate": 0.0003, "loss": 2.5833, "step": 7600 }, { "epoch": 30.8, "learning_rate": 0.0003, "loss": 2.4442, "step": 7700 }, { "epoch": 31.0, "eval_accuracy": 0.5435443037974683, "eval_loss": 3.8099365234375, "eval_runtime": 5.1354, "eval_samples_per_second": 97.364, "eval_steps_per_second": 12.268, "step": 7750 }, { "epoch": 31.0, "eval_exact_match": 8.0, "eval_f1": 11.652945717183004, "step": 7750 }, { "epoch": 31.2, "learning_rate": 0.0003, "loss": 2.1299, "step": 7800 }, { "epoch": 31.6, "learning_rate": 0.0003, "loss": 2.3675, "step": 7900 }, { "epoch": 32.0, "learning_rate": 0.0003, "loss": 2.5329, "step": 8000 }, { "epoch": 32.0, "eval_accuracy": 0.5304810126582279, "eval_loss": 4.046270847320557, "eval_runtime": 5.1297, "eval_samples_per_second": 97.471, "eval_steps_per_second": 12.281, "step": 8000 }, { "epoch": 32.0, "eval_exact_match": 8.0, "eval_f1": 11.118349417066474, "step": 8000 }, { "epoch": 32.4, "learning_rate": 0.0003, "loss": 2.9966, "step": 8100 }, { "epoch": 32.8, "learning_rate": 0.0003, "loss": 3.9607, "step": 8200 }, { "epoch": 33.0, "eval_accuracy": 0.45944303797468355, "eval_loss": 5.853953838348389, "eval_runtime": 4.7634, "eval_samples_per_second": 104.966, "eval_steps_per_second": 13.226, "step": 8250 }, { "epoch": 33.0, "eval_exact_match": 0.8, "eval_f1": 2.1406916099773245, "step": 8250 }, { "epoch": 33.2, "learning_rate": 0.0003, "loss": 5.3072, "step": 8300 }, { "epoch": 33.6, "learning_rate": 0.0003, "loss": 8.0875, "step": 8400 }, { "epoch": 34.0, "learning_rate": 0.0003, "loss": 8.2643, "step": 8500 }, { "epoch": 34.0, "eval_accuracy": 0.36860759493670886, "eval_loss": 9.332476615905762, "eval_runtime": 4.4983, "eval_samples_per_second": 111.154, "eval_steps_per_second": 14.005, "step": 8500 }, { "epoch": 34.0, "eval_exact_match": 0.0, "eval_f1": 0.12853014882332628, "step": 8500 }, { "epoch": 34.4, "learning_rate": 0.0003, "loss": 8.6504, "step": 8600 }, { "epoch": 34.8, "learning_rate": 0.0003, "loss": 8.6872, "step": 8700 }, { "epoch": 35.0, "eval_accuracy": 0.34237974683544303, "eval_loss": 10.414295196533203, "eval_runtime": 4.561, "eval_samples_per_second": 109.625, "eval_steps_per_second": 13.813, "step": 8750 }, { "epoch": 35.0, "eval_exact_match": 0.0, "eval_f1": 0.11605853329133448, "step": 8750 }, { "epoch": 35.2, "learning_rate": 0.0003, "loss": 9.9152, "step": 8800 }, { "epoch": 35.6, "learning_rate": 0.0003, "loss": 4.7671, "step": 8900 }, { "epoch": 36.0, "learning_rate": 0.0003, "loss": 0.5615, "step": 9000 }, { "epoch": 36.0, "eval_accuracy": 0.5844810126582278, "eval_loss": 2.786999464035034, "eval_runtime": 5.1493, "eval_samples_per_second": 97.101, "eval_steps_per_second": 12.235, "step": 9000 }, { "epoch": 36.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "step": 9000 }, { "epoch": 36.4, "learning_rate": 0.0003, "loss": 0.434, "step": 9100 }, { "epoch": 36.8, "learning_rate": 0.0003, "loss": 0.4658, "step": 9200 }, { "epoch": 37.0, "eval_accuracy": 0.5855189873417721, "eval_loss": 2.7785391807556152, "eval_runtime": 4.403, "eval_samples_per_second": 113.558, "eval_steps_per_second": 14.308, "step": 9250 }, { "epoch": 37.0, "eval_exact_match": 0.2, "eval_f1": 0.2, "step": 9250 }, { "epoch": 37.2, "learning_rate": 0.0003, "loss": 0.4142, "step": 9300 }, { "epoch": 37.6, "learning_rate": 0.0003, "loss": 0.3799, "step": 9400 }, { "epoch": 38.0, "learning_rate": 0.0003, "loss": 0.3872, "step": 9500 }, { "epoch": 38.0, "eval_accuracy": 0.585873417721519, "eval_loss": 2.8146767616271973, "eval_runtime": 4.5598, "eval_samples_per_second": 109.653, "eval_steps_per_second": 13.816, "step": 9500 }, { "epoch": 38.0, "eval_exact_match": 0.0, "eval_f1": 0.1949989841186112, "step": 9500 }, { "epoch": 38.4, "learning_rate": 0.0003, "loss": 0.3113, "step": 9600 }, { "epoch": 38.8, "learning_rate": 0.0003, "loss": 0.3237, "step": 9700 }, { "epoch": 39.0, "eval_accuracy": 0.5856455696202532, "eval_loss": 2.956742286682129, "eval_runtime": 4.7632, "eval_samples_per_second": 104.972, "eval_steps_per_second": 13.227, "step": 9750 }, { "epoch": 39.0, "eval_exact_match": 0.2, "eval_f1": 0.28, "step": 9750 }, { "epoch": 39.2, "learning_rate": 0.0003, "loss": 0.3078, "step": 9800 }, { "epoch": 39.6, "learning_rate": 0.0003, "loss": 0.2905, "step": 9900 }, { "epoch": 40.0, "learning_rate": 0.0003, "loss": 0.3006, "step": 10000 }, { "epoch": 40.0, "eval_accuracy": 0.5861518987341772, "eval_loss": 3.0283753871917725, "eval_runtime": 4.4059, "eval_samples_per_second": 113.485, "eval_steps_per_second": 14.299, "step": 10000 }, { "epoch": 40.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "step": 10000 } ], "logging_steps": 100, "max_steps": 12500, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6.394690327847895e+17, "trial_name": null, "trial_params": null }