{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5333333333333333, "grad_norm": 0.9283596873283386, "learning_rate": 3e-05, "loss": 1.963, "step": 100 }, { "epoch": 0.9973333333333333, "eval_accuracy": 0.5695384615384615, "eval_loss": 1.6438876390457153, "eval_runtime": 5.0534, "eval_samples_per_second": 98.943, "eval_steps_per_second": 12.467, "step": 187 }, { "epoch": 0.9973333333333333, "eval_exact_match": 16.2, "eval_f1": 25.888571428571442, "step": 187 }, { "epoch": 1.0666666666666667, "grad_norm": 1.0407154560089111, "learning_rate": 3e-05, "loss": 1.6668, "step": 200 }, { "epoch": 1.6, "grad_norm": 1.1281720399856567, "learning_rate": 3e-05, "loss": 1.6099, "step": 300 }, { "epoch": 2.0, "eval_accuracy": 0.5733333333333334, "eval_loss": 1.618270754814148, "eval_runtime": 5.1983, "eval_samples_per_second": 96.186, "eval_steps_per_second": 12.119, "step": 375 }, { "epoch": 2.0, "eval_exact_match": 16.6, "eval_f1": 26.287619047619057, "step": 375 }, { "epoch": 2.1333333333333333, "grad_norm": 1.2508225440979004, "learning_rate": 3e-05, "loss": 1.5866, "step": 400 }, { "epoch": 2.6666666666666665, "grad_norm": 1.3197834491729736, "learning_rate": 3e-05, "loss": 1.524, "step": 500 }, { "epoch": 2.997333333333333, "eval_accuracy": 0.5743589743589743, "eval_loss": 1.6164497137069702, "eval_runtime": 5.0495, "eval_samples_per_second": 99.019, "eval_steps_per_second": 12.476, "step": 562 }, { "epoch": 2.997333333333333, "eval_exact_match": 17.8, "eval_f1": 27.35428571428571, "step": 562 }, { "epoch": 3.2, "grad_norm": 1.6320394277572632, "learning_rate": 3e-05, "loss": 1.4752, "step": 600 }, { "epoch": 3.7333333333333334, "grad_norm": 1.7101725339889526, "learning_rate": 3e-05, "loss": 1.3938, "step": 700 }, { "epoch": 4.0, "eval_accuracy": 0.5728717948717948, "eval_loss": 1.6376241445541382, "eval_runtime": 4.9662, "eval_samples_per_second": 100.68, "eval_steps_per_second": 12.686, "step": 750 }, { "epoch": 4.0, "eval_exact_match": 18.0, "eval_f1": 27.498571428571427, "step": 750 }, { "epoch": 4.266666666666667, "grad_norm": 2.1727664470672607, "learning_rate": 3e-05, "loss": 1.342, "step": 800 }, { "epoch": 4.8, "grad_norm": 2.341384172439575, "learning_rate": 3e-05, "loss": 1.2685, "step": 900 }, { "epoch": 4.997333333333334, "eval_accuracy": 0.5699487179487179, "eval_loss": 1.684618353843689, "eval_runtime": 5.5457, "eval_samples_per_second": 90.16, "eval_steps_per_second": 11.36, "step": 937 }, { "epoch": 4.997333333333334, "eval_exact_match": 19.2, "eval_f1": 28.190952380952375, "step": 937 }, { "epoch": 5.333333333333333, "grad_norm": 2.5624172687530518, "learning_rate": 3e-05, "loss": 1.1847, "step": 1000 }, { "epoch": 5.866666666666667, "grad_norm": 2.8159019947052, "learning_rate": 3e-05, "loss": 1.1591, "step": 1100 }, { "epoch": 6.0, "eval_accuracy": 0.5672820512820513, "eval_loss": 1.7546838521957397, "eval_runtime": 5.0186, "eval_samples_per_second": 99.629, "eval_steps_per_second": 12.553, "step": 1125 }, { "epoch": 6.0, "eval_exact_match": 16.6, "eval_f1": 25.054761904761914, "step": 1125 }, { "epoch": 6.4, "grad_norm": 3.056166410446167, "learning_rate": 3e-05, "loss": 1.0486, "step": 1200 }, { "epoch": 6.933333333333334, "grad_norm": 2.873281240463257, "learning_rate": 3e-05, "loss": 1.0444, "step": 1300 }, { "epoch": 6.997333333333334, "eval_accuracy": 0.5643076923076923, "eval_loss": 1.8394943475723267, "eval_runtime": 5.5931, "eval_samples_per_second": 89.396, "eval_steps_per_second": 11.264, "step": 1312 }, { "epoch": 6.997333333333334, "eval_exact_match": 15.8, "eval_f1": 23.925714285714296, "step": 1312 }, { "epoch": 7.466666666666667, "grad_norm": 3.069322109222412, "learning_rate": 3e-05, "loss": 0.9374, "step": 1400 }, { "epoch": 8.0, "grad_norm": 3.2732129096984863, "learning_rate": 3e-05, "loss": 0.9535, "step": 1500 }, { "epoch": 8.0, "eval_accuracy": 0.5612820512820513, "eval_loss": 1.9008216857910156, "eval_runtime": 5.1357, "eval_samples_per_second": 97.358, "eval_steps_per_second": 12.267, "step": 1500 }, { "epoch": 8.0, "eval_exact_match": 15.2, "eval_f1": 23.12063492063493, "step": 1500 }, { "epoch": 8.533333333333333, "grad_norm": 3.382610321044922, "learning_rate": 3e-05, "loss": 0.8235, "step": 1600 }, { "epoch": 8.997333333333334, "eval_accuracy": 0.5592307692307692, "eval_loss": 2.026776075363159, "eval_runtime": 5.5056, "eval_samples_per_second": 90.817, "eval_steps_per_second": 11.443, "step": 1687 }, { "epoch": 8.997333333333334, "eval_exact_match": 13.8, "eval_f1": 21.42285714285715, "step": 1687 }, { "epoch": 9.066666666666666, "grad_norm": 4.363819122314453, "learning_rate": 3e-05, "loss": 0.8547, "step": 1700 }, { "epoch": 9.6, "grad_norm": 3.685805320739746, "learning_rate": 3e-05, "loss": 0.7635, "step": 1800 }, { "epoch": 10.0, "eval_accuracy": 0.5567692307692308, "eval_loss": 2.0937435626983643, "eval_runtime": 5.235, "eval_samples_per_second": 95.512, "eval_steps_per_second": 12.034, "step": 1875 }, { "epoch": 10.0, "eval_exact_match": 15.2, "eval_f1": 22.031904761904762, "step": 1875 } ], "logging_steps": 100, "max_steps": 9350, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.4335509031419904e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }