{ "best_metric": 0.7073720097541809, "best_model_checkpoint": "/content/drive/MyDrive/NLP/HW_2/base_no_aug/checkpoint-3000", "epoch": 1.7225180081428124, "eval_steps": 500, "global_step": 5500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15659254619480112, "grad_norm": 19.11348533630371, "learning_rate": 4.843407453805199e-05, "loss": 0.6641, "step": 500 }, { "epoch": 0.15659254619480112, "eval_accuracy": 0.6756993006993007, "eval_f1": 0.6309085532769743, "eval_loss": 1.0107067823410034, "eval_precision": 0.7012138660182872, "eval_recall": 0.6756993006993007, "eval_runtime": 44.0879, "eval_samples_per_second": 51.896, "eval_steps_per_second": 3.244, "step": 500 }, { "epoch": 0.31318509238960224, "grad_norm": 4.949793338775635, "learning_rate": 4.686814907610398e-05, "loss": 0.4952, "step": 1000 }, { "epoch": 0.31318509238960224, "eval_accuracy": 0.7172202797202797, "eval_f1": 0.7010817389261343, "eval_loss": 0.9195705056190491, "eval_precision": 0.719533769236407, "eval_recall": 0.7172202797202797, "eval_runtime": 44.0115, "eval_samples_per_second": 51.986, "eval_steps_per_second": 3.249, "step": 1000 }, { "epoch": 0.46977763858440336, "grad_norm": 6.428478240966797, "learning_rate": 4.530222361415597e-05, "loss": 0.4705, "step": 1500 }, { "epoch": 0.46977763858440336, "eval_accuracy": 0.7342657342657343, "eval_f1": 0.7241364056660365, "eval_loss": 0.8369142413139343, "eval_precision": 0.7324017585102514, "eval_recall": 0.7342657342657343, "eval_runtime": 44.1553, "eval_samples_per_second": 51.817, "eval_steps_per_second": 3.239, "step": 1500 }, { "epoch": 0.6263701847792045, "grad_norm": 15.48355484008789, "learning_rate": 4.3736298152207956e-05, "loss": 0.4563, "step": 2000 }, { "epoch": 0.6263701847792045, "eval_accuracy": 0.7255244755244755, "eval_f1": 0.7220659878589696, "eval_loss": 0.7203921675682068, "eval_precision": 0.7288491079278737, "eval_recall": 0.7255244755244755, "eval_runtime": 44.1072, "eval_samples_per_second": 51.874, "eval_steps_per_second": 3.242, "step": 2000 }, { "epoch": 0.7829627309740056, "grad_norm": 9.567626953125, "learning_rate": 4.2170372690259944e-05, "loss": 0.4562, "step": 2500 }, { "epoch": 0.7829627309740056, "eval_accuracy": 0.7368881118881119, "eval_f1": 0.7318259360556157, "eval_loss": 0.7625445127487183, "eval_precision": 0.7457878456256892, "eval_recall": 0.7368881118881119, "eval_runtime": 44.0347, "eval_samples_per_second": 51.959, "eval_steps_per_second": 3.247, "step": 2500 }, { "epoch": 0.9395552771688067, "grad_norm": 10.897008895874023, "learning_rate": 4.060444722831194e-05, "loss": 0.4505, "step": 3000 }, { "epoch": 0.9395552771688067, "eval_accuracy": 0.7390734265734266, "eval_f1": 0.7361840041600632, "eval_loss": 0.7073720097541809, "eval_precision": 0.7464772753837399, "eval_recall": 0.7390734265734266, "eval_runtime": 44.1508, "eval_samples_per_second": 51.822, "eval_steps_per_second": 3.239, "step": 3000 }, { "epoch": 1.096147823363608, "grad_norm": 6.87204647064209, "learning_rate": 3.903852176636392e-05, "loss": 0.4002, "step": 3500 }, { "epoch": 1.096147823363608, "eval_accuracy": 0.7058566433566433, "eval_f1": 0.6837629922596936, "eval_loss": 1.0260264873504639, "eval_precision": 0.7131527545701114, "eval_recall": 0.7058566433566433, "eval_runtime": 44.0506, "eval_samples_per_second": 51.94, "eval_steps_per_second": 3.246, "step": 3500 }, { "epoch": 1.252740369558409, "grad_norm": 3.467501640319824, "learning_rate": 3.7472596304415916e-05, "loss": 0.3763, "step": 4000 }, { "epoch": 1.252740369558409, "eval_accuracy": 0.7259615384615384, "eval_f1": 0.7082333601046841, "eval_loss": 0.8645443916320801, "eval_precision": 0.7337310658027728, "eval_recall": 0.7259615384615384, "eval_runtime": 43.9247, "eval_samples_per_second": 52.089, "eval_steps_per_second": 3.256, "step": 4000 }, { "epoch": 1.4093329157532102, "grad_norm": 5.148597240447998, "learning_rate": 3.59066708424679e-05, "loss": 0.3773, "step": 4500 }, { "epoch": 1.4093329157532102, "eval_accuracy": 0.7526223776223776, "eval_f1": 0.7476449339273773, "eval_loss": 0.7937297821044922, "eval_precision": 0.7504548787471751, "eval_recall": 0.7526223776223776, "eval_runtime": 44.1088, "eval_samples_per_second": 51.872, "eval_steps_per_second": 3.242, "step": 4500 }, { "epoch": 1.5659254619480114, "grad_norm": 5.7631306648254395, "learning_rate": 3.434074538051989e-05, "loss": 0.3843, "step": 5000 }, { "epoch": 1.5659254619480114, "eval_accuracy": 0.7574300699300699, "eval_f1": 0.7513480604993672, "eval_loss": 0.7118169665336609, "eval_precision": 0.7551936355851372, "eval_recall": 0.7574300699300699, "eval_runtime": 44.066, "eval_samples_per_second": 51.922, "eval_steps_per_second": 3.245, "step": 5000 }, { "epoch": 1.7225180081428124, "grad_norm": 22.783588409423828, "learning_rate": 3.2774819918571875e-05, "loss": 0.3698, "step": 5500 }, { "epoch": 1.7225180081428124, "eval_accuracy": 0.7399475524475524, "eval_f1": 0.726730366521134, "eval_loss": 0.7873443365097046, "eval_precision": 0.7414103820390555, "eval_recall": 0.7399475524475524, "eval_runtime": 44.2033, "eval_samples_per_second": 51.761, "eval_steps_per_second": 3.235, "step": 5500 } ], "logging_steps": 500, "max_steps": 15965, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.3006752358256576e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }