{ "best_metric": 0.9050816297531128, "best_model_checkpoint": "./mbert_ar_ur/checkpoint-5000", "epoch": 4.0, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 1.885714285714286e-05, "loss": 0.4454, "step": 500 }, { "epoch": 0.8, "learning_rate": 1.7714285714285717e-05, "loss": 0.2955, "step": 1000 }, { "epoch": 1.0, "eval_LOC_f1": 0.09510682288077188, "eval_ORG_f1": 0.24583663758921492, "eval_PER_f1": 0.3119353304802663, "eval_loss": 1.4924763441085815, "eval_overall_accuracy": 0.4886179434773416, "eval_overall_f1": 0.1982125758059368, "eval_overall_precision": 0.16568836712913554, "eval_overall_recall": 0.2466243050039714, "eval_runtime": 2.7088, "eval_samples_per_second": 369.163, "eval_steps_per_second": 23.257, "step": 1250 }, { "epoch": 1.2, "learning_rate": 1.6571428571428574e-05, "loss": 0.2182, "step": 1500 }, { "epoch": 1.6, "learning_rate": 1.542857142857143e-05, "loss": 0.1877, "step": 2000 }, { "epoch": 2.0, "learning_rate": 1.4285714285714287e-05, "loss": 0.181, "step": 2500 }, { "epoch": 2.0, "eval_LOC_f1": 0.16472203157172272, "eval_ORG_f1": 0.26800929512006194, "eval_PER_f1": 0.6124661246612466, "eval_loss": 1.1303032636642456, "eval_overall_accuracy": 0.6376770737895553, "eval_overall_f1": 0.3045238514346066, "eval_overall_precision": 0.2734745494783433, "eval_overall_recall": 0.3435266084193805, "eval_runtime": 2.7853, "eval_samples_per_second": 359.026, "eval_steps_per_second": 22.619, "step": 2500 }, { "epoch": 2.4, "learning_rate": 1.3142857142857145e-05, "loss": 0.1295, "step": 3000 }, { "epoch": 2.8, "learning_rate": 1.2e-05, "loss": 0.1253, "step": 3500 }, { "epoch": 3.0, "eval_LOC_f1": 0.15284677111196027, "eval_ORG_f1": 0.3176020408163266, "eval_PER_f1": 0.6216577540106952, "eval_loss": 1.348933458328247, "eval_overall_accuracy": 0.6324617661568821, "eval_overall_f1": 0.3217743355043126, "eval_overall_precision": 0.2889661713563073, "eval_overall_recall": 0.36298649722001586, "eval_runtime": 2.5633, "eval_samples_per_second": 390.124, "eval_steps_per_second": 24.578, "step": 3750 }, { "epoch": 3.2, "learning_rate": 1.0857142857142858e-05, "loss": 0.098, "step": 4000 }, { "epoch": 3.6, "learning_rate": 9.714285714285715e-06, "loss": 0.0916, "step": 4500 }, { "epoch": 4.0, "learning_rate": 8.571428571428571e-06, "loss": 0.0866, "step": 5000 }, { "epoch": 4.0, "eval_LOC_f1": 0.42877697841726614, "eval_ORG_f1": 0.4028497409326424, "eval_PER_f1": 0.6109282422646478, "eval_loss": 0.9050816297531128, "eval_overall_accuracy": 0.7715836211149482, "eval_overall_f1": 0.47474747474747475, "eval_overall_precision": 0.464638783269962, "eval_overall_recall": 0.4853057982525814, "eval_runtime": 2.9178, "eval_samples_per_second": 342.725, "eval_steps_per_second": 21.592, "step": 5000 } ], "max_steps": 8750, "num_train_epochs": 7, "total_flos": 1363663262792160.0, "trial_name": null, "trial_params": null }