{ "best_metric": 0.8179571663920923, "best_model_checkpoint": "./bigbird-base-health-fact/checkpoint-2452", "epoch": 3.0, "global_step": 3678, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.6630434782608698e-06, "loss": 1.2792, "step": 100 }, { "epoch": 0.16, "learning_rate": 5.380434782608695e-06, "loss": 1.0546, "step": 200 }, { "epoch": 0.24, "learning_rate": 8.097826086956523e-06, "loss": 0.8857, "step": 300 }, { "epoch": 0.33, "learning_rate": 9.909365558912388e-06, "loss": 0.7958, "step": 400 }, { "epoch": 0.41, "learning_rate": 9.60725075528701e-06, "loss": 0.7246, "step": 500 }, { "epoch": 0.49, "learning_rate": 9.305135951661632e-06, "loss": 0.7629, "step": 600 }, { "epoch": 0.57, "learning_rate": 9.003021148036256e-06, "loss": 0.6498, "step": 700 }, { "epoch": 0.65, "learning_rate": 8.700906344410877e-06, "loss": 0.6201, "step": 800 }, { "epoch": 0.73, "learning_rate": 8.398791540785499e-06, "loss": 0.5809, "step": 900 }, { "epoch": 0.82, "learning_rate": 8.099697885196374e-06, "loss": 0.647, "step": 1000 }, { "epoch": 0.9, "learning_rate": 7.797583081570997e-06, "loss": 0.5814, "step": 1100 }, { "epoch": 0.98, "learning_rate": 7.4954682779456205e-06, "loss": 0.5563, "step": 1200 }, { "epoch": 1.0, "eval_false_f1": 0.7926023778071335, "eval_loss": 0.5019509196281433, "eval_macro_f1": 0.6062122975261963, "eval_micro_f1": 0.7948929159802306, "eval_mixture_f1": 0.4591194968553459, "eval_runtime": 39.3928, "eval_samples_per_second": 30.818, "eval_steps_per_second": 0.965, "eval_true_f1": 0.8986175115207373, "eval_unproven_f1": 0.2745098039215686, "step": 1226 }, { "epoch": 1.06, "learning_rate": 7.193353474320243e-06, "loss": 0.5271, "step": 1300 }, { "epoch": 1.14, "learning_rate": 6.891238670694864e-06, "loss": 0.4912, "step": 1400 }, { "epoch": 1.22, "learning_rate": 6.589123867069487e-06, "loss": 0.4844, "step": 1500 }, { "epoch": 1.31, "learning_rate": 6.287009063444109e-06, "loss": 0.4816, "step": 1600 }, { "epoch": 1.39, "learning_rate": 5.984894259818732e-06, "loss": 0.462, "step": 1700 }, { "epoch": 1.47, "learning_rate": 5.682779456193354e-06, "loss": 0.4087, "step": 1800 }, { "epoch": 1.55, "learning_rate": 5.380664652567976e-06, "loss": 0.5065, "step": 1900 }, { "epoch": 1.63, "learning_rate": 5.078549848942599e-06, "loss": 0.4313, "step": 2000 }, { "epoch": 1.71, "learning_rate": 4.776435045317221e-06, "loss": 0.5098, "step": 2100 }, { "epoch": 1.79, "learning_rate": 4.4743202416918435e-06, "loss": 0.4699, "step": 2200 }, { "epoch": 1.88, "learning_rate": 4.172205438066466e-06, "loss": 0.4408, "step": 2300 }, { "epoch": 1.96, "learning_rate": 3.8700906344410875e-06, "loss": 0.5048, "step": 2400 }, { "epoch": 2.0, "eval_false_f1": 0.8201811125485123, "eval_loss": 0.4968700110912323, "eval_macro_f1": 0.684587518040316, "eval_micro_f1": 0.8179571663920923, "eval_mixture_f1": 0.43416370106761565, "eval_runtime": 39.4059, "eval_samples_per_second": 30.808, "eval_steps_per_second": 0.964, "eval_true_f1": 0.9125766871165646, "eval_unproven_f1": 0.5714285714285714, "step": 2452 }, { "epoch": 2.04, "learning_rate": 3.5679758308157103e-06, "loss": 0.378, "step": 2500 }, { "epoch": 2.12, "learning_rate": 3.2658610271903322e-06, "loss": 0.3631, "step": 2600 }, { "epoch": 2.2, "learning_rate": 2.963746223564955e-06, "loss": 0.3475, "step": 2700 }, { "epoch": 2.28, "learning_rate": 2.661631419939577e-06, "loss": 0.3283, "step": 2800 }, { "epoch": 2.37, "learning_rate": 2.3595166163142e-06, "loss": 0.317, "step": 2900 }, { "epoch": 2.45, "learning_rate": 2.0604229607250755e-06, "loss": 0.3541, "step": 3000 }, { "epoch": 2.53, "learning_rate": 1.758308157099698e-06, "loss": 0.3818, "step": 3100 }, { "epoch": 2.61, "learning_rate": 1.4561933534743203e-06, "loss": 0.3467, "step": 3200 }, { "epoch": 2.69, "learning_rate": 1.1540785498489427e-06, "loss": 0.3048, "step": 3300 }, { "epoch": 2.77, "learning_rate": 8.51963746223565e-07, "loss": 0.3334, "step": 3400 }, { "epoch": 2.85, "learning_rate": 5.498489425981874e-07, "loss": 0.3855, "step": 3500 }, { "epoch": 2.94, "learning_rate": 2.477341389728097e-07, "loss": 0.3454, "step": 3600 }, { "epoch": 3.0, "eval_false_f1": 0.811443433029909, "eval_loss": 0.5863622426986694, "eval_macro_f1": 0.6874160790583576, "eval_micro_f1": 0.8130148270181219, "eval_mixture_f1": 0.4556962025316456, "eval_runtime": 39.6082, "eval_samples_per_second": 30.65, "eval_steps_per_second": 0.959, "eval_true_f1": 0.9153605015673981, "eval_unproven_f1": 0.5671641791044776, "step": 3678 }, { "epoch": 3.0, "step": 3678, "total_flos": 2.106512041918464e+16, "train_loss": 0.5249485609128204, "train_runtime": 3291.1672, "train_samples_per_second": 8.937, "train_steps_per_second": 1.118 } ], "max_steps": 3678, "num_train_epochs": 3, "total_flos": 2.106512041918464e+16, "trial_name": null, "trial_params": null }