{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.705079078674316, "learning_rate": 4.75e-05, "loss": 0.5593, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7268170426065163, "eval_f1": 0.6589395923680764, "eval_loss": 0.5025668144226074, "eval_precision": 0.6658409387222947, "eval_recall": 0.6542098563375159, "eval_runtime": 5.1146, "eval_samples_per_second": 78.012, "eval_steps_per_second": 9.776, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.789808988571167, "learning_rate": 4.5e-05, "loss": 0.4995, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.7226336397684712, "eval_loss": 0.47967690229415894, "eval_precision": 0.7148526077097506, "eval_recall": 0.7412256773958902, "eval_runtime": 5.0589, "eval_samples_per_second": 78.87, "eval_steps_per_second": 9.883, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.145122051239014, "learning_rate": 4.25e-05, "loss": 0.4612, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7644110275689223, "eval_f1": 0.7262335766423358, "eval_loss": 0.4281724691390991, "eval_precision": 0.7199248120300752, "eval_recall": 0.7358156028368794, "eval_runtime": 5.0857, "eval_samples_per_second": 78.455, "eval_steps_per_second": 9.831, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.555657386779785, "learning_rate": 4e-05, "loss": 0.4019, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7933776044839771, "eval_loss": 0.3933873772621155, "eval_precision": 0.7949020208205757, "eval_recall": 0.7919167121294781, "eval_runtime": 5.0576, "eval_samples_per_second": 78.891, "eval_steps_per_second": 9.886, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.5041733980178833, "learning_rate": 3.7500000000000003e-05, "loss": 0.3665, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7969924812030075, "eval_f1": 0.772043420300895, "eval_loss": 0.42343708872795105, "eval_precision": 0.7618072289156627, "eval_recall": 0.7963720676486634, "eval_runtime": 5.0851, "eval_samples_per_second": 78.464, "eval_steps_per_second": 9.833, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.9622180461883545, "learning_rate": 3.5e-05, "loss": 0.334, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8195488721804511, "eval_f1": 0.788441145281018, "eval_loss": 0.3723360300064087, "eval_precision": 0.7816537467700257, "eval_recall": 0.7973267866884888, "eval_runtime": 5.0586, "eval_samples_per_second": 78.875, "eval_steps_per_second": 9.884, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.7456966042518616, "learning_rate": 3.2500000000000004e-05, "loss": 0.3263, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.8086080586080586, "eval_loss": 0.37042734026908875, "eval_precision": 0.7989898989898989, "eval_recall": 0.8229678123295144, "eval_runtime": 5.0772, "eval_samples_per_second": 78.586, "eval_steps_per_second": 9.848, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.445054531097412, "learning_rate": 3e-05, "loss": 0.3076, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8160386984618873, "eval_loss": 0.352139413356781, "eval_precision": 0.8152632848784607, "eval_recall": 0.8168303327877796, "eval_runtime": 5.0512, "eval_samples_per_second": 78.991, "eval_steps_per_second": 9.899, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.338367462158203, "learning_rate": 2.7500000000000004e-05, "loss": 0.298, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8186863532409097, "eval_loss": 0.35223379731178284, "eval_precision": 0.8138123167155425, "eval_recall": 0.8243316966721222, "eval_runtime": 5.0482, "eval_samples_per_second": 79.038, "eval_steps_per_second": 9.904, "step": 1098 }, { "epoch": 10.0, "grad_norm": 6.702072620391846, "learning_rate": 2.5e-05, "loss": 0.2923, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.3374755382537842, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 5.0805, "eval_samples_per_second": 78.536, "eval_steps_per_second": 9.842, "step": 1220 }, { "epoch": 11.0, "grad_norm": 6.563529968261719, "learning_rate": 2.25e-05, "loss": 0.2689, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8357422474382676, "eval_loss": 0.3392201364040375, "eval_precision": 0.8319228265372551, "eval_recall": 0.8399709038006911, "eval_runtime": 5.0499, "eval_samples_per_second": 79.012, "eval_steps_per_second": 9.901, "step": 1342 }, { "epoch": 12.0, "grad_norm": 14.090389251708984, "learning_rate": 2e-05, "loss": 0.2686, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8372817261706151, "eval_loss": 0.34843868017196655, "eval_precision": 0.8308913308913308, "eval_recall": 0.8449718130569195, "eval_runtime": 5.0635, "eval_samples_per_second": 78.799, "eval_steps_per_second": 9.875, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.997685432434082, "learning_rate": 1.75e-05, "loss": 0.2726, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8298403801632752, "eval_loss": 0.3257535398006439, "eval_precision": 0.8315523576240049, "eval_recall": 0.8281960356428442, "eval_runtime": 5.0532, "eval_samples_per_second": 78.96, "eval_steps_per_second": 9.895, "step": 1586 }, { "epoch": 14.0, "grad_norm": 10.811493873596191, "learning_rate": 1.5e-05, "loss": 0.2713, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.324627548456192, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.0517, "eval_samples_per_second": 78.983, "eval_steps_per_second": 9.898, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.337926983833313, "learning_rate": 1.25e-05, "loss": 0.2577, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8323529411764705, "eval_loss": 0.3306790590286255, "eval_precision": 0.8292704679231822, "eval_recall": 0.8356973995271868, "eval_runtime": 5.089, "eval_samples_per_second": 78.404, "eval_steps_per_second": 9.825, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.205774784088135, "learning_rate": 1e-05, "loss": 0.2519, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.3305480182170868, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 5.0659, "eval_samples_per_second": 78.761, "eval_steps_per_second": 9.87, "step": 1952 }, { "epoch": 17.0, "grad_norm": 7.098486423492432, "learning_rate": 7.5e-06, "loss": 0.2488, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8246499363520641, "eval_loss": 0.3233925998210907, "eval_precision": 0.8246499363520641, "eval_recall": 0.8246499363520641, "eval_runtime": 5.0647, "eval_samples_per_second": 78.781, "eval_steps_per_second": 9.872, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.508778095245361, "learning_rate": 5e-06, "loss": 0.2546, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8391129032258065, "eval_loss": 0.32470664381980896, "eval_precision": 0.8345705196182396, "eval_recall": 0.8442444080741953, "eval_runtime": 5.0482, "eval_samples_per_second": 79.038, "eval_steps_per_second": 9.904, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.409043073654175, "learning_rate": 2.5e-06, "loss": 0.2463, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8306964902709584, "eval_loss": 0.3204318881034851, "eval_precision": 0.8306964902709584, "eval_recall": 0.8306964902709584, "eval_runtime": 5.0518, "eval_samples_per_second": 78.982, "eval_steps_per_second": 9.897, "step": 2318 }, { "epoch": 20.0, "grad_norm": 8.363502502441406, "learning_rate": 0.0, "loss": 0.2458, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8349466368826011, "eval_loss": 0.32170674204826355, "eval_precision": 0.8325716845878136, "eval_recall": 0.8374704491725768, "eval_runtime": 5.054, "eval_samples_per_second": 78.947, "eval_steps_per_second": 9.893, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.3216621422376789, "train_runtime": 1955.2061, "train_samples_per_second": 37.213, "train_steps_per_second": 1.248 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }