{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.120229244232178, "learning_rate": 4.75e-05, "loss": 0.5609, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.6543102914784331, "eval_loss": 0.5086308717727661, "eval_precision": 0.6580196140461879, "eval_recall": 0.6513911620294599, "eval_runtime": 5.141, "eval_samples_per_second": 77.611, "eval_steps_per_second": 9.726, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.929361581802368, "learning_rate": 4.5e-05, "loss": 0.4986, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7493734335839599, "eval_f1": 0.7201178451178452, "eval_loss": 0.485486775636673, "eval_precision": 0.7127371273712737, "eval_recall": 0.7426804873613384, "eval_runtime": 5.0623, "eval_samples_per_second": 78.818, "eval_steps_per_second": 9.877, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.046614646911621, "learning_rate": 4.25e-05, "loss": 0.4593, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7694235588972431, "eval_f1": 0.7308797653958945, "eval_loss": 0.42381197214126587, "eval_precision": 0.7249487296342714, "eval_recall": 0.7393617021276595, "eval_runtime": 5.0583, "eval_samples_per_second": 78.88, "eval_steps_per_second": 9.885, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.3827860355377197, "learning_rate": 4e-05, "loss": 0.3957, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8070175438596491, "eval_f1": 0.7700391464135747, "eval_loss": 0.3916189670562744, "eval_precision": 0.7669852636562704, "eval_recall": 0.7734588106928533, "eval_runtime": 5.0518, "eval_samples_per_second": 78.981, "eval_steps_per_second": 9.897, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.527370810508728, "learning_rate": 3.7500000000000003e-05, "loss": 0.3658, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7994987468671679, "eval_f1": 0.7744360902255639, "eval_loss": 0.4266420900821686, "eval_precision": 0.7640805369127517, "eval_recall": 0.7981451172940535, "eval_runtime": 5.0724, "eval_samples_per_second": 78.661, "eval_steps_per_second": 9.857, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.4858503341674805, "learning_rate": 3.5e-05, "loss": 0.3345, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8049369344976196, "eval_loss": 0.36663180589675903, "eval_precision": 0.8027777777777778, "eval_recall": 0.8072376795781051, "eval_runtime": 5.0564, "eval_samples_per_second": 78.91, "eval_steps_per_second": 9.888, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.170096516609192, "learning_rate": 3.2500000000000004e-05, "loss": 0.3237, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8136058394160584, "eval_loss": 0.3713766634464264, "eval_precision": 0.8045112781954887, "eval_recall": 0.8265139116202946, "eval_runtime": 5.0626, "eval_samples_per_second": 78.813, "eval_steps_per_second": 9.876, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.0918803215026855, "learning_rate": 3e-05, "loss": 0.304, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8118502107020158, "eval_loss": 0.35368239879608154, "eval_precision": 0.8083091673078061, "eval_recall": 0.8157846881251136, "eval_runtime": 5.0605, "eval_samples_per_second": 78.847, "eval_steps_per_second": 9.881, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.677598476409912, "learning_rate": 2.7500000000000004e-05, "loss": 0.3027, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8152777777777778, "eval_loss": 0.3530685007572174, "eval_precision": 0.8110639802050195, "eval_recall": 0.8200581923986179, "eval_runtime": 5.1095, "eval_samples_per_second": 78.09, "eval_steps_per_second": 9.786, "step": 1098 }, { "epoch": 10.0, "grad_norm": 6.779963493347168, "learning_rate": 2.5e-05, "loss": 0.2962, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8211781685593832, "eval_loss": 0.33822622895240784, "eval_precision": 0.8219964664310955, "eval_recall": 0.8203764320785598, "eval_runtime": 5.0712, "eval_samples_per_second": 78.679, "eval_steps_per_second": 9.86, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.037176132202148, "learning_rate": 2.25e-05, "loss": 0.2721, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8228975557791324, "eval_loss": 0.3489574193954468, "eval_precision": 0.8162488420565077, "eval_recall": 0.8311056555737406, "eval_runtime": 5.0699, "eval_samples_per_second": 78.7, "eval_steps_per_second": 9.862, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.375617027282715, "learning_rate": 2e-05, "loss": 0.2693, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8288009705864946, "eval_loss": 0.35016030073165894, "eval_precision": 0.822024085224641, "eval_recall": 0.837152209492635, "eval_runtime": 5.0628, "eval_samples_per_second": 78.81, "eval_steps_per_second": 9.876, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.8974263668060303, "learning_rate": 1.75e-05, "loss": 0.2745, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.3283708095550537, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 5.0658, "eval_samples_per_second": 78.763, "eval_steps_per_second": 9.87, "step": 1586 }, { "epoch": 14.0, "grad_norm": 11.152755737304688, "learning_rate": 1.5e-05, "loss": 0.2712, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8315338681464504, "eval_loss": 0.3297080397605896, "eval_precision": 0.8299369747899159, "eval_recall": 0.8331969448990726, "eval_runtime": 5.0889, "eval_samples_per_second": 78.405, "eval_steps_per_second": 9.825, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1235237121582031, "learning_rate": 1.25e-05, "loss": 0.256, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8391129032258065, "eval_loss": 0.3356616199016571, "eval_precision": 0.8345705196182396, "eval_recall": 0.8442444080741953, "eval_runtime": 5.0749, "eval_samples_per_second": 78.622, "eval_steps_per_second": 9.852, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.625916004180908, "learning_rate": 1e-05, "loss": 0.2504, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8305757727005222, "eval_loss": 0.3345828056335449, "eval_precision": 0.8255131964809383, "eval_recall": 0.8364248045099109, "eval_runtime": 5.0909, "eval_samples_per_second": 78.375, "eval_steps_per_second": 9.821, "step": 1952 }, { "epoch": 17.0, "grad_norm": 7.8184356689453125, "learning_rate": 7.5e-06, "loss": 0.2487, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8272399255573702, "eval_loss": 0.3242071270942688, "eval_precision": 0.8280735957109784, "eval_recall": 0.8264229859974541, "eval_runtime": 5.0576, "eval_samples_per_second": 78.891, "eval_steps_per_second": 9.886, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.3523454666137695, "learning_rate": 5e-06, "loss": 0.2514, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.33086374402046204, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 5.0593, "eval_samples_per_second": 78.865, "eval_steps_per_second": 9.883, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.3547356128692627, "learning_rate": 2.5e-06, "loss": 0.2451, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.32434186339378357, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.0573, "eval_samples_per_second": 78.897, "eval_steps_per_second": 9.887, "step": 2318 }, { "epoch": 20.0, "grad_norm": 9.069262504577637, "learning_rate": 0.0, "loss": 0.2461, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8357422474382676, "eval_loss": 0.3260349631309509, "eval_precision": 0.8319228265372551, "eval_recall": 0.8399709038006911, "eval_runtime": 5.0734, "eval_samples_per_second": 78.646, "eval_steps_per_second": 9.855, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.3213141730574311, "train_runtime": 1953.4168, "train_samples_per_second": 37.248, "train_steps_per_second": 1.249 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }