{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.088006496429443, "learning_rate": 4.75e-05, "loss": 0.5634, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.6524390243902439, "eval_loss": 0.5107927322387695, "eval_precision": 0.6572301881961337, "eval_recall": 0.6488907074013457, "eval_runtime": 5.1539, "eval_samples_per_second": 77.417, "eval_steps_per_second": 9.701, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.8167991638183594, "learning_rate": 4.5e-05, "loss": 0.5081, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6887653635603403, "eval_loss": 0.5049471855163574, "eval_precision": 0.6829453441295547, "eval_recall": 0.7081742134933624, "eval_runtime": 5.0516, "eval_samples_per_second": 78.984, "eval_steps_per_second": 9.898, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.013192176818848, "learning_rate": 4.25e-05, "loss": 0.4924, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7493734335839599, "eval_f1": 0.6976723040552828, "eval_loss": 0.46672317385673523, "eval_precision": 0.6976723040552828, "eval_recall": 0.6976723040552828, "eval_runtime": 5.1076, "eval_samples_per_second": 78.118, "eval_steps_per_second": 9.789, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.717806100845337, "learning_rate": 4e-05, "loss": 0.4698, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7794486215538847, "eval_f1": 0.7206949217258496, "eval_loss": 0.43917685747146606, "eval_precision": 0.7348989898989899, "eval_recall": 0.711447535915621, "eval_runtime": 5.0525, "eval_samples_per_second": 78.971, "eval_steps_per_second": 9.896, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.4102962017059326, "learning_rate": 3.7500000000000003e-05, "loss": 0.4519, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7468671679197995, "eval_f1": 0.7226057806810438, "eval_loss": 0.4547964334487915, "eval_precision": 0.7169434353918007, "eval_recall": 0.7534097108565194, "eval_runtime": 5.065, "eval_samples_per_second": 78.775, "eval_steps_per_second": 9.872, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.93445885181427, "learning_rate": 3.5e-05, "loss": 0.4356, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8145363408521303, "eval_f1": 0.7739895897121861, "eval_loss": 0.41105952858924866, "eval_precision": 0.7769509251810136, "eval_recall": 0.7712765957446808, "eval_runtime": 5.0583, "eval_samples_per_second": 78.881, "eval_steps_per_second": 9.885, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.6042848825454712, "learning_rate": 3.2500000000000004e-05, "loss": 0.421, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7611824817518249, "eval_loss": 0.41012144088745117, "eval_precision": 0.7537593984962405, "eval_recall": 0.7720949263502455, "eval_runtime": 5.101, "eval_samples_per_second": 78.22, "eval_steps_per_second": 9.802, "step": 854 }, { "epoch": 8.0, "grad_norm": 18.006345748901367, "learning_rate": 3e-05, "loss": 0.4039, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7933776044839771, "eval_loss": 0.38294023275375366, "eval_precision": 0.7949020208205757, "eval_recall": 0.7919167121294781, "eval_runtime": 5.0594, "eval_samples_per_second": 78.863, "eval_steps_per_second": 9.883, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.092437744140625, "learning_rate": 2.7500000000000004e-05, "loss": 0.3887, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.7979441442122369, "eval_loss": 0.3800281286239624, "eval_precision": 0.7972133421798662, "eval_recall": 0.7986906710310966, "eval_runtime": 5.0761, "eval_samples_per_second": 78.603, "eval_steps_per_second": 9.85, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.934494972229004, "learning_rate": 2.5e-05, "loss": 0.3797, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8019881353214686, "eval_loss": 0.37680280208587646, "eval_precision": 0.8043859649122806, "eval_recall": 0.7997363156937625, "eval_runtime": 5.0643, "eval_samples_per_second": 78.786, "eval_steps_per_second": 9.873, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.9598286151885986, "learning_rate": 2.25e-05, "loss": 0.368, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8220551378446115, "eval_f1": 0.791846991484015, "eval_loss": 0.38417863845825195, "eval_precision": 0.7845581927366314, "eval_recall": 0.801600290961993, "eval_runtime": 5.0669, "eval_samples_per_second": 78.746, "eval_steps_per_second": 9.868, "step": 1342 }, { "epoch": 12.0, "grad_norm": 11.679308891296387, "learning_rate": 2e-05, "loss": 0.3598, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7967966933608887, "eval_loss": 0.37781035900115967, "eval_precision": 0.7902444649446494, "eval_recall": 0.8051463902527732, "eval_runtime": 5.1021, "eval_samples_per_second": 78.204, "eval_steps_per_second": 9.8, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.9093892574310303, "learning_rate": 1.75e-05, "loss": 0.3548, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8141734808401475, "eval_loss": 0.3623768091201782, "eval_precision": 0.8166666666666667, "eval_recall": 0.8118294235315512, "eval_runtime": 5.0684, "eval_samples_per_second": 78.722, "eval_steps_per_second": 9.865, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.159423828125, "learning_rate": 1.5e-05, "loss": 0.3469, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8134839254478557, "eval_loss": 0.36370429396629333, "eval_precision": 0.8119747899159664, "eval_recall": 0.8150572831423895, "eval_runtime": 5.0772, "eval_samples_per_second": 78.587, "eval_steps_per_second": 9.848, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1732066869735718, "learning_rate": 1.25e-05, "loss": 0.3431, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.81021463820679, "eval_loss": 0.3684653341770172, "eval_precision": 0.8049051094890511, "eval_recall": 0.8165120931078378, "eval_runtime": 5.1409, "eval_samples_per_second": 77.613, "eval_steps_per_second": 9.726, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.608555316925049, "learning_rate": 1e-05, "loss": 0.3275, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8085765951950401, "eval_loss": 0.3663918673992157, "eval_precision": 0.8017470018450185, "eval_recall": 0.817239498090562, "eval_runtime": 5.0611, "eval_samples_per_second": 78.837, "eval_steps_per_second": 9.879, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.14452862739563, "learning_rate": 7.5e-06, "loss": 0.3288, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8084033613445378, "eval_loss": 0.359037846326828, "eval_precision": 0.8055472976990309, "eval_recall": 0.8115111838516094, "eval_runtime": 5.0814, "eval_samples_per_second": 78.522, "eval_steps_per_second": 9.84, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.8248753547668457, "learning_rate": 5e-06, "loss": 0.3335, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8186863532409097, "eval_loss": 0.36074599623680115, "eval_precision": 0.8138123167155425, "eval_recall": 0.8243316966721222, "eval_runtime": 5.0558, "eval_samples_per_second": 78.92, "eval_steps_per_second": 9.89, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.17168664932251, "learning_rate": 2.5e-06, "loss": 0.3239, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8161454307628278, "eval_loss": 0.36129772663116455, "eval_precision": 0.8107299270072992, "eval_recall": 0.8225586470267321, "eval_runtime": 5.0809, "eval_samples_per_second": 78.529, "eval_steps_per_second": 9.841, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.108452796936035, "learning_rate": 0.0, "loss": 0.327, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8186863532409097, "eval_loss": 0.3607771396636963, "eval_precision": 0.8138123167155425, "eval_recall": 0.8243316966721222, "eval_runtime": 5.0496, "eval_samples_per_second": 79.017, "eval_steps_per_second": 9.902, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7590599775312000.0, "train_loss": 0.39638843536376955, "train_runtime": 1954.0939, "train_samples_per_second": 37.235, "train_steps_per_second": 1.249 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7590599775312000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }