{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.088006496429443, "learning_rate": 4.75e-05, "loss": 0.5634, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.6524390243902439, "eval_loss": 0.5107927322387695, "eval_precision": 0.6572301881961337, "eval_recall": 0.6488907074013457, "eval_runtime": 5.0968, "eval_samples_per_second": 78.284, "eval_steps_per_second": 9.81, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.8167991638183594, "learning_rate": 4.5e-05, "loss": 0.5081, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6887653635603403, "eval_loss": 0.5049471855163574, "eval_precision": 0.6829453441295547, "eval_recall": 0.7081742134933624, "eval_runtime": 5.0562, "eval_samples_per_second": 78.913, "eval_steps_per_second": 9.889, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.013192176818848, "learning_rate": 4.25e-05, "loss": 0.4924, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7493734335839599, "eval_f1": 0.6976723040552828, "eval_loss": 0.46672317385673523, "eval_precision": 0.6976723040552828, "eval_recall": 0.6976723040552828, "eval_runtime": 5.0806, "eval_samples_per_second": 78.535, "eval_steps_per_second": 9.841, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.717806100845337, "learning_rate": 4e-05, "loss": 0.4698, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7794486215538847, "eval_f1": 0.7206949217258496, "eval_loss": 0.43917685747146606, "eval_precision": 0.7348989898989899, "eval_recall": 0.711447535915621, "eval_runtime": 5.1289, "eval_samples_per_second": 77.794, "eval_steps_per_second": 9.749, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.4102962017059326, "learning_rate": 3.7500000000000003e-05, "loss": 0.4519, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7468671679197995, "eval_f1": 0.7226057806810438, "eval_loss": 0.4547964334487915, "eval_precision": 0.7169434353918007, "eval_recall": 0.7534097108565194, "eval_runtime": 5.1149, "eval_samples_per_second": 78.007, "eval_steps_per_second": 9.775, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.93445885181427, "learning_rate": 3.5e-05, "loss": 0.4356, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8145363408521303, "eval_f1": 0.7739895897121861, "eval_loss": 0.41105952858924866, "eval_precision": 0.7769509251810136, "eval_recall": 0.7712765957446808, "eval_runtime": 5.085, "eval_samples_per_second": 78.466, "eval_steps_per_second": 9.833, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.6042848825454712, "learning_rate": 3.2500000000000004e-05, "loss": 0.421, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7611824817518249, "eval_loss": 0.41012144088745117, "eval_precision": 0.7537593984962405, "eval_recall": 0.7720949263502455, "eval_runtime": 5.0805, "eval_samples_per_second": 78.536, "eval_steps_per_second": 9.842, "step": 854 }, { "epoch": 8.0, "grad_norm": 18.006345748901367, "learning_rate": 3e-05, "loss": 0.4039, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7933776044839771, "eval_loss": 0.38294023275375366, "eval_precision": 0.7949020208205757, "eval_recall": 0.7919167121294781, "eval_runtime": 5.064, "eval_samples_per_second": 78.791, "eval_steps_per_second": 9.874, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.092437744140625, "learning_rate": 2.7500000000000004e-05, "loss": 0.3887, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.7979441442122369, "eval_loss": 0.3800281286239624, "eval_precision": 0.7972133421798662, "eval_recall": 0.7986906710310966, "eval_runtime": 5.0646, "eval_samples_per_second": 78.782, "eval_steps_per_second": 9.872, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.934494972229004, "learning_rate": 2.5e-05, "loss": 0.3797, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8019881353214686, "eval_loss": 0.37680280208587646, "eval_precision": 0.8043859649122806, "eval_recall": 0.7997363156937625, "eval_runtime": 5.0723, "eval_samples_per_second": 78.663, "eval_steps_per_second": 9.857, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.9598286151885986, "learning_rate": 2.25e-05, "loss": 0.368, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8220551378446115, "eval_f1": 0.791846991484015, "eval_loss": 0.38417863845825195, "eval_precision": 0.7845581927366314, "eval_recall": 0.801600290961993, "eval_runtime": 5.0582, "eval_samples_per_second": 78.882, "eval_steps_per_second": 9.885, "step": 1342 }, { "epoch": 12.0, "grad_norm": 11.679308891296387, "learning_rate": 2e-05, "loss": 0.3598, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7967966933608887, "eval_loss": 0.37781035900115967, "eval_precision": 0.7902444649446494, "eval_recall": 0.8051463902527732, "eval_runtime": 5.1525, "eval_samples_per_second": 77.438, "eval_steps_per_second": 9.704, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.9093892574310303, "learning_rate": 1.75e-05, "loss": 0.3548, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8141734808401475, "eval_loss": 0.3623768091201782, "eval_precision": 0.8166666666666667, "eval_recall": 0.8118294235315512, "eval_runtime": 5.0613, "eval_samples_per_second": 78.833, "eval_steps_per_second": 9.879, "step": 1586 }, { "epoch": 14.0, "grad_norm": 8.159423828125, "learning_rate": 1.5e-05, "loss": 0.3469, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8134839254478557, "eval_loss": 0.36370429396629333, "eval_precision": 0.8119747899159664, "eval_recall": 0.8150572831423895, "eval_runtime": 5.06, "eval_samples_per_second": 78.854, "eval_steps_per_second": 9.881, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1732066869735718, "learning_rate": 1.25e-05, "loss": 0.3431, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.81021463820679, "eval_loss": 0.3684653341770172, "eval_precision": 0.8049051094890511, "eval_recall": 0.8165120931078378, "eval_runtime": 5.0473, "eval_samples_per_second": 79.053, "eval_steps_per_second": 9.906, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.608555316925049, "learning_rate": 1e-05, "loss": 0.3275, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8085765951950401, "eval_loss": 0.3663918673992157, "eval_precision": 0.8017470018450185, "eval_recall": 0.817239498090562, "eval_runtime": 5.098, "eval_samples_per_second": 78.267, "eval_steps_per_second": 9.808, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.14452862739563, "learning_rate": 7.5e-06, "loss": 0.3288, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8084033613445378, "eval_loss": 0.359037846326828, "eval_precision": 0.8055472976990309, "eval_recall": 0.8115111838516094, "eval_runtime": 5.0473, "eval_samples_per_second": 79.052, "eval_steps_per_second": 9.906, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.8248753547668457, "learning_rate": 5e-06, "loss": 0.3335, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8186863532409097, "eval_loss": 0.36074599623680115, "eval_precision": 0.8138123167155425, "eval_recall": 0.8243316966721222, "eval_runtime": 5.0588, "eval_samples_per_second": 78.872, "eval_steps_per_second": 9.884, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.17168664932251, "learning_rate": 2.5e-06, "loss": 0.3239, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8161454307628278, "eval_loss": 0.36129772663116455, "eval_precision": 0.8107299270072992, "eval_recall": 0.8225586470267321, "eval_runtime": 5.0523, "eval_samples_per_second": 78.975, "eval_steps_per_second": 9.897, "step": 2318 }, { "epoch": 20.0, "grad_norm": 6.108452796936035, "learning_rate": 0.0, "loss": 0.327, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8186863532409097, "eval_loss": 0.3607771396636963, "eval_precision": 0.8138123167155425, "eval_recall": 0.8243316966721222, "eval_runtime": 5.1353, "eval_samples_per_second": 77.698, "eval_steps_per_second": 9.737, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7590599775312000.0, "train_loss": 0.39638843536376955, "train_runtime": 1955.7644, "train_samples_per_second": 37.203, "train_steps_per_second": 1.248 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7590599775312000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }