{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.091732025146484, "learning_rate": 4.75e-05, "loss": 0.5658, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7268170426065163, "eval_f1": 0.6550441396924102, "eval_loss": 0.519511878490448, "eval_precision": 0.6646488294314381, "eval_recall": 0.6492089470812875, "eval_runtime": 5.1668, "eval_samples_per_second": 77.224, "eval_steps_per_second": 9.677, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.045337200164795, "learning_rate": 4.5e-05, "loss": 0.5125, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7293233082706767, "eval_f1": 0.6854598540145985, "eval_loss": 0.5060133337974548, "eval_precision": 0.6804511278195489, "eval_recall": 0.6934897254046191, "eval_runtime": 5.0798, "eval_samples_per_second": 78.546, "eval_steps_per_second": 9.843, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.02074670791626, "learning_rate": 4.25e-05, "loss": 0.4809, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7669172932330827, "eval_f1": 0.7166907166907166, "eval_loss": 0.46860969066619873, "eval_precision": 0.718421052631579, "eval_recall": 0.7150845608292418, "eval_runtime": 5.0663, "eval_samples_per_second": 78.755, "eval_steps_per_second": 9.869, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.456829786300659, "learning_rate": 4e-05, "loss": 0.4353, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7417274322480016, "eval_loss": 0.4295312464237213, "eval_precision": 0.7499839010882864, "eval_recall": 0.7353155119112567, "eval_runtime": 5.0682, "eval_samples_per_second": 78.726, "eval_steps_per_second": 9.865, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.0160815715789795, "learning_rate": 3.7500000000000003e-05, "loss": 0.4116, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8020050125313283, "eval_f1": 0.7713964535663778, "eval_loss": 0.4171212613582611, "eval_precision": 0.7628187206441512, "eval_recall": 0.7849154391707583, "eval_runtime": 5.0604, "eval_samples_per_second": 78.848, "eval_steps_per_second": 9.881, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.68072509765625, "learning_rate": 3.5e-05, "loss": 0.3809, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8096491228070175, "eval_loss": 0.38647741079330444, "eval_precision": 0.8148460960960962, "eval_recall": 0.8050554646299327, "eval_runtime": 5.0923, "eval_samples_per_second": 78.353, "eval_steps_per_second": 9.819, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.0169405937194824, "learning_rate": 3.2500000000000004e-05, "loss": 0.3681, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8176861216035092, "eval_loss": 0.36971357464790344, "eval_precision": 0.8193355786895284, "eval_recall": 0.8161029278050556, "eval_runtime": 5.0477, "eval_samples_per_second": 79.046, "eval_steps_per_second": 9.906, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.219416618347168, "learning_rate": 3e-05, "loss": 0.3469, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8101852212906999, "eval_loss": 0.35537657141685486, "eval_precision": 0.820642024599137, "eval_recall": 0.8018276050190944, "eval_runtime": 5.1133, "eval_samples_per_second": 78.032, "eval_steps_per_second": 9.778, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.871381759643555, "learning_rate": 2.7500000000000004e-05, "loss": 0.3455, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8157894736842105, "eval_loss": 0.3493554890155792, "eval_precision": 0.8210867117117118, "eval_recall": 0.8111020185488271, "eval_runtime": 5.0475, "eval_samples_per_second": 79.05, "eval_steps_per_second": 9.906, "step": 1098 }, { "epoch": 10.0, "grad_norm": 6.4938273429870605, "learning_rate": 2.5e-05, "loss": 0.3284, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8095647193585338, "eval_loss": 0.34365448355674744, "eval_precision": 0.8289393939393939, "eval_recall": 0.7960992907801419, "eval_runtime": 5.0626, "eval_samples_per_second": 78.812, "eval_steps_per_second": 9.876, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.7792277336120605, "learning_rate": 2.25e-05, "loss": 0.3132, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8242843661528783, "eval_loss": 0.3370875418186188, "eval_precision": 0.8389366308055628, "eval_recall": 0.8131933078741589, "eval_runtime": 5.063, "eval_samples_per_second": 78.808, "eval_steps_per_second": 9.876, "step": 1342 }, { "epoch": 12.0, "grad_norm": 11.339967727661133, "learning_rate": 2e-05, "loss": 0.3042, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8237632508833923, "eval_loss": 0.3370998501777649, "eval_precision": 0.8254439681567667, "eval_recall": 0.8221494817239499, "eval_runtime": 5.0498, "eval_samples_per_second": 79.013, "eval_steps_per_second": 9.901, "step": 1464 }, { "epoch": 13.0, "grad_norm": 3.7291290760040283, "learning_rate": 1.75e-05, "loss": 0.3063, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8232837709585574, "eval_loss": 0.331680566072464, "eval_precision": 0.8405874144461426, "eval_recall": 0.8106928532460447, "eval_runtime": 5.0673, "eval_samples_per_second": 78.74, "eval_steps_per_second": 9.867, "step": 1586 }, { "epoch": 14.0, "grad_norm": 9.840410232543945, "learning_rate": 1.5e-05, "loss": 0.3013, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8306935047100303, "eval_loss": 0.33042553067207336, "eval_precision": 0.8372758729160114, "eval_recall": 0.8249681760320058, "eval_runtime": 5.0637, "eval_samples_per_second": 78.796, "eval_steps_per_second": 9.874, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.2072865962982178, "learning_rate": 1.25e-05, "loss": 0.2928, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8289650949173301, "eval_loss": 0.3295370638370514, "eval_precision": 0.8325081997648369, "eval_recall": 0.82569558101473, "eval_runtime": 5.0536, "eval_samples_per_second": 78.954, "eval_steps_per_second": 9.894, "step": 1830 }, { "epoch": 16.0, "grad_norm": 7.374602317810059, "learning_rate": 1e-05, "loss": 0.2864, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8324514991181657, "eval_loss": 0.32842549681663513, "eval_precision": 0.8350877192982455, "eval_recall": 0.8299690852882342, "eval_runtime": 5.1067, "eval_samples_per_second": 78.133, "eval_steps_per_second": 9.791, "step": 1952 }, { "epoch": 17.0, "grad_norm": 5.933995723724365, "learning_rate": 7.5e-06, "loss": 0.2819, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8271551457392166, "eval_loss": 0.3254449963569641, "eval_precision": 0.8347358430876305, "eval_recall": 0.8206946717585015, "eval_runtime": 5.0579, "eval_samples_per_second": 78.886, "eval_steps_per_second": 9.886, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.023169994354248, "learning_rate": 5e-06, "loss": 0.2877, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8280701754385965, "eval_loss": 0.32487839460372925, "eval_precision": 0.833567942942943, "eval_recall": 0.8231951263866157, "eval_runtime": 5.0474, "eval_samples_per_second": 79.051, "eval_steps_per_second": 9.906, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.3332839012146, "learning_rate": 2.5e-06, "loss": 0.2819, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8333281762485303, "eval_loss": 0.3241012394428253, "eval_precision": 0.8410471369819678, "eval_recall": 0.8267412256773959, "eval_runtime": 5.0495, "eval_samples_per_second": 79.017, "eval_steps_per_second": 9.902, "step": 2318 }, { "epoch": 20.0, "grad_norm": 7.439182758331299, "learning_rate": 0.0, "loss": 0.2803, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8306935047100303, "eval_loss": 0.3238792419433594, "eval_precision": 0.8372758729160114, "eval_recall": 0.8249681760320058, "eval_runtime": 5.0469, "eval_samples_per_second": 79.058, "eval_steps_per_second": 9.907, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7597037114448000.0, "train_loss": 0.35560139046340694, "train_runtime": 1953.8843, "train_samples_per_second": 37.239, "train_steps_per_second": 1.249 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7597037114448000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }