{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.089293003082275, "learning_rate": 4.75e-05, "loss": 0.566, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7167919799498746, "eval_f1": 0.6444270944235455, "eval_loss": 0.5211467742919922, "eval_precision": 0.6520598138245197, "eval_recall": 0.639616293871613, "eval_runtime": 5.162, "eval_samples_per_second": 77.296, "eval_steps_per_second": 9.686, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.061326026916504, "learning_rate": 4.5e-05, "loss": 0.5148, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6849698535745048, "eval_loss": 0.516937255859375, "eval_precision": 0.6790780141843972, "eval_recall": 0.6974449899981815, "eval_runtime": 5.0571, "eval_samples_per_second": 78.899, "eval_steps_per_second": 9.887, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.013665199279785, "learning_rate": 4.25e-05, "loss": 0.4927, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.6942098348348349, "eval_loss": 0.48614969849586487, "eval_precision": 0.7017006802721089, "eval_recall": 0.6887161302054918, "eval_runtime": 5.055, "eval_samples_per_second": 78.931, "eval_steps_per_second": 9.891, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.659437417984009, "learning_rate": 4e-05, "loss": 0.4627, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7619047619047619, "eval_f1": 0.7090966301102831, "eval_loss": 0.4655974805355072, "eval_precision": 0.7119991289198606, "eval_recall": 0.7065375522822331, "eval_runtime": 5.1983, "eval_samples_per_second": 76.756, "eval_steps_per_second": 9.619, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.7017691135406494, "learning_rate": 3.7500000000000003e-05, "loss": 0.4504, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.7193367786391043, "eval_loss": 0.4610688388347626, "eval_precision": 0.7119825169058223, "eval_recall": 0.7337243135115475, "eval_runtime": 5.06, "eval_samples_per_second": 78.853, "eval_steps_per_second": 9.881, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.450038909912109, "learning_rate": 3.5e-05, "loss": 0.4276, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.7894736842105263, "eval_f1": 0.743447642375995, "eval_loss": 0.4303344488143921, "eval_precision": 0.7460857726344452, "eval_recall": 0.7410438261502091, "eval_runtime": 5.0829, "eval_samples_per_second": 78.498, "eval_steps_per_second": 9.837, "step": 732 }, { "epoch": 7.0, "grad_norm": 4.093283176422119, "learning_rate": 3.2500000000000004e-05, "loss": 0.4176, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.7944862155388471, "eval_f1": 0.7533174497858737, "eval_loss": 0.4162527918815613, "eval_precision": 0.7521008403361344, "eval_recall": 0.7545917439534461, "eval_runtime": 5.0538, "eval_samples_per_second": 78.95, "eval_steps_per_second": 9.894, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.816234588623047, "learning_rate": 3e-05, "loss": 0.397, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7740779522978476, "eval_loss": 0.3960316479206085, "eval_precision": 0.7814051164566629, "eval_recall": 0.7680487361338425, "eval_runtime": 5.0577, "eval_samples_per_second": 78.889, "eval_steps_per_second": 9.886, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.539109706878662, "learning_rate": 2.7500000000000004e-05, "loss": 0.3904, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7828802611966783, "eval_loss": 0.39395618438720703, "eval_precision": 0.7969399881164587, "eval_recall": 0.7726404800872886, "eval_runtime": 5.0627, "eval_samples_per_second": 78.812, "eval_steps_per_second": 9.876, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.052578449249268, "learning_rate": 2.5e-05, "loss": 0.3743, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7803582113367107, "eval_loss": 0.3900292217731476, "eval_precision": 0.7993592785951591, "eval_recall": 0.7676395708310602, "eval_runtime": 5.0505, "eval_samples_per_second": 79.002, "eval_steps_per_second": 9.9, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.9592137336730957, "learning_rate": 2.25e-05, "loss": 0.3632, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7929065743944637, "eval_loss": 0.38483926653862, "eval_precision": 0.8062188401994228, "eval_recall": 0.7829605382796871, "eval_runtime": 5.0602, "eval_samples_per_second": 78.851, "eval_steps_per_second": 9.881, "step": 1342 }, { "epoch": 12.0, "grad_norm": 13.379903793334961, "learning_rate": 2e-05, "loss": 0.3599, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.784099374985296, "eval_loss": 0.37946680188179016, "eval_precision": 0.7958930899608865, "eval_recall": 0.7751409347154028, "eval_runtime": 5.1036, "eval_samples_per_second": 78.18, "eval_steps_per_second": 9.797, "step": 1464 }, { "epoch": 13.0, "grad_norm": 4.531927108764648, "learning_rate": 1.75e-05, "loss": 0.3597, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.786734693877551, "eval_loss": 0.37652111053466797, "eval_precision": 0.8135673624288424, "eval_recall": 0.7704582651391162, "eval_runtime": 5.0432, "eval_samples_per_second": 79.117, "eval_steps_per_second": 9.914, "step": 1586 }, { "epoch": 14.0, "grad_norm": 6.193031311035156, "learning_rate": 1.5e-05, "loss": 0.3461, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.7867246399936176, "eval_loss": 0.3729116916656494, "eval_precision": 0.8061224489795917, "eval_recall": 0.7736861247499545, "eval_runtime": 5.0678, "eval_samples_per_second": 78.732, "eval_steps_per_second": 9.866, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.2145860195159912, "learning_rate": 1.25e-05, "loss": 0.3432, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7954669127215085, "eval_loss": 0.37140053510665894, "eval_precision": 0.8101109130520895, "eval_recall": 0.7847335879250773, "eval_runtime": 5.045, "eval_samples_per_second": 79.088, "eval_steps_per_second": 9.911, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.050211429595947, "learning_rate": 1e-05, "loss": 0.333, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8421052631578947, "eval_f1": 0.8006218422075398, "eval_loss": 0.3706248998641968, "eval_precision": 0.8181103678929766, "eval_recall": 0.7882796872158575, "eval_runtime": 5.0703, "eval_samples_per_second": 78.694, "eval_steps_per_second": 9.861, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.7747299671173096, "learning_rate": 7.5e-06, "loss": 0.3323, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.796869033982436, "eval_loss": 0.3699536621570587, "eval_precision": 0.8155050505050505, "eval_recall": 0.7840061829423532, "eval_runtime": 5.0529, "eval_samples_per_second": 78.964, "eval_steps_per_second": 9.895, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.8045554161071777, "learning_rate": 5e-06, "loss": 0.3337, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7980385953812085, "eval_loss": 0.3686981499195099, "eval_precision": 0.8140740248521496, "eval_recall": 0.7865066375704673, "eval_runtime": 5.0589, "eval_samples_per_second": 78.87, "eval_steps_per_second": 9.883, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.2901482582092285, "learning_rate": 2.5e-06, "loss": 0.3298, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7980385953812085, "eval_loss": 0.36837807297706604, "eval_precision": 0.8140740248521496, "eval_recall": 0.7865066375704673, "eval_runtime": 5.0488, "eval_samples_per_second": 79.029, "eval_steps_per_second": 9.903, "step": 2318 }, { "epoch": 20.0, "grad_norm": 5.859314441680908, "learning_rate": 0.0, "loss": 0.3309, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7980385953812085, "eval_loss": 0.368120402097702, "eval_precision": 0.8140740248521496, "eval_recall": 0.7865066375704673, "eval_runtime": 5.1179, "eval_samples_per_second": 77.961, "eval_steps_per_second": 9.77, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7590599775312000.0, "train_loss": 0.39626741878321914, "train_runtime": 1951.4944, "train_samples_per_second": 37.284, "train_steps_per_second": 1.25 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7590599775312000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }