{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.218796253204346, "learning_rate": 4.75e-05, "loss": 0.5604, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7268170426065163, "eval_f1": 0.6626643397324026, "eval_loss": 0.4996059536933899, "eval_precision": 0.6671439480717831, "eval_recall": 0.6592107655937443, "eval_runtime": 5.1871, "eval_samples_per_second": 76.921, "eval_steps_per_second": 9.639, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.024437427520752, "learning_rate": 4.5e-05, "loss": 0.4842, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.7247113489157984, "eval_loss": 0.4520498514175415, "eval_precision": 0.7169408246101261, "eval_recall": 0.7462265866521185, "eval_runtime": 5.0789, "eval_samples_per_second": 78.561, "eval_steps_per_second": 9.845, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.7485127449035645, "learning_rate": 4.25e-05, "loss": 0.4079, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.800852224051491, "eval_loss": 0.37486591935157776, "eval_precision": 0.7962609970674487, "eval_recall": 0.8061920349154392, "eval_runtime": 5.055, "eval_samples_per_second": 78.932, "eval_steps_per_second": 9.891, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.6576170921325684, "learning_rate": 4e-05, "loss": 0.3378, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8122237052238519, "eval_loss": 0.36237815022468567, "eval_precision": 0.8184491978609625, "eval_recall": 0.8068285142753229, "eval_runtime": 5.0874, "eval_samples_per_second": 78.43, "eval_steps_per_second": 9.828, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.3436532020568848, "learning_rate": 3.7500000000000003e-05, "loss": 0.3146, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8234833375639119, "eval_loss": 0.36204174160957336, "eval_precision": 0.8130172220979647, "eval_recall": 0.8393344244408074, "eval_runtime": 5.0836, "eval_samples_per_second": 78.488, "eval_steps_per_second": 9.836, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.493219375610352, "learning_rate": 3.5e-05, "loss": 0.2935, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8252554744525548, "eval_loss": 0.35182783007621765, "eval_precision": 0.8157894736842105, "eval_recall": 0.8386070194580832, "eval_runtime": 5.0469, "eval_samples_per_second": 79.059, "eval_steps_per_second": 9.907, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.3236638307571411, "learning_rate": 3.2500000000000004e-05, "loss": 0.2842, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8391129032258065, "eval_loss": 0.3307338058948517, "eval_precision": 0.8345705196182396, "eval_recall": 0.8442444080741953, "eval_runtime": 5.0617, "eval_samples_per_second": 78.827, "eval_steps_per_second": 9.878, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.9696807861328125, "learning_rate": 3e-05, "loss": 0.267, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.3191296458244324, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.1096, "eval_samples_per_second": 78.089, "eval_steps_per_second": 9.786, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.35373067855835, "learning_rate": 2.7500000000000004e-05, "loss": 0.2598, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8401647707947546, "eval_loss": 0.31739577651023865, "eval_precision": 0.8393298751432535, "eval_recall": 0.8410165484633569, "eval_runtime": 5.1627, "eval_samples_per_second": 77.285, "eval_steps_per_second": 9.685, "step": 1098 }, { "epoch": 10.0, "grad_norm": 4.5958967208862305, "learning_rate": 2.5e-05, "loss": 0.2557, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8367430441898527, "eval_loss": 0.30755287408828735, "eval_precision": 0.8367430441898527, "eval_recall": 0.8367430441898527, "eval_runtime": 5.0513, "eval_samples_per_second": 78.99, "eval_steps_per_second": 9.898, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.4262058734893799, "learning_rate": 2.25e-05, "loss": 0.2341, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.844327731092437, "eval_loss": 0.31438612937927246, "eval_precision": 0.8411320530352577, "eval_recall": 0.8477905073649754, "eval_runtime": 5.0545, "eval_samples_per_second": 78.939, "eval_steps_per_second": 9.892, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.927000999450684, "learning_rate": 2e-05, "loss": 0.2352, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8409485773595975, "eval_loss": 0.31350696086883545, "eval_precision": 0.8385304659498208, "eval_recall": 0.8435170030914712, "eval_runtime": 5.0621, "eval_samples_per_second": 78.821, "eval_steps_per_second": 9.877, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.771910309791565, "learning_rate": 1.75e-05, "loss": 0.2335, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8469505178365937, "eval_loss": 0.30345895886421204, "eval_precision": 0.844489247311828, "eval_recall": 0.8495635570103655, "eval_runtime": 5.0626, "eval_samples_per_second": 78.814, "eval_steps_per_second": 9.876, "step": 1586 }, { "epoch": 14.0, "grad_norm": 11.13768482208252, "learning_rate": 1.5e-05, "loss": 0.232, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8450716845878137, "eval_loss": 0.3012339174747467, "eval_precision": 0.8404471544715447, "eval_recall": 0.8502909619930896, "eval_runtime": 5.0472, "eval_samples_per_second": 79.054, "eval_steps_per_second": 9.906, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.7109107971191406, "learning_rate": 1.25e-05, "loss": 0.221, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8424651921601347, "eval_loss": 0.3049696981906891, "eval_precision": 0.8372140762463343, "eval_recall": 0.8485179123476996, "eval_runtime": 5.0811, "eval_samples_per_second": 78.526, "eval_steps_per_second": 9.84, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.484745979309082, "learning_rate": 1e-05, "loss": 0.216, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8457993935430168, "eval_loss": 0.3015521466732025, "eval_precision": 0.8398540145985401, "eval_recall": 0.8527914166212038, "eval_runtime": 5.0477, "eval_samples_per_second": 79.046, "eval_steps_per_second": 9.906, "step": 1952 }, { "epoch": 17.0, "grad_norm": 10.693771362304688, "learning_rate": 7.5e-06, "loss": 0.2096, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8435671632788467, "eval_loss": 0.2880638837814331, "eval_precision": 0.8419117647058824, "eval_recall": 0.8452900527368612, "eval_runtime": 5.1498, "eval_samples_per_second": 77.478, "eval_steps_per_second": 9.709, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.451054096221924, "learning_rate": 5e-06, "loss": 0.2184, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8465112150085814, "eval_loss": 0.29656729102134705, "eval_precision": 0.8393498147290412, "eval_recall": 0.855291871249318, "eval_runtime": 5.0876, "eval_samples_per_second": 78.426, "eval_steps_per_second": 9.828, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.6623899936676025, "learning_rate": 2.5e-06, "loss": 0.2134, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8409485773595975, "eval_loss": 0.2884305715560913, "eval_precision": 0.8385304659498208, "eval_recall": 0.8435170030914712, "eval_runtime": 5.0715, "eval_samples_per_second": 78.675, "eval_steps_per_second": 9.859, "step": 2318 }, { "epoch": 20.0, "grad_norm": 10.805831909179688, "learning_rate": 0.0, "loss": 0.2077, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8383403361344538, "eval_loss": 0.289480060338974, "eval_precision": 0.8352012604792199, "eval_recall": 0.8417439534460811, "eval_runtime": 5.0647, "eval_samples_per_second": 78.78, "eval_steps_per_second": 9.872, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7635661149264000.0, "train_loss": 0.28429694019380164, "train_runtime": 1955.8264, "train_samples_per_second": 37.202, "train_steps_per_second": 1.248 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7635661149264000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }