{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.639183521270752, "learning_rate": 4.75e-05, "loss": 0.5413, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6445578231292517, "eval_loss": 0.5006802678108215, "eval_precision": 0.6593400801180687, "eval_recall": 0.6374340789234406, "eval_runtime": 1.8003, "eval_samples_per_second": 221.628, "eval_steps_per_second": 27.773, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.4983344078063965, "learning_rate": 4.5e-05, "loss": 0.4584, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.7760942760942761, "eval_loss": 0.3855762183666229, "eval_precision": 0.8122789566755084, "eval_recall": 0.7569103473358793, "eval_runtime": 1.8373, "eval_samples_per_second": 217.166, "eval_steps_per_second": 27.214, "step": 244 }, { "epoch": 3.0, "grad_norm": 5.97755765914917, "learning_rate": 4.25e-05, "loss": 0.3559, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8079089841803424, "eval_loss": 0.3407064378261566, "eval_precision": 0.8638322884012539, "eval_recall": 0.7814148026913984, "eval_runtime": 1.8397, "eval_samples_per_second": 216.877, "eval_steps_per_second": 27.178, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.0363476276397705, "learning_rate": 4e-05, "loss": 0.2961, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8419946387230413, "eval_loss": 0.3088829517364502, "eval_precision": 0.8437691365584814, "eval_recall": 0.8402891434806329, "eval_runtime": 1.8466, "eval_samples_per_second": 216.069, "eval_steps_per_second": 27.076, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.3318538665771484, "learning_rate": 3.7500000000000003e-05, "loss": 0.276, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.29173794388771057, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 1.8414, "eval_samples_per_second": 216.677, "eval_steps_per_second": 27.153, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.3067033290863037, "learning_rate": 3.5e-05, "loss": 0.2555, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.842789598108747, "eval_loss": 0.29054704308509827, "eval_precision": 0.842789598108747, "eval_recall": 0.842789598108747, "eval_runtime": 1.847, "eval_samples_per_second": 216.025, "eval_steps_per_second": 27.071, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.5437530279159546, "learning_rate": 3.2500000000000004e-05, "loss": 0.2427, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.84402249790578, "eval_loss": 0.30313801765441895, "eval_precision": 0.8669909824394875, "eval_recall": 0.8281051100200036, "eval_runtime": 1.8479, "eval_samples_per_second": 215.922, "eval_steps_per_second": 27.058, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.033709764480591, "learning_rate": 3e-05, "loss": 0.2219, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8522278069611882, "eval_loss": 0.2907596528530121, "eval_precision": 0.8513631702756499, "eval_recall": 0.8531096563011457, "eval_runtime": 1.8468, "eval_samples_per_second": 216.053, "eval_steps_per_second": 27.074, "step": 976 }, { "epoch": 9.0, "grad_norm": 2.670888900756836, "learning_rate": 2.7500000000000004e-05, "loss": 0.2158, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8539996181748759, "eval_loss": 0.3083769381046295, "eval_precision": 0.8759595959595959, "eval_recall": 0.8384251682124022, "eval_runtime": 1.856, "eval_samples_per_second": 214.979, "eval_steps_per_second": 26.94, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.6058239936828613, "learning_rate": 2.5e-05, "loss": 0.2, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8517301860990547, "eval_loss": 0.29381993412971497, "eval_precision": 0.8456788321167883, "eval_recall": 0.8588379705400981, "eval_runtime": 1.8468, "eval_samples_per_second": 216.045, "eval_steps_per_second": 27.073, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.181007146835327, "learning_rate": 2.25e-05, "loss": 0.1885, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8514869535493182, "eval_loss": 0.2976568639278412, "eval_precision": 0.8523821128305106, "eval_recall": 0.8506092016730314, "eval_runtime": 1.8451, "eval_samples_per_second": 216.253, "eval_steps_per_second": 27.099, "step": 1342 }, { "epoch": 12.0, "grad_norm": 3.1322133541107178, "learning_rate": 2e-05, "loss": 0.183, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8556621579112929, "eval_loss": 0.30698344111442566, "eval_precision": 0.871654421411703, "eval_recall": 0.8434260774686306, "eval_runtime": 1.8533, "eval_samples_per_second": 215.295, "eval_steps_per_second": 26.979, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.6321003437042236, "learning_rate": 1.75e-05, "loss": 0.1752, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8569892473118279, "eval_loss": 0.29585033655166626, "eval_precision": 0.8522004241781549, "eval_recall": 0.8623840698308783, "eval_runtime": 1.846, "eval_samples_per_second": 216.146, "eval_steps_per_second": 27.086, "step": 1586 }, { "epoch": 14.0, "grad_norm": 3.546229600906372, "learning_rate": 1.5e-05, "loss": 0.1558, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8530841286673736, "eval_loss": 0.3040328025817871, "eval_precision": 0.8446597760551249, "eval_recall": 0.8638388797963266, "eval_runtime": 1.8508, "eval_samples_per_second": 215.586, "eval_steps_per_second": 27.016, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.5455009937286377, "learning_rate": 1.25e-05, "loss": 0.1538, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8484099018899409, "eval_loss": 0.30823931097984314, "eval_precision": 0.8430645161290322, "eval_recall": 0.8545644662665939, "eval_runtime": 1.848, "eval_samples_per_second": 215.904, "eval_steps_per_second": 27.056, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.4319658279418945, "learning_rate": 1e-05, "loss": 0.152, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8483536940081443, "eval_loss": 0.30997762084007263, "eval_precision": 0.8575792287132493, "eval_recall": 0.8406073831605747, "eval_runtime": 1.8455, "eval_samples_per_second": 216.202, "eval_steps_per_second": 27.093, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.420119285583496, "learning_rate": 7.5e-06, "loss": 0.1436, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8510304659498208, "eval_loss": 0.31050172448158264, "eval_precision": 0.8463237893248498, "eval_recall": 0.8563375159119839, "eval_runtime": 1.8478, "eval_samples_per_second": 215.931, "eval_steps_per_second": 27.059, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.7008257508277893, "learning_rate": 5e-06, "loss": 0.1426, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8503151260504201, "eval_loss": 0.3118613362312317, "eval_precision": 0.8470628455912955, "eval_recall": 0.8538370612838698, "eval_runtime": 1.8481, "eval_samples_per_second": 215.903, "eval_steps_per_second": 27.056, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.8680572509765625, "learning_rate": 2.5e-06, "loss": 0.1398, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8569892473118279, "eval_loss": 0.316354900598526, "eval_precision": 0.8522004241781549, "eval_recall": 0.8623840698308783, "eval_runtime": 1.8465, "eval_samples_per_second": 216.08, "eval_steps_per_second": 27.078, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.442784547805786, "learning_rate": 0.0, "loss": 0.14, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8563025210084034, "eval_loss": 0.31680676341056824, "eval_precision": 0.8529936381473334, "eval_recall": 0.8598836152027641, "eval_runtime": 1.8511, "eval_samples_per_second": 215.549, "eval_steps_per_second": 27.011, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8460375995160000.0, "train_loss": 0.2318932650519199, "train_runtime": 643.2373, "train_samples_per_second": 113.333, "train_steps_per_second": 3.793 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8460375995160000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }