{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.490695476531982, "learning_rate": 4.75e-05, "loss": 0.5637, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7192982456140351, "eval_f1": 0.6312141018023371, "eval_loss": 0.504301905632019, "eval_precision": 0.6505857648099027, "eval_recall": 0.6238861611202037, "eval_runtime": 4.8371, "eval_samples_per_second": 82.488, "eval_steps_per_second": 10.337, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.642432689666748, "learning_rate": 4.5e-05, "loss": 0.5058, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7343358395989975, "eval_f1": 0.7022388059701492, "eval_loss": 0.4904969036579132, "eval_precision": 0.695631275368511, "eval_recall": 0.7220403709765412, "eval_runtime": 5.048, "eval_samples_per_second": 79.041, "eval_steps_per_second": 9.905, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.178711891174316, "learning_rate": 4.25e-05, "loss": 0.4607, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.7495328467153285, "eval_loss": 0.42068806290626526, "eval_precision": 0.7424812030075187, "eval_recall": 0.7600018185124569, "eval_runtime": 5.0555, "eval_samples_per_second": 78.924, "eval_steps_per_second": 9.89, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.523857355117798, "learning_rate": 4e-05, "loss": 0.3992, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8148090847205891, "eval_loss": 0.37228837609291077, "eval_precision": 0.8221132552989561, "eval_recall": 0.8086015639207129, "eval_runtime": 5.059, "eval_samples_per_second": 78.87, "eval_steps_per_second": 9.883, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.291674852371216, "learning_rate": 3.7500000000000003e-05, "loss": 0.3565, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8145363408521303, "eval_f1": 0.7872030673431734, "eval_loss": 0.3854798376560211, "eval_precision": 0.7773381294964029, "eval_recall": 0.8037825059101655, "eval_runtime": 5.0549, "eval_samples_per_second": 78.933, "eval_steps_per_second": 9.891, "step": 610 }, { "epoch": 6.0, "grad_norm": 5.28938102722168, "learning_rate": 3.5e-05, "loss": 0.332, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8270676691729323, "eval_f1": 0.7977104565126344, "eval_loss": 0.36892175674438477, "eval_precision": 0.7902630826422647, "eval_recall": 0.8076468448808873, "eval_runtime": 5.0865, "eval_samples_per_second": 78.443, "eval_steps_per_second": 9.83, "step": 732 }, { "epoch": 7.0, "grad_norm": 3.7372069358825684, "learning_rate": 3.2500000000000004e-05, "loss": 0.3089, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8116089923236262, "eval_loss": 0.3518911898136139, "eval_precision": 0.8132271892222902, "eval_recall": 0.810056373886161, "eval_runtime": 5.0912, "eval_samples_per_second": 78.37, "eval_steps_per_second": 9.821, "step": 854 }, { "epoch": 8.0, "grad_norm": 6.950363636016846, "learning_rate": 3e-05, "loss": 0.2979, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.3406260311603546, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 5.0511, "eval_samples_per_second": 78.993, "eval_steps_per_second": 9.899, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.673851013183594, "learning_rate": 2.7500000000000004e-05, "loss": 0.2887, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8203564970291914, "eval_loss": 0.35820019245147705, "eval_precision": 0.8132495387453875, "eval_recall": 0.8293326059283506, "eval_runtime": 5.0494, "eval_samples_per_second": 79.019, "eval_steps_per_second": 9.902, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.3661675453186035, "learning_rate": 2.5e-05, "loss": 0.268, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8315822595375324, "eval_loss": 0.33940380811691284, "eval_precision": 0.8361280487804879, "eval_recall": 0.82746863066012, "eval_runtime": 5.053, "eval_samples_per_second": 78.963, "eval_steps_per_second": 9.895, "step": 1220 }, { "epoch": 11.0, "grad_norm": 1.5574766397476196, "learning_rate": 2.25e-05, "loss": 0.267, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8272399255573702, "eval_loss": 0.3338986337184906, "eval_precision": 0.8280735957109784, "eval_recall": 0.8264229859974541, "eval_runtime": 5.0604, "eval_samples_per_second": 78.848, "eval_steps_per_second": 9.881, "step": 1342 }, { "epoch": 12.0, "grad_norm": 17.53325843811035, "learning_rate": 2e-05, "loss": 0.2609, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8365204824303285, "eval_loss": 0.339706152677536, "eval_precision": 0.8313636363636363, "eval_recall": 0.8424713584288053, "eval_runtime": 5.0561, "eval_samples_per_second": 78.915, "eval_steps_per_second": 9.889, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.5466238260269165, "learning_rate": 1.75e-05, "loss": 0.2564, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8368501045387564, "eval_loss": 0.32274091243743896, "eval_precision": 0.8435514312676942, "eval_recall": 0.8310147299509002, "eval_runtime": 5.0671, "eval_samples_per_second": 78.744, "eval_steps_per_second": 9.868, "step": 1586 }, { "epoch": 14.0, "grad_norm": 9.6759033203125, "learning_rate": 1.5e-05, "loss": 0.2566, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8401647707947546, "eval_loss": 0.3246406316757202, "eval_precision": 0.8393298751432535, "eval_recall": 0.8410165484633569, "eval_runtime": 5.0843, "eval_samples_per_second": 78.477, "eval_steps_per_second": 9.834, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.8860002756118774, "learning_rate": 1.25e-05, "loss": 0.2503, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8484099018899409, "eval_loss": 0.32968148589134216, "eval_precision": 0.8430645161290322, "eval_recall": 0.8545644662665939, "eval_runtime": 5.0647, "eval_samples_per_second": 78.781, "eval_steps_per_second": 9.872, "step": 1830 }, { "epoch": 16.0, "grad_norm": 3.0858819484710693, "learning_rate": 1e-05, "loss": 0.2539, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8450716845878137, "eval_loss": 0.3228394687175751, "eval_precision": 0.8404471544715447, "eval_recall": 0.8502909619930896, "eval_runtime": 5.0463, "eval_samples_per_second": 79.068, "eval_steps_per_second": 9.908, "step": 1952 }, { "epoch": 17.0, "grad_norm": 6.341916084289551, "learning_rate": 7.5e-06, "loss": 0.2478, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.8263588263588264, "eval_loss": 0.31420159339904785, "eval_precision": 0.8289473684210527, "eval_recall": 0.8239225313693399, "eval_runtime": 5.0557, "eval_samples_per_second": 78.921, "eval_steps_per_second": 9.89, "step": 2074 }, { "epoch": 18.0, "grad_norm": 6.797486305236816, "learning_rate": 5e-06, "loss": 0.2449, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8476882658063937, "eval_loss": 0.31900447607040405, "eval_precision": 0.8437296561519796, "eval_recall": 0.8520640116384797, "eval_runtime": 5.0545, "eval_samples_per_second": 78.94, "eval_steps_per_second": 9.892, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.446300506591797, "learning_rate": 2.5e-06, "loss": 0.2401, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.31386807560920715, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.1195, "eval_samples_per_second": 77.937, "eval_steps_per_second": 9.767, "step": 2318 }, { "epoch": 20.0, "grad_norm": 5.507618427276611, "learning_rate": 0.0, "loss": 0.2392, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8341332527115377, "eval_loss": 0.3142927885055542, "eval_precision": 0.8333132275770553, "eval_recall": 0.8349699945444626, "eval_runtime": 5.0576, "eval_samples_per_second": 78.891, "eval_steps_per_second": 9.886, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.31493380656007863, "train_runtime": 1922.5285, "train_samples_per_second": 37.846, "train_steps_per_second": 1.269 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }