| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.0, |
| "eval_steps": 500, |
| "global_step": 288, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5078026652336121, |
| "learning_rate": 0.0001, |
| "loss": 1.0979, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.24573734402656555, |
| "learning_rate": 9.968335515358916e-05, |
| "loss": 0.6078, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.28030624985694885, |
| "learning_rate": 9.873743117270691e-05, |
| "loss": 0.4051, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8977756524449263, |
| "eval_loss": 0.43045690655708313, |
| "eval_runtime": 25.9378, |
| "eval_samples_per_second": 14.843, |
| "eval_steps_per_second": 1.889, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.24669107794761658, |
| "learning_rate": 9.717420893549902e-05, |
| "loss": 0.4398, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.2326413094997406, |
| "learning_rate": 9.501348789257373e-05, |
| "loss": 0.3765, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.2204989194869995, |
| "learning_rate": 9.2282635291242e-05, |
| "loss": 0.2886, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.920555460186586, |
| "eval_loss": 0.323241263628006, |
| "eval_runtime": 25.9139, |
| "eval_samples_per_second": 14.857, |
| "eval_steps_per_second": 1.891, |
| "step": 64 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.2965347468852997, |
| "learning_rate": 8.90162395476046e-05, |
| "loss": 0.2927, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.3402569890022278, |
| "learning_rate": 8.525567215680398e-05, |
| "loss": 0.3082, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.29415449500083923, |
| "learning_rate": 8.104856369019524e-05, |
| "loss": 0.2289, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9322851101896489, |
| "eval_loss": 0.27416735887527466, |
| "eval_runtime": 25.8596, |
| "eval_samples_per_second": 14.888, |
| "eval_steps_per_second": 1.895, |
| "step": 96 |
| }, |
| { |
| "epoch": 3.128, |
| "grad_norm": 0.30058005452156067, |
| "learning_rate": 7.644820051634812e-05, |
| "loss": 0.2103, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.448, |
| "grad_norm": 0.39383992552757263, |
| "learning_rate": 7.15128498868873e-05, |
| "loss": 0.2362, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.768, |
| "grad_norm": 0.32451319694519043, |
| "learning_rate": 6.630502193549474e-05, |
| "loss": 0.1925, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.938676979897959, |
| "eval_loss": 0.25138482451438904, |
| "eval_runtime": 25.9363, |
| "eval_samples_per_second": 14.844, |
| "eval_steps_per_second": 1.889, |
| "step": 128 |
| }, |
| { |
| "epoch": 4.064, |
| "grad_norm": 0.375893235206604, |
| "learning_rate": 6.0890677937442574e-05, |
| "loss": 0.1471, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.384, |
| "grad_norm": 0.3606163263320923, |
| "learning_rate": 5.5338394857677945e-05, |
| "loss": 0.1876, |
| "step": 140 |
| }, |
| { |
| "epoch": 4.704, |
| "grad_norm": 0.40445348620414734, |
| "learning_rate": 4.971849676912172e-05, |
| "loss": 0.1488, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.4769314229488373, |
| "learning_rate": 4.410216414245771e-05, |
| "loss": 0.1079, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9420305087488926, |
| "eval_loss": 0.2455526441335678, |
| "eval_runtime": 25.9468, |
| "eval_samples_per_second": 14.838, |
| "eval_steps_per_second": 1.888, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.32, |
| "grad_norm": 0.4198707044124603, |
| "learning_rate": 3.856053228896442e-05, |
| "loss": 0.1534, |
| "step": 170 |
| }, |
| { |
| "epoch": 5.64, |
| "grad_norm": 0.3887736201286316, |
| "learning_rate": 3.316379037532644e-05, |
| "loss": 0.1151, |
| "step": 180 |
| }, |
| { |
| "epoch": 5.96, |
| "grad_norm": 0.37902015447616577, |
| "learning_rate": 2.798029242211828e-05, |
| "loss": 0.0968, |
| "step": 190 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.945372723288016, |
| "eval_loss": 0.24095402657985687, |
| "eval_runtime": 25.9698, |
| "eval_samples_per_second": 14.825, |
| "eval_steps_per_second": 1.887, |
| "step": 192 |
| }, |
| { |
| "epoch": 6.256, |
| "grad_norm": 0.4507382810115814, |
| "learning_rate": 2.3075691545870558e-05, |
| "loss": 0.1142, |
| "step": 200 |
| }, |
| { |
| "epoch": 6.576, |
| "grad_norm": 0.39560869336128235, |
| "learning_rate": 1.8512108410229878e-05, |
| "loss": 0.0908, |
| "step": 210 |
| }, |
| { |
| "epoch": 6.896, |
| "grad_norm": 0.3712189793586731, |
| "learning_rate": 1.434734441843899e-05, |
| "loss": 0.0835, |
| "step": 220 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9465976771312703, |
| "eval_loss": 0.24637635052204132, |
| "eval_runtime": 25.9064, |
| "eval_samples_per_second": 14.861, |
| "eval_steps_per_second": 1.891, |
| "step": 224 |
| }, |
| { |
| "epoch": 7.192, |
| "grad_norm": 0.36213552951812744, |
| "learning_rate": 1.063414961267859e-05, |
| "loss": 0.0841, |
| "step": 230 |
| }, |
| { |
| "epoch": 7.5120000000000005, |
| "grad_norm": 0.4722885191440582, |
| "learning_rate": 7.41955455290726e-06, |
| "loss": 0.0863, |
| "step": 240 |
| }, |
| { |
| "epoch": 7.832, |
| "grad_norm": 0.29437902569770813, |
| "learning_rate": 4.744274637483936e-06, |
| "loss": 0.0716, |
| "step": 250 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9472423896041875, |
| "eval_loss": 0.2516001760959625, |
| "eval_runtime": 25.92, |
| "eval_samples_per_second": 14.853, |
| "eval_steps_per_second": 1.89, |
| "step": 256 |
| }, |
| { |
| "epoch": 8.128, |
| "grad_norm": 0.39367932081222534, |
| "learning_rate": 2.6421944103256657e-06, |
| "loss": 0.0666, |
| "step": 260 |
| }, |
| { |
| "epoch": 8.448, |
| "grad_norm": 0.31784766912460327, |
| "learning_rate": 1.1399383862592927e-06, |
| "loss": 0.0814, |
| "step": 270 |
| }, |
| { |
| "epoch": 8.768, |
| "grad_norm": 0.28355342149734497, |
| "learning_rate": 2.5653383040524227e-07, |
| "loss": 0.0611, |
| "step": 280 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.947486792929819, |
| "eval_loss": 0.2522543668746948, |
| "eval_runtime": 25.9049, |
| "eval_samples_per_second": 14.862, |
| "eval_steps_per_second": 1.892, |
| "step": 288 |
| }, |
| { |
| "epoch": 9.0, |
| "step": 288, |
| "total_flos": 3.686068977139712e+17, |
| "train_loss": 0.22300153877586126, |
| "train_runtime": 2410.3131, |
| "train_samples_per_second": 3.734, |
| "train_steps_per_second": 0.119 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 288, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.686068977139712e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|