{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 137, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0072992700729927005, "grad_norm": 708.0, "learning_rate": 1.4285714285714285e-05, "loss": 56.8346, "step": 1 }, { "epoch": 0.0364963503649635, "grad_norm": 368.0, "learning_rate": 7.142857142857143e-05, "loss": 45.8547, "step": 5 }, { "epoch": 0.072992700729927, "grad_norm": 30.875, "learning_rate": 0.00014285714285714287, "loss": 19.792, "step": 10 }, { "epoch": 0.10948905109489052, "grad_norm": 6.875, "learning_rate": 0.00019996738360808565, "loss": 15.0745, "step": 15 }, { "epoch": 0.145985401459854, "grad_norm": 17.75, "learning_rate": 0.00019882804237803488, "loss": 13.3169, "step": 20 }, { "epoch": 0.18248175182481752, "grad_norm": 117.0, "learning_rate": 0.00019607909582962477, "loss": 8.4463, "step": 25 }, { "epoch": 0.21897810218978103, "grad_norm": 5.09375, "learning_rate": 0.0001917653158603628, "loss": 2.1599, "step": 30 }, { "epoch": 0.25547445255474455, "grad_norm": 3.921875, "learning_rate": 0.00018595696069872013, "loss": 1.6643, "step": 35 }, { "epoch": 0.291970802919708, "grad_norm": 2.21875, "learning_rate": 0.00017874863061334657, "loss": 1.4899, "step": 40 }, { "epoch": 0.3284671532846715, "grad_norm": 2.1875, "learning_rate": 0.00017025772716520323, "loss": 1.3695, "step": 45 }, { "epoch": 0.36496350364963503, "grad_norm": 2.0, "learning_rate": 0.0001606225410966638, "loss": 1.2794, "step": 50 }, { "epoch": 0.40145985401459855, "grad_norm": 0.953125, "learning_rate": 0.00015000000000000001, "loss": 1.2388, "step": 55 }, { "epoch": 0.43795620437956206, "grad_norm": 3.53125, "learning_rate": 0.0001385631124488136, "loss": 1.2269, "step": 60 }, { "epoch": 0.4744525547445255, "grad_norm": 1.453125, "learning_rate": 0.0001264981502196662, "loss": 1.1434, "step": 65 }, { "epoch": 0.5109489051094891, "grad_norm": 0.921875, "learning_rate": 0.00011400161449686293, "loss": 1.1033, "step": 70 }, { "epoch": 0.5474452554744526, "grad_norm": 1.5703125, "learning_rate": 0.00010127703547159739, "loss": 1.0812, "step": 75 }, { "epoch": 0.583941605839416, "grad_norm": 2.828125, "learning_rate": 8.853165746015997e-05, "loss": 1.061, "step": 80 }, { "epoch": 0.6204379562043796, "grad_norm": 7.4375, "learning_rate": 7.597306353045393e-05, "loss": 1.0362, "step": 85 }, { "epoch": 0.656934306569343, "grad_norm": 0.734375, "learning_rate": 6.380579461128819e-05, "loss": 1.0233, "step": 90 }, { "epoch": 0.6934306569343066, "grad_norm": 1.296875, "learning_rate": 5.222801814877369e-05, "loss": 1.0499, "step": 95 }, { "epoch": 0.7299270072992701, "grad_norm": 1.3125, "learning_rate": 4.142830056718052e-05, "loss": 0.995, "step": 100 }, { "epoch": 0.7664233576642335, "grad_norm": 1.015625, "learning_rate": 3.158253610095697e-05, "loss": 0.9839, "step": 105 }, { "epoch": 0.8029197080291971, "grad_norm": 0.765625, "learning_rate": 2.2851082017805703e-05, "loss": 0.9762, "step": 110 }, { "epoch": 0.8394160583941606, "grad_norm": 0.63671875, "learning_rate": 1.5376146891235598e-05, "loss": 0.9773, "step": 115 }, { "epoch": 0.8759124087591241, "grad_norm": 0.62890625, "learning_rate": 9.279474459608805e-06, "loss": 0.9756, "step": 120 }, { "epoch": 0.9124087591240876, "grad_norm": 0.80859375, "learning_rate": 4.660360794506946e-06, "loss": 0.9625, "step": 125 }, { "epoch": 0.948905109489051, "grad_norm": 0.57421875, "learning_rate": 1.5940370726542863e-06, "loss": 0.9625, "step": 130 }, { "epoch": 0.9854014598540146, "grad_norm": 0.5625, "learning_rate": 1.3044429107700318e-07, "loss": 0.961, "step": 135 }, { "epoch": 1.0, "eval_loss": 2.341205358505249, "eval_runtime": 1.0018, "eval_samples_per_second": 4.991, "eval_steps_per_second": 1.996, "step": 137 }, { "epoch": 1.0, "step": 137, "total_flos": 2.0945562398778982e+17, "train_loss": 4.772963228887015, "train_runtime": 1080.9502, "train_samples_per_second": 2.028, "train_steps_per_second": 0.127 } ], "logging_steps": 5, "max_steps": 137, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0945562398778982e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }