{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9895227008149012, "eval_steps": 500, "global_step": 321, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09313154831199069, "grad_norm": 6.410240671476086, "learning_rate": 5e-06, "loss": 1.0737, "step": 10 }, { "epoch": 0.18626309662398138, "grad_norm": 2.110670339648224, "learning_rate": 5e-06, "loss": 0.853, "step": 20 }, { "epoch": 0.27939464493597205, "grad_norm": 2.0454490264727103, "learning_rate": 5e-06, "loss": 0.808, "step": 30 }, { "epoch": 0.37252619324796277, "grad_norm": 1.9173512178087246, "learning_rate": 5e-06, "loss": 0.7705, "step": 40 }, { "epoch": 0.46565774155995343, "grad_norm": 1.9696383768635166, "learning_rate": 5e-06, "loss": 0.7493, "step": 50 }, { "epoch": 0.5587892898719441, "grad_norm": 1.7929306896649821, "learning_rate": 5e-06, "loss": 0.723, "step": 60 }, { "epoch": 0.6519208381839348, "grad_norm": 1.4541049068033436, "learning_rate": 5e-06, "loss": 0.7029, "step": 70 }, { "epoch": 0.7450523864959255, "grad_norm": 1.4546232801341403, "learning_rate": 5e-06, "loss": 0.7045, "step": 80 }, { "epoch": 0.8381839348079162, "grad_norm": 0.9836741916976506, "learning_rate": 5e-06, "loss": 0.6913, "step": 90 }, { "epoch": 0.9313154831199069, "grad_norm": 0.9546295476288479, "learning_rate": 5e-06, "loss": 0.6853, "step": 100 }, { "epoch": 0.9965075669383003, "eval_loss": 0.6684155464172363, "eval_runtime": 72.6852, "eval_samples_per_second": 39.788, "eval_steps_per_second": 0.633, "step": 107 }, { "epoch": 1.0244470314318976, "grad_norm": 1.2182872648581187, "learning_rate": 5e-06, "loss": 0.7062, "step": 110 }, { "epoch": 1.1175785797438882, "grad_norm": 1.4541964745779468, "learning_rate": 5e-06, "loss": 0.6098, "step": 120 }, { "epoch": 1.210710128055879, "grad_norm": 0.9495507104405151, "learning_rate": 5e-06, "loss": 0.6019, "step": 130 }, { "epoch": 1.3038416763678695, "grad_norm": 1.8134473930318298, "learning_rate": 5e-06, "loss": 0.6043, "step": 140 }, { "epoch": 1.3969732246798603, "grad_norm": 1.113009214401007, "learning_rate": 5e-06, "loss": 0.6023, "step": 150 }, { "epoch": 1.490104772991851, "grad_norm": 2.029132520416773, "learning_rate": 5e-06, "loss": 0.6021, "step": 160 }, { "epoch": 1.5832363213038416, "grad_norm": 1.6144281176922932, "learning_rate": 5e-06, "loss": 0.6002, "step": 170 }, { "epoch": 1.6763678696158324, "grad_norm": 1.2068997504682133, "learning_rate": 5e-06, "loss": 0.5935, "step": 180 }, { "epoch": 1.7694994179278232, "grad_norm": 1.001966561421665, "learning_rate": 5e-06, "loss": 0.589, "step": 190 }, { "epoch": 1.8626309662398137, "grad_norm": 0.7426199844782707, "learning_rate": 5e-06, "loss": 0.5954, "step": 200 }, { "epoch": 1.9557625145518043, "grad_norm": 1.0472743759335235, "learning_rate": 5e-06, "loss": 0.591, "step": 210 }, { "epoch": 1.9930151338766007, "eval_loss": 0.6565061807632446, "eval_runtime": 76.8161, "eval_samples_per_second": 37.648, "eval_steps_per_second": 0.599, "step": 214 }, { "epoch": 2.0488940628637953, "grad_norm": 1.1047685289937579, "learning_rate": 5e-06, "loss": 0.5986, "step": 220 }, { "epoch": 2.142025611175786, "grad_norm": 1.214031879234503, "learning_rate": 5e-06, "loss": 0.5179, "step": 230 }, { "epoch": 2.2351571594877764, "grad_norm": 0.8187191061599544, "learning_rate": 5e-06, "loss": 0.5178, "step": 240 }, { "epoch": 2.328288707799767, "grad_norm": 0.8726515587132341, "learning_rate": 5e-06, "loss": 0.513, "step": 250 }, { "epoch": 2.421420256111758, "grad_norm": 0.8422751030451243, "learning_rate": 5e-06, "loss": 0.5137, "step": 260 }, { "epoch": 2.5145518044237485, "grad_norm": 0.9018678534732393, "learning_rate": 5e-06, "loss": 0.5263, "step": 270 }, { "epoch": 2.607683352735739, "grad_norm": 0.875500810626841, "learning_rate": 5e-06, "loss": 0.5201, "step": 280 }, { "epoch": 2.70081490104773, "grad_norm": 0.8624139031471002, "learning_rate": 5e-06, "loss": 0.5181, "step": 290 }, { "epoch": 2.7939464493597206, "grad_norm": 0.8654410079756801, "learning_rate": 5e-06, "loss": 0.5197, "step": 300 }, { "epoch": 2.887077997671711, "grad_norm": 1.255098018906923, "learning_rate": 5e-06, "loss": 0.5274, "step": 310 }, { "epoch": 2.980209545983702, "grad_norm": 0.8384998281941215, "learning_rate": 5e-06, "loss": 0.5134, "step": 320 }, { "epoch": 2.9895227008149012, "eval_loss": 0.6689090132713318, "eval_runtime": 75.5704, "eval_samples_per_second": 38.269, "eval_steps_per_second": 0.609, "step": 321 }, { "epoch": 2.9895227008149012, "step": 321, "total_flos": 537477576130560.0, "train_loss": 0.6323956919607715, "train_runtime": 10362.5439, "train_samples_per_second": 15.907, "train_steps_per_second": 0.031 } ], "logging_steps": 10, "max_steps": 321, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 537477576130560.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }