{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 1.8024471998214722, "learning_rate": 9.232000000000001e-05, "loss": 3.5517, "step": 100 }, { "epoch": 0.08, "eval_runtime": 87.7705, "eval_samples_per_second": 11.393, "eval_steps_per_second": 2.848, "step": 100 }, { "epoch": 0.16, "grad_norm": 0.6744942665100098, "learning_rate": 8.431999999999999e-05, "loss": 2.1197, "step": 200 }, { "epoch": 0.16, "eval_runtime": 87.8063, "eval_samples_per_second": 11.389, "eval_steps_per_second": 2.847, "step": 200 }, { "epoch": 0.24, "grad_norm": 0.7477216124534607, "learning_rate": 7.632e-05, "loss": 1.9757, "step": 300 }, { "epoch": 0.24, "eval_runtime": 87.9532, "eval_samples_per_second": 11.37, "eval_steps_per_second": 2.842, "step": 300 }, { "epoch": 0.32, "grad_norm": 2.347344160079956, "learning_rate": 6.832000000000001e-05, "loss": 1.9568, "step": 400 }, { "epoch": 0.32, "eval_runtime": 87.9668, "eval_samples_per_second": 11.368, "eval_steps_per_second": 2.842, "step": 400 }, { "epoch": 0.4, "grad_norm": 1.8626798391342163, "learning_rate": 6.032e-05, "loss": 1.9512, "step": 500 }, { "epoch": 0.4, "eval_runtime": 88.0332, "eval_samples_per_second": 11.359, "eval_steps_per_second": 2.84, "step": 500 }, { "epoch": 0.48, "grad_norm": 0.7776954174041748, "learning_rate": 5.232e-05, "loss": 1.9545, "step": 600 }, { "epoch": 0.48, "eval_runtime": 88.0187, "eval_samples_per_second": 11.361, "eval_steps_per_second": 2.84, "step": 600 }, { "epoch": 0.56, "grad_norm": 1.4220548868179321, "learning_rate": 4.432e-05, "loss": 1.9591, "step": 700 }, { "epoch": 0.56, "eval_runtime": 88.1299, "eval_samples_per_second": 11.347, "eval_steps_per_second": 2.837, "step": 700 }, { "epoch": 0.64, "grad_norm": 2.3337080478668213, "learning_rate": 3.6320000000000005e-05, "loss": 1.951, "step": 800 }, { "epoch": 0.64, "eval_runtime": 88.1046, "eval_samples_per_second": 11.35, "eval_steps_per_second": 2.838, "step": 800 }, { "epoch": 0.72, "grad_norm": 0.8959614634513855, "learning_rate": 2.8320000000000003e-05, "loss": 1.988, "step": 900 }, { "epoch": 0.72, "eval_runtime": 87.9785, "eval_samples_per_second": 11.366, "eval_steps_per_second": 2.842, "step": 900 }, { "epoch": 0.8, "grad_norm": 1.1870672702789307, "learning_rate": 2.032e-05, "loss": 1.9763, "step": 1000 }, { "epoch": 0.8, "eval_runtime": 87.7334, "eval_samples_per_second": 11.398, "eval_steps_per_second": 2.85, "step": 1000 }, { "epoch": 0.88, "grad_norm": 1.0990030765533447, "learning_rate": 1.232e-05, "loss": 1.9048, "step": 1100 }, { "epoch": 0.88, "eval_runtime": 87.772, "eval_samples_per_second": 11.393, "eval_steps_per_second": 2.848, "step": 1100 }, { "epoch": 0.96, "grad_norm": 4.1690354347229, "learning_rate": 4.32e-06, "loss": 1.9114, "step": 1200 }, { "epoch": 0.96, "eval_runtime": 88.1794, "eval_samples_per_second": 11.341, "eval_steps_per_second": 2.835, "step": 1200 } ], "logging_steps": 100, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.660886380544e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }