{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2663115845539281, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013315579227696406, "eval_loss": 11.091023445129395, "eval_runtime": 1.6209, "eval_samples_per_second": 98.094, "eval_steps_per_second": 49.356, "step": 1 }, { "epoch": 0.013315579227696404, "grad_norm": 0.8809579014778137, "learning_rate": 0.0002, "loss": 22.1639, "step": 10 }, { "epoch": 0.02663115845539281, "grad_norm": 0.821391224861145, "learning_rate": 0.0002, "loss": 22.1332, "step": 20 }, { "epoch": 0.03994673768308921, "grad_norm": 0.863199770450592, "learning_rate": 0.0002, "loss": 22.0906, "step": 30 }, { "epoch": 0.05326231691078562, "grad_norm": 1.169640064239502, "learning_rate": 0.0002, "loss": 22.0633, "step": 40 }, { "epoch": 0.06657789613848203, "grad_norm": 0.8766728043556213, "learning_rate": 0.0002, "loss": 22.0108, "step": 50 }, { "epoch": 0.06657789613848203, "eval_loss": 10.995157241821289, "eval_runtime": 0.4627, "eval_samples_per_second": 343.665, "eval_steps_per_second": 172.913, "step": 50 }, { "epoch": 0.07989347536617843, "grad_norm": 0.7364636063575745, "learning_rate": 0.0002, "loss": 21.9144, "step": 60 }, { "epoch": 0.09320905459387484, "grad_norm": 0.8527082800865173, "learning_rate": 0.0002, "loss": 21.974, "step": 70 }, { "epoch": 0.10652463382157124, "grad_norm": 0.7260386347770691, "learning_rate": 0.0002, "loss": 22.0004, "step": 80 }, { "epoch": 0.11984021304926765, "grad_norm": 0.7889769077301025, "learning_rate": 0.0002, "loss": 21.9855, "step": 90 }, { "epoch": 0.13315579227696406, "grad_norm": 0.8119962215423584, "learning_rate": 0.0002, "loss": 21.9748, "step": 100 }, { "epoch": 0.13315579227696406, "eval_loss": 10.955042839050293, "eval_runtime": 0.4877, "eval_samples_per_second": 325.998, "eval_steps_per_second": 164.024, "step": 100 }, { "epoch": 0.14647137150466044, "grad_norm": 0.7654256820678711, "learning_rate": 0.0002, "loss": 21.9045, "step": 110 }, { "epoch": 0.15978695073235685, "grad_norm": 0.7582597732543945, "learning_rate": 0.0002, "loss": 21.8757, "step": 120 }, { "epoch": 0.17310252996005326, "grad_norm": 0.7560115456581116, "learning_rate": 0.0002, "loss": 21.8915, "step": 130 }, { "epoch": 0.18641810918774968, "grad_norm": 0.8035630583763123, "learning_rate": 0.0002, "loss": 21.8995, "step": 140 }, { "epoch": 0.19973368841544606, "grad_norm": 0.9448879957199097, "learning_rate": 0.0002, "loss": 21.817, "step": 150 }, { "epoch": 0.19973368841544606, "eval_loss": 10.934608459472656, "eval_runtime": 0.4706, "eval_samples_per_second": 337.856, "eval_steps_per_second": 169.99, "step": 150 }, { "epoch": 0.21304926764314247, "grad_norm": 0.8676323890686035, "learning_rate": 0.0002, "loss": 21.9072, "step": 160 }, { "epoch": 0.22636484687083888, "grad_norm": 0.8595561385154724, "learning_rate": 0.0002, "loss": 21.9161, "step": 170 }, { "epoch": 0.2396804260985353, "grad_norm": 0.8619910478591919, "learning_rate": 0.0002, "loss": 21.8431, "step": 180 }, { "epoch": 0.2529960053262317, "grad_norm": 0.8252401947975159, "learning_rate": 0.0002, "loss": 21.8738, "step": 190 }, { "epoch": 0.2663115845539281, "grad_norm": 0.8227563500404358, "learning_rate": 0.0002, "loss": 21.8141, "step": 200 }, { "epoch": 0.2663115845539281, "eval_loss": 10.927712440490723, "eval_runtime": 0.4592, "eval_samples_per_second": 346.264, "eval_steps_per_second": 174.221, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 179683983360.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }