{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.18923527002334595, "learning_rate": 0.0001666666666666667, "loss": 1.1779, "step": 25 }, { "epoch": 0.05, "eval_loss": 0.7435017824172974, "eval_runtime": 621.1563, "eval_samples_per_second": 4.356, "eval_steps_per_second": 0.546, "step": 25 }, { "epoch": 0.1, "grad_norm": 0.16780352592468262, "learning_rate": 0.00019148936170212768, "loss": 1.2708, "step": 50 }, { "epoch": 0.1, "eval_loss": 0.7384106516838074, "eval_runtime": 619.3795, "eval_samples_per_second": 4.369, "eval_steps_per_second": 0.547, "step": 50 }, { "epoch": 0.15, "grad_norm": 0.1804531365633011, "learning_rate": 0.00018085106382978726, "loss": 1.2848, "step": 75 }, { "epoch": 0.15, "eval_loss": 0.7341726422309875, "eval_runtime": 619.225, "eval_samples_per_second": 4.37, "eval_steps_per_second": 0.547, "step": 75 }, { "epoch": 0.2, "grad_norm": 0.17204701900482178, "learning_rate": 0.00017021276595744682, "loss": 1.3443, "step": 100 }, { "epoch": 0.2, "eval_loss": 0.7313376665115356, "eval_runtime": 619.8363, "eval_samples_per_second": 4.366, "eval_steps_per_second": 0.547, "step": 100 }, { "epoch": 0.25, "grad_norm": 0.2668892741203308, "learning_rate": 0.00015957446808510637, "loss": 1.2563, "step": 125 }, { "epoch": 0.25, "eval_loss": 0.7282304167747498, "eval_runtime": 619.962, "eval_samples_per_second": 4.365, "eval_steps_per_second": 0.547, "step": 125 }, { "epoch": 0.3, "grad_norm": 0.21089820563793182, "learning_rate": 0.00014893617021276596, "loss": 0.7529, "step": 150 }, { "epoch": 0.3, "eval_loss": 0.7184526324272156, "eval_runtime": 619.2606, "eval_samples_per_second": 4.37, "eval_steps_per_second": 0.547, "step": 150 }, { "epoch": 0.35, "grad_norm": 0.158623605966568, "learning_rate": 0.00013829787234042554, "loss": 0.7957, "step": 175 }, { "epoch": 0.35, "eval_loss": 0.7136204242706299, "eval_runtime": 619.6757, "eval_samples_per_second": 4.367, "eval_steps_per_second": 0.547, "step": 175 }, { "epoch": 0.4, "grad_norm": 0.16229337453842163, "learning_rate": 0.00012765957446808513, "loss": 0.7322, "step": 200 }, { "epoch": 0.4, "eval_loss": 0.7111368179321289, "eval_runtime": 620.1668, "eval_samples_per_second": 4.363, "eval_steps_per_second": 0.547, "step": 200 } ], "logging_steps": 25, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.0317260660736e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }