{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.016185594820609658, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005395198273536552, "eval_loss": 1.9554163217544556, "eval_runtime": 15.2817, "eval_samples_per_second": 51.107, "eval_steps_per_second": 25.586, "step": 1 }, { "epoch": 0.0016185594820609657, "grad_norm": 3.3060264587402344, "learning_rate": 3e-05, "loss": 7.981, "step": 3 }, { "epoch": 0.002697599136768276, "eval_loss": 1.9359668493270874, "eval_runtime": 15.2945, "eval_samples_per_second": 51.064, "eval_steps_per_second": 25.565, "step": 5 }, { "epoch": 0.0032371189641219314, "grad_norm": 4.126204013824463, "learning_rate": 6e-05, "loss": 7.995, "step": 6 }, { "epoch": 0.0048556784461828975, "grad_norm": 126.12236785888672, "learning_rate": 9e-05, "loss": 7.6066, "step": 9 }, { "epoch": 0.005395198273536552, "eval_loss": 1.8610799312591553, "eval_runtime": 15.2832, "eval_samples_per_second": 51.102, "eval_steps_per_second": 25.584, "step": 10 }, { "epoch": 0.006474237928243863, "grad_norm": 3.5348870754241943, "learning_rate": 9.755282581475769e-05, "loss": 7.6399, "step": 12 }, { "epoch": 0.008092797410304829, "grad_norm": 2.763091564178467, "learning_rate": 8.535533905932738e-05, "loss": 7.1684, "step": 15 }, { "epoch": 0.008092797410304829, "eval_loss": 1.8094102144241333, "eval_runtime": 15.2652, "eval_samples_per_second": 51.162, "eval_steps_per_second": 25.614, "step": 15 }, { "epoch": 0.009711356892365795, "grad_norm": 2.5705695152282715, "learning_rate": 6.545084971874738e-05, "loss": 7.411, "step": 18 }, { "epoch": 0.010790396547073105, "eval_loss": 1.7697291374206543, "eval_runtime": 15.2787, "eval_samples_per_second": 51.117, "eval_steps_per_second": 25.591, "step": 20 }, { "epoch": 0.01132991637442676, "grad_norm": 2.7558438777923584, "learning_rate": 4.2178276747988446e-05, "loss": 7.025, "step": 21 }, { "epoch": 0.012948475856487725, "grad_norm": 2.9869544506073, "learning_rate": 2.061073738537635e-05, "loss": 7.2359, "step": 24 }, { "epoch": 0.013487995683841382, "eval_loss": 1.760362982749939, "eval_runtime": 15.3059, "eval_samples_per_second": 51.026, "eval_steps_per_second": 25.546, "step": 25 }, { "epoch": 0.014567035338548692, "grad_norm": 2.3133647441864014, "learning_rate": 5.449673790581611e-06, "loss": 6.754, "step": 27 }, { "epoch": 0.016185594820609658, "grad_norm": 2.724479913711548, "learning_rate": 0.0, "loss": 7.0305, "step": 30 }, { "epoch": 0.016185594820609658, "eval_loss": 1.7582391500473022, "eval_runtime": 15.255, "eval_samples_per_second": 51.196, "eval_steps_per_second": 25.631, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 458848619986944.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }