{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.704530087897228, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3380662609871535, "grad_norm": 1.1497818231582642, "learning_rate": 1.776876267748479e-05, "loss": 1.475, "step": 500 }, { "epoch": 0.3380662609871535, "eval_loss": 0.4233010411262512, "eval_runtime": 4.5459, "eval_samples_per_second": 289.274, "eval_steps_per_second": 36.297, "step": 500 }, { "epoch": 0.676132521974307, "grad_norm": 0.609937846660614, "learning_rate": 1.55149876042371e-05, "loss": 0.4585, "step": 1000 }, { "epoch": 0.676132521974307, "eval_loss": 0.3847877085208893, "eval_runtime": 4.0513, "eval_samples_per_second": 324.584, "eval_steps_per_second": 40.727, "step": 1000 }, { "epoch": 1.0141987829614605, "grad_norm": 0.7841311097145081, "learning_rate": 1.3261212530989409e-05, "loss": 0.4269, "step": 1500 }, { "epoch": 1.0141987829614605, "eval_loss": 0.3729918599128723, "eval_runtime": 4.4571, "eval_samples_per_second": 295.035, "eval_steps_per_second": 37.02, "step": 1500 }, { "epoch": 1.352265043948614, "grad_norm": 0.8554628491401672, "learning_rate": 1.100743745774172e-05, "loss": 0.4106, "step": 2000 }, { "epoch": 1.352265043948614, "eval_loss": 0.36659201979637146, "eval_runtime": 4.8023, "eval_samples_per_second": 273.828, "eval_steps_per_second": 34.359, "step": 2000 }, { "epoch": 1.6903313049357673, "grad_norm": 0.7967121005058289, "learning_rate": 8.753662384494028e-06, "loss": 0.4016, "step": 2500 }, { "epoch": 1.6903313049357673, "eval_loss": 0.36214107275009155, "eval_runtime": 4.099, "eval_samples_per_second": 320.808, "eval_steps_per_second": 40.253, "step": 2500 }, { "epoch": 2.028397565922921, "grad_norm": 0.5886064171791077, "learning_rate": 6.499887311246338e-06, "loss": 0.3963, "step": 3000 }, { "epoch": 2.028397565922921, "eval_loss": 0.35916823148727417, "eval_runtime": 4.5892, "eval_samples_per_second": 286.545, "eval_steps_per_second": 35.954, "step": 3000 }, { "epoch": 2.366463826910074, "grad_norm": 0.5553242564201355, "learning_rate": 4.246112237998648e-06, "loss": 0.3905, "step": 3500 }, { "epoch": 2.366463826910074, "eval_loss": 0.3574349880218506, "eval_runtime": 4.1299, "eval_samples_per_second": 318.41, "eval_steps_per_second": 39.953, "step": 3500 }, { "epoch": 2.704530087897228, "grad_norm": 3.7581846714019775, "learning_rate": 1.992337164750958e-06, "loss": 0.3889, "step": 4000 }, { "epoch": 2.704530087897228, "eval_loss": 0.35647767782211304, "eval_runtime": 5.0273, "eval_samples_per_second": 261.573, "eval_steps_per_second": 32.821, "step": 4000 } ], "logging_steps": 500, "max_steps": 4437, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1082599069974528.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }