{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.056794786238623295, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00567947862386233, "eval_accuracy": 0.577217863529113, "eval_loss": 2.233586072921753, "eval_runtime": 1986.1249, "eval_samples_per_second": 14.911, "eval_steps_per_second": 7.456, "step": 100 }, { "epoch": 0.01135895724772466, "eval_accuracy": 0.6331085624965921, "eval_loss": 1.865549921989441, "eval_runtime": 1990.8563, "eval_samples_per_second": 14.876, "eval_steps_per_second": 7.438, "step": 200 }, { "epoch": 0.017038435871586988, "eval_accuracy": 0.6620234759230288, "eval_loss": 1.6861233711242676, "eval_runtime": 1994.7346, "eval_samples_per_second": 14.847, "eval_steps_per_second": 7.424, "step": 300 }, { "epoch": 0.02271791449544932, "eval_accuracy": 0.6771864912263876, "eval_loss": 1.5935094356536865, "eval_runtime": 1993.4218, "eval_samples_per_second": 14.857, "eval_steps_per_second": 7.428, "step": 400 }, { "epoch": 0.028397393119311647, "grad_norm": 4.40625, "learning_rate": 4.8580110183449765e-05, "loss": 2.0591, "step": 500 }, { "epoch": 0.028397393119311647, "eval_accuracy": 0.6888090836794671, "eval_loss": 1.5249096155166626, "eval_runtime": 1993.694, "eval_samples_per_second": 14.855, "eval_steps_per_second": 7.427, "step": 500 }, { "epoch": 0.034076871743173975, "eval_accuracy": 0.6971747896369712, "eval_loss": 1.477766752243042, "eval_runtime": 1994.0632, "eval_samples_per_second": 14.852, "eval_steps_per_second": 7.426, "step": 600 }, { "epoch": 0.03975635036703631, "eval_accuracy": 0.7031079032429749, "eval_loss": 1.4431705474853516, "eval_runtime": 1994.0576, "eval_samples_per_second": 14.852, "eval_steps_per_second": 7.426, "step": 700 }, { "epoch": 0.04543582899089864, "eval_accuracy": 0.7086350429994105, "eval_loss": 1.4113129377365112, "eval_runtime": 1995.6333, "eval_samples_per_second": 14.84, "eval_steps_per_second": 7.42, "step": 800 }, { "epoch": 0.05111530761476096, "eval_accuracy": 0.7133429387568253, "eval_loss": 1.3802016973495483, "eval_runtime": 1996.6407, "eval_samples_per_second": 14.833, "eval_steps_per_second": 7.416, "step": 900 }, { "epoch": 0.056794786238623295, "grad_norm": 2.640625, "learning_rate": 4.7160220366899535e-05, "loss": 1.4351, "step": 1000 }, { "epoch": 0.056794786238623295, "eval_accuracy": 0.716021928609194, "eval_loss": 1.3633891344070435, "eval_runtime": 1991.0825, "eval_samples_per_second": 14.874, "eval_steps_per_second": 7.437, "step": 1000 }, { "epoch": 0.056794786238623295, "step": 1000, "total_flos": 7.41887283560448e+17, "train_loss": 1.7471453247070312, "train_runtime": 26390.675, "train_samples_per_second": 21.35, "train_steps_per_second": 0.667 } ], "logging_steps": 500, "max_steps": 17607, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.41887283560448e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }