{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 16, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 0, "loss": 2.9912, "step": 1 }, { "epoch": 0.06, "eval_accuracy": 0.040365853658536587, "eval_loss": 2.744140625, "eval_runtime": 4.9375, "eval_samples_per_second": 20.253, "eval_steps_per_second": 0.405, "step": 1 }, { "epoch": 0.12, "learning_rate": 0, "loss": 2.9329, "step": 2 }, { "epoch": 0.12, "eval_accuracy": 0.040365853658536587, "eval_loss": 2.744140625, "eval_runtime": 6.0216, "eval_samples_per_second": 16.607, "eval_steps_per_second": 0.332, "step": 2 }, { "epoch": 0.19, "learning_rate": 0.0, "loss": 2.9138, "step": 3 }, { "epoch": 0.19, "eval_accuracy": 0.038902439024390244, "eval_loss": 2.826171875, "eval_runtime": 6.6113, "eval_samples_per_second": 15.126, "eval_steps_per_second": 0.303, "step": 3 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.9395, "step": 4 }, { "epoch": 0.25, "eval_accuracy": 0.038902439024390244, "eval_loss": 2.826171875, "eval_runtime": 7.2265, "eval_samples_per_second": 13.838, "eval_steps_per_second": 0.277, "step": 4 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.9109, "step": 5 }, { "epoch": 0.31, "eval_accuracy": 0.039878048780487806, "eval_loss": 2.794921875, "eval_runtime": 7.453, "eval_samples_per_second": 13.417, "eval_steps_per_second": 0.268, "step": 5 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.8391, "step": 6 }, { "epoch": 0.38, "eval_accuracy": 0.040284552845528454, "eval_loss": 2.74609375, "eval_runtime": 6.8467, "eval_samples_per_second": 14.606, "eval_steps_per_second": 0.292, "step": 6 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.9368, "step": 7 }, { "epoch": 0.44, "eval_accuracy": 0.03983739837398374, "eval_loss": 2.720703125, "eval_runtime": 7.2744, "eval_samples_per_second": 13.747, "eval_steps_per_second": 0.275, "step": 7 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.7583, "step": 8 }, { "epoch": 0.5, "eval_accuracy": 0.040325203252032524, "eval_loss": 2.70703125, "eval_runtime": 6.8783, "eval_samples_per_second": 14.539, "eval_steps_per_second": 0.291, "step": 8 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.9756, "step": 9 }, { "epoch": 0.56, "eval_accuracy": 0.04083333333333333, "eval_loss": 2.68359375, "eval_runtime": 7.0122, "eval_samples_per_second": 14.261, "eval_steps_per_second": 0.285, "step": 9 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.8442, "step": 10 }, { "epoch": 0.62, "eval_accuracy": 0.04034552845528455, "eval_loss": 2.673828125, "eval_runtime": 7.0273, "eval_samples_per_second": 14.23, "eval_steps_per_second": 0.285, "step": 10 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.7312, "step": 11 }, { "epoch": 0.69, "eval_accuracy": 0.04054878048780488, "eval_loss": 2.66796875, "eval_runtime": 6.8584, "eval_samples_per_second": 14.581, "eval_steps_per_second": 0.292, "step": 11 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.7439, "step": 12 }, { "epoch": 0.75, "eval_accuracy": 0.040365853658536587, "eval_loss": 2.669921875, "eval_runtime": 6.8135, "eval_samples_per_second": 14.677, "eval_steps_per_second": 0.294, "step": 12 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.9075, "step": 13 }, { "epoch": 0.81, "eval_accuracy": 0.04034552845528455, "eval_loss": 2.6796875, "eval_runtime": 6.1497, "eval_samples_per_second": 16.261, "eval_steps_per_second": 0.325, "step": 13 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.8518, "step": 14 }, { "epoch": 0.88, "eval_accuracy": 0.04034552845528455, "eval_loss": 2.6796875, "eval_runtime": 6.4271, "eval_samples_per_second": 15.559, "eval_steps_per_second": 0.311, "step": 14 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.8579, "step": 15 }, { "epoch": 0.94, "eval_accuracy": 0.04044715447154471, "eval_loss": 2.677734375, "eval_runtime": 7.2159, "eval_samples_per_second": 13.858, "eval_steps_per_second": 0.277, "step": 15 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.8916, "step": 16 }, { "epoch": 1.0, "eval_accuracy": 0.040325203252032524, "eval_loss": 2.6953125, "eval_runtime": 6.3862, "eval_samples_per_second": 15.659, "eval_steps_per_second": 0.313, "step": 16 }, { "epoch": 1.0, "step": 16, "total_flos": 4999961640960.0, "train_loss": 2.8766326904296875, "train_runtime": 267.0978, "train_samples_per_second": 3.744, "train_steps_per_second": 0.06 } ], "max_steps": 16, "num_train_epochs": 1, "total_flos": 4999961640960.0, "trial_name": null, "trial_params": null }