{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 32, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 0, "loss": 2.9912, "step": 1 }, { "epoch": 0.06, "eval_accuracy": 0.040365853658536587, "eval_loss": 2.744140625, "eval_runtime": 4.8585, "eval_samples_per_second": 20.583, "eval_steps_per_second": 0.412, "step": 1 }, { "epoch": 0.12, "learning_rate": 0, "loss": 2.9329, "step": 2 }, { "epoch": 0.12, "eval_accuracy": 0.040365853658536587, "eval_loss": 2.744140625, "eval_runtime": 6.3845, "eval_samples_per_second": 15.663, "eval_steps_per_second": 0.313, "step": 2 }, { "epoch": 0.19, "learning_rate": 0.0, "loss": 2.9138, "step": 3 }, { "epoch": 0.19, "eval_accuracy": 0.038902439024390244, "eval_loss": 2.826171875, "eval_runtime": 5.4611, "eval_samples_per_second": 18.311, "eval_steps_per_second": 0.366, "step": 3 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.9395, "step": 4 }, { "epoch": 0.25, "eval_accuracy": 0.038902439024390244, "eval_loss": 2.826171875, "eval_runtime": 6.1625, "eval_samples_per_second": 16.227, "eval_steps_per_second": 0.325, "step": 4 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.9109, "step": 5 }, { "epoch": 0.31, "eval_accuracy": 0.03985772357723577, "eval_loss": 2.794921875, "eval_runtime": 6.2368, "eval_samples_per_second": 16.034, "eval_steps_per_second": 0.321, "step": 5 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.8394, "step": 6 }, { "epoch": 0.38, "eval_accuracy": 0.04030487804878049, "eval_loss": 2.74609375, "eval_runtime": 6.1734, "eval_samples_per_second": 16.198, "eval_steps_per_second": 0.324, "step": 6 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.9365, "step": 7 }, { "epoch": 0.44, "eval_accuracy": 0.03985772357723577, "eval_loss": 2.720703125, "eval_runtime": 6.2486, "eval_samples_per_second": 16.004, "eval_steps_per_second": 0.32, "step": 7 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.7588, "step": 8 }, { "epoch": 0.5, "eval_accuracy": 0.040325203252032524, "eval_loss": 2.70703125, "eval_runtime": 6.0675, "eval_samples_per_second": 16.481, "eval_steps_per_second": 0.33, "step": 8 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.9751, "step": 9 }, { "epoch": 0.56, "eval_accuracy": 0.04073170731707317, "eval_loss": 2.681640625, "eval_runtime": 6.0505, "eval_samples_per_second": 16.528, "eval_steps_per_second": 0.331, "step": 9 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.844, "step": 10 }, { "epoch": 0.62, "eval_accuracy": 0.040426829268292684, "eval_loss": 2.673828125, "eval_runtime": 5.8947, "eval_samples_per_second": 16.964, "eval_steps_per_second": 0.339, "step": 10 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.731, "step": 11 }, { "epoch": 0.69, "eval_accuracy": 0.04056910569105691, "eval_loss": 2.66796875, "eval_runtime": 5.7332, "eval_samples_per_second": 17.442, "eval_steps_per_second": 0.349, "step": 11 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.7434, "step": 12 }, { "epoch": 0.75, "eval_accuracy": 0.04040650406504065, "eval_loss": 2.669921875, "eval_runtime": 5.6141, "eval_samples_per_second": 17.812, "eval_steps_per_second": 0.356, "step": 12 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.9043, "step": 13 }, { "epoch": 0.81, "eval_accuracy": 0.039979674796747966, "eval_loss": 2.685546875, "eval_runtime": 5.112, "eval_samples_per_second": 19.562, "eval_steps_per_second": 0.391, "step": 13 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.8564, "step": 14 }, { "epoch": 0.88, "eval_accuracy": 0.039979674796747966, "eval_loss": 2.685546875, "eval_runtime": 6.1791, "eval_samples_per_second": 16.184, "eval_steps_per_second": 0.324, "step": 14 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.8716, "step": 15 }, { "epoch": 0.94, "eval_accuracy": 0.039979674796747966, "eval_loss": 2.685546875, "eval_runtime": 6.2221, "eval_samples_per_second": 16.072, "eval_steps_per_second": 0.321, "step": 15 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.896, "step": 16 }, { "epoch": 1.0, "eval_accuracy": 0.03975609756097561, "eval_loss": 2.6953125, "eval_runtime": 5.7373, "eval_samples_per_second": 17.43, "eval_steps_per_second": 0.349, "step": 16 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 1.9858, "step": 17 }, { "epoch": 1.06, "eval_accuracy": 0.039979674796747966, "eval_loss": 2.70703125, "eval_runtime": 5.5135, "eval_samples_per_second": 18.137, "eval_steps_per_second": 0.363, "step": 17 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.0563, "step": 18 }, { "epoch": 1.12, "eval_accuracy": 0.040020325203252036, "eval_loss": 2.728515625, "eval_runtime": 6.3828, "eval_samples_per_second": 15.667, "eval_steps_per_second": 0.313, "step": 18 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.04, "step": 19 }, { "epoch": 1.19, "eval_accuracy": 0.03981707317073171, "eval_loss": 2.767578125, "eval_runtime": 5.4095, "eval_samples_per_second": 18.486, "eval_steps_per_second": 0.37, "step": 19 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 1.9885, "step": 20 }, { "epoch": 1.25, "eval_accuracy": 0.03955284552845528, "eval_loss": 2.791015625, "eval_runtime": 5.4272, "eval_samples_per_second": 18.426, "eval_steps_per_second": 0.369, "step": 20 }, { "epoch": 1.31, "learning_rate": 5e-05, "loss": 2.09, "step": 21 }, { "epoch": 1.31, "eval_accuracy": 0.03930894308943089, "eval_loss": 2.796875, "eval_runtime": 6.3475, "eval_samples_per_second": 15.754, "eval_steps_per_second": 0.315, "step": 21 }, { "epoch": 1.38, "learning_rate": 5e-05, "loss": 2.059, "step": 22 }, { "epoch": 1.38, "eval_accuracy": 0.03951219512195122, "eval_loss": 2.810546875, "eval_runtime": 6.0188, "eval_samples_per_second": 16.615, "eval_steps_per_second": 0.332, "step": 22 }, { "epoch": 1.44, "learning_rate": 5e-05, "loss": 2.0498, "step": 23 }, { "epoch": 1.44, "eval_accuracy": 0.03975609756097561, "eval_loss": 2.79296875, "eval_runtime": 5.4742, "eval_samples_per_second": 18.268, "eval_steps_per_second": 0.365, "step": 23 }, { "epoch": 1.5, "learning_rate": 5e-05, "loss": 1.9568, "step": 24 }, { "epoch": 1.5, "eval_accuracy": 0.04008130081300813, "eval_loss": 2.791015625, "eval_runtime": 5.9198, "eval_samples_per_second": 16.892, "eval_steps_per_second": 0.338, "step": 24 }, { "epoch": 1.56, "learning_rate": 5e-05, "loss": 2.1418, "step": 25 }, { "epoch": 1.56, "eval_accuracy": 0.039776422764227645, "eval_loss": 2.79296875, "eval_runtime": 5.5505, "eval_samples_per_second": 18.016, "eval_steps_per_second": 0.36, "step": 25 }, { "epoch": 1.62, "learning_rate": 5e-05, "loss": 1.975, "step": 26 }, { "epoch": 1.62, "eval_accuracy": 0.03967479674796748, "eval_loss": 2.79296875, "eval_runtime": 5.108, "eval_samples_per_second": 19.577, "eval_steps_per_second": 0.392, "step": 26 }, { "epoch": 1.69, "learning_rate": 5e-05, "loss": 1.996, "step": 27 }, { "epoch": 1.69, "eval_accuracy": 0.03932926829268293, "eval_loss": 2.794921875, "eval_runtime": 5.1867, "eval_samples_per_second": 19.28, "eval_steps_per_second": 0.386, "step": 27 }, { "epoch": 1.75, "learning_rate": 5e-05, "loss": 1.9617, "step": 28 }, { "epoch": 1.75, "eval_accuracy": 0.03916666666666667, "eval_loss": 2.8046875, "eval_runtime": 5.8792, "eval_samples_per_second": 17.009, "eval_steps_per_second": 0.34, "step": 28 }, { "epoch": 1.81, "learning_rate": 5e-05, "loss": 2.2062, "step": 29 }, { "epoch": 1.81, "eval_accuracy": 0.038760162601626014, "eval_loss": 2.814453125, "eval_runtime": 5.1614, "eval_samples_per_second": 19.375, "eval_steps_per_second": 0.387, "step": 29 }, { "epoch": 1.88, "learning_rate": 5e-05, "loss": 1.9929, "step": 30 }, { "epoch": 1.88, "eval_accuracy": 0.038597560975609756, "eval_loss": 2.814453125, "eval_runtime": 5.7986, "eval_samples_per_second": 17.245, "eval_steps_per_second": 0.345, "step": 30 }, { "epoch": 1.94, "learning_rate": 5e-05, "loss": 1.9235, "step": 31 }, { "epoch": 1.94, "eval_accuracy": 0.039004065040650404, "eval_loss": 2.828125, "eval_runtime": 6.2602, "eval_samples_per_second": 15.974, "eval_steps_per_second": 0.319, "step": 31 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 1.9127, "step": 32 }, { "epoch": 2.0, "eval_accuracy": 0.03882113821138211, "eval_loss": 2.857421875, "eval_runtime": 5.5269, "eval_samples_per_second": 18.093, "eval_steps_per_second": 0.362, "step": 32 }, { "epoch": 2.0, "step": 32, "total_flos": 10341801000960.0, "train_loss": 2.4493942260742188, "train_runtime": 515.1457, "train_samples_per_second": 3.882, "train_steps_per_second": 0.062 } ], "max_steps": 32, "num_train_epochs": 2, "total_flos": 10341801000960.0, "trial_name": null, "trial_params": null }