{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3e-07, "loss": 2.718, "step": 1 }, { "epoch": 0.08, "learning_rate": 3e-07, "loss": 2.5856, "step": 6 }, { "epoch": 0.08, "eval_accuracy": 0.2696508722665664, "eval_loss": 2.595703125, "eval_runtime": 73.0734, "eval_samples_per_second": 4.981, "eval_steps_per_second": 0.63, "step": 6 }, { "epoch": 0.16, "learning_rate": 3e-07, "loss": 2.6027, "step": 12 }, { "epoch": 0.16, "eval_accuracy": 0.26980564706922855, "eval_loss": 2.59375, "eval_runtime": 73.5378, "eval_samples_per_second": 4.95, "eval_steps_per_second": 0.626, "step": 12 }, { "epoch": 0.24, "learning_rate": 3e-07, "loss": 2.619, "step": 18 }, { "epoch": 0.24, "eval_accuracy": 0.26998806022950894, "eval_loss": 2.587890625, "eval_runtime": 73.5544, "eval_samples_per_second": 4.949, "eval_steps_per_second": 0.625, "step": 18 }, { "epoch": 0.32, "learning_rate": 3e-07, "loss": 2.6121, "step": 24 }, { "epoch": 0.32, "eval_accuracy": 0.27019258407588387, "eval_loss": 2.583984375, "eval_runtime": 73.6493, "eval_samples_per_second": 4.942, "eval_steps_per_second": 0.625, "step": 24 }, { "epoch": 0.4, "learning_rate": 3e-07, "loss": 2.6024, "step": 30 }, { "epoch": 0.4, "eval_accuracy": 0.27057399341101557, "eval_loss": 2.576171875, "eval_runtime": 73.5651, "eval_samples_per_second": 4.948, "eval_steps_per_second": 0.625, "step": 30 }, { "epoch": 0.48, "learning_rate": 3e-07, "loss": 2.5878, "step": 36 }, { "epoch": 0.48, "eval_accuracy": 0.270739823556725, "eval_loss": 2.5703125, "eval_runtime": 73.2671, "eval_samples_per_second": 4.968, "eval_steps_per_second": 0.628, "step": 36 }, { "epoch": 0.56, "learning_rate": 3e-07, "loss": 2.5541, "step": 42 }, { "epoch": 0.56, "eval_accuracy": 0.2710327901474783, "eval_loss": 2.5625, "eval_runtime": 73.5901, "eval_samples_per_second": 4.946, "eval_steps_per_second": 0.625, "step": 42 }, { "epoch": 0.64, "learning_rate": 3e-07, "loss": 2.5207, "step": 48 }, { "epoch": 0.64, "eval_accuracy": 0.27128706303756606, "eval_loss": 2.556640625, "eval_runtime": 73.6385, "eval_samples_per_second": 4.943, "eval_steps_per_second": 0.625, "step": 48 }, { "epoch": 0.72, "learning_rate": 3e-07, "loss": 2.4577, "step": 54 }, { "epoch": 0.72, "eval_accuracy": 0.27152475291308287, "eval_loss": 2.548828125, "eval_runtime": 73.672, "eval_samples_per_second": 4.941, "eval_steps_per_second": 0.624, "step": 54 }, { "epoch": 0.8, "learning_rate": 3e-07, "loss": 2.5614, "step": 60 }, { "epoch": 0.8, "eval_accuracy": 0.27176797046012335, "eval_loss": 2.54296875, "eval_runtime": 73.5346, "eval_samples_per_second": 4.95, "eval_steps_per_second": 0.626, "step": 60 }, { "epoch": 0.88, "learning_rate": 3e-07, "loss": 2.6959, "step": 66 }, { "epoch": 0.88, "eval_accuracy": 0.27219360116744423, "eval_loss": 2.53515625, "eval_runtime": 73.3529, "eval_samples_per_second": 4.962, "eval_steps_per_second": 0.627, "step": 66 }, { "epoch": 0.96, "learning_rate": 3e-07, "loss": 2.5084, "step": 72 }, { "epoch": 0.96, "eval_accuracy": 0.2725086784442921, "eval_loss": 2.529296875, "eval_runtime": 73.6241, "eval_samples_per_second": 4.944, "eval_steps_per_second": 0.625, "step": 72 }, { "epoch": 1.0, "step": 75, "total_flos": 4974649540608.0, "train_loss": 2.57614501953125, "train_runtime": 16094.8553, "train_samples_per_second": 0.037, "train_steps_per_second": 0.005 } ], "max_steps": 75, "num_train_epochs": 1, "total_flos": 4974649540608.0, "trial_name": null, "trial_params": null }