{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6e-06, "loss": 2.718, "step": 1 }, { "epoch": 0.08, "learning_rate": 6e-06, "loss": 2.5793, "step": 6 }, { "epoch": 0.08, "eval_accuracy": 0.27131470139518427, "eval_loss": 2.572265625, "eval_runtime": 75.3129, "eval_samples_per_second": 4.833, "eval_steps_per_second": 0.611, "step": 6 }, { "epoch": 0.16, "learning_rate": 6e-06, "loss": 2.5612, "step": 12 }, { "epoch": 0.16, "eval_accuracy": 0.27501824131602803, "eval_loss": 2.5, "eval_runtime": 75.9396, "eval_samples_per_second": 4.793, "eval_steps_per_second": 0.606, "step": 12 }, { "epoch": 0.24, "learning_rate": 6e-06, "loss": 2.5235, "step": 18 }, { "epoch": 0.24, "eval_accuracy": 0.27841775930307117, "eval_loss": 2.447265625, "eval_runtime": 75.8369, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.607, "step": 18 }, { "epoch": 0.32, "learning_rate": 6e-06, "loss": 2.4961, "step": 24 }, { "epoch": 0.32, "eval_accuracy": 0.2818393879762089, "eval_loss": 2.41015625, "eval_runtime": 75.7164, "eval_samples_per_second": 4.807, "eval_steps_per_second": 0.608, "step": 24 }, { "epoch": 0.4, "learning_rate": 6e-06, "loss": 2.4488, "step": 30 }, { "epoch": 0.4, "eval_accuracy": 0.2848740796426913, "eval_loss": 2.3671875, "eval_runtime": 75.8548, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.606, "step": 30 }, { "epoch": 0.48, "learning_rate": 6e-06, "loss": 2.4121, "step": 36 }, { "epoch": 0.48, "eval_accuracy": 0.2877761071926062, "eval_loss": 2.33203125, "eval_runtime": 75.8441, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.607, "step": 36 }, { "epoch": 0.56, "learning_rate": 6e-06, "loss": 2.3901, "step": 42 }, { "epoch": 0.56, "eval_accuracy": 0.2902580317067239, "eval_loss": 2.302734375, "eval_runtime": 75.8507, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.606, "step": 42 }, { "epoch": 0.64, "learning_rate": 6e-06, "loss": 2.2845, "step": 48 }, { "epoch": 0.64, "eval_accuracy": 0.29269573484865236, "eval_loss": 2.271484375, "eval_runtime": 75.7801, "eval_samples_per_second": 4.803, "eval_steps_per_second": 0.607, "step": 48 }, { "epoch": 0.72, "learning_rate": 6e-06, "loss": 2.3032, "step": 54 }, { "epoch": 0.72, "eval_accuracy": 0.29554248568333075, "eval_loss": 2.2421875, "eval_runtime": 75.8262, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.607, "step": 54 }, { "epoch": 0.8, "learning_rate": 6e-06, "loss": 2.2954, "step": 60 }, { "epoch": 0.8, "eval_accuracy": 0.2984555685762929, "eval_loss": 2.208984375, "eval_runtime": 75.8901, "eval_samples_per_second": 4.796, "eval_steps_per_second": 0.606, "step": 60 }, { "epoch": 0.88, "learning_rate": 6e-06, "loss": 2.3908, "step": 66 }, { "epoch": 0.88, "eval_accuracy": 0.30088774404669777, "eval_loss": 2.18359375, "eval_runtime": 76.1084, "eval_samples_per_second": 4.783, "eval_steps_per_second": 0.604, "step": 66 }, { "epoch": 0.96, "learning_rate": 6e-06, "loss": 2.2676, "step": 72 }, { "epoch": 0.96, "eval_accuracy": 0.3035631370641431, "eval_loss": 2.150390625, "eval_runtime": 75.8379, "eval_samples_per_second": 4.8, "eval_steps_per_second": 0.607, "step": 72 }, { "epoch": 1.0, "step": 75, "total_flos": 4974649540608.0, "train_loss": 2.4108121744791666, "train_runtime": 14351.1542, "train_samples_per_second": 0.042, "train_steps_per_second": 0.005 } ], "max_steps": 75, "num_train_epochs": 1, "total_flos": 4974649540608.0, "trial_name": null, "trial_params": null }