{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 26, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 0, "loss": 2.6133, "step": 1 }, { "epoch": 0.08, "eval_accuracy": 0.06519634211941905, "eval_loss": 2.625, "eval_runtime": 2.6754, "eval_samples_per_second": 29.154, "eval_steps_per_second": 1.121, "step": 1 }, { "epoch": 0.15, "learning_rate": 0, "loss": 2.6199, "step": 2 }, { "epoch": 0.15, "eval_accuracy": 0.06519634211941905, "eval_loss": 2.625, "eval_runtime": 2.9539, "eval_samples_per_second": 26.406, "eval_steps_per_second": 1.016, "step": 2 }, { "epoch": 0.23, "learning_rate": 0.0, "loss": 2.7202, "step": 3 }, { "epoch": 0.23, "eval_accuracy": 0.06575219652142729, "eval_loss": 2.611328125, "eval_runtime": 2.94, "eval_samples_per_second": 26.53, "eval_steps_per_second": 1.02, "step": 3 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.6177, "step": 4 }, { "epoch": 0.31, "eval_accuracy": 0.06575219652142729, "eval_loss": 2.611328125, "eval_runtime": 2.9241, "eval_samples_per_second": 26.675, "eval_steps_per_second": 1.026, "step": 4 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.5422, "step": 5 }, { "epoch": 0.38, "eval_accuracy": 0.06611081226465843, "eval_loss": 2.5703125, "eval_runtime": 2.6904, "eval_samples_per_second": 28.992, "eval_steps_per_second": 1.115, "step": 5 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.5627, "step": 6 }, { "epoch": 0.46, "eval_accuracy": 0.06623632777478931, "eval_loss": 2.556640625, "eval_runtime": 2.9307, "eval_samples_per_second": 26.615, "eval_steps_per_second": 1.024, "step": 6 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.5784, "step": 7 }, { "epoch": 0.54, "eval_accuracy": 0.06639770485924332, "eval_loss": 2.546875, "eval_runtime": 2.9555, "eval_samples_per_second": 26.391, "eval_steps_per_second": 1.015, "step": 7 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.5264, "step": 8 }, { "epoch": 0.62, "eval_accuracy": 0.06630805092343554, "eval_loss": 2.537109375, "eval_runtime": 2.6791, "eval_samples_per_second": 29.114, "eval_steps_per_second": 1.12, "step": 8 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.3396, "step": 9 }, { "epoch": 0.69, "eval_accuracy": 0.06695355926125157, "eval_loss": 2.533203125, "eval_runtime": 1.929, "eval_samples_per_second": 40.435, "eval_steps_per_second": 1.555, "step": 9 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.4297, "step": 10 }, { "epoch": 0.77, "eval_accuracy": 0.06734803657880581, "eval_loss": 2.52734375, "eval_runtime": 2.4146, "eval_samples_per_second": 32.304, "eval_steps_per_second": 1.242, "step": 10 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.3914, "step": 11 }, { "epoch": 0.85, "eval_accuracy": 0.06722252106867492, "eval_loss": 2.5234375, "eval_runtime": 2.4302, "eval_samples_per_second": 32.096, "eval_steps_per_second": 1.234, "step": 11 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.429, "step": 12 }, { "epoch": 0.92, "eval_accuracy": 0.06709700555854402, "eval_loss": 2.51953125, "eval_runtime": 2.9318, "eval_samples_per_second": 26.605, "eval_steps_per_second": 1.023, "step": 12 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.3055, "step": 13 }, { "epoch": 1.0, "eval_accuracy": 0.06720459028151336, "eval_loss": 2.51171875, "eval_runtime": 2.9231, "eval_samples_per_second": 26.684, "eval_steps_per_second": 1.026, "step": 13 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 1.7162, "step": 14 }, { "epoch": 1.08, "eval_accuracy": 0.0671866594943518, "eval_loss": 2.521484375, "eval_runtime": 2.9212, "eval_samples_per_second": 26.702, "eval_steps_per_second": 1.027, "step": 14 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 1.7264, "step": 15 }, { "epoch": 1.15, "eval_accuracy": 0.06774251389636005, "eval_loss": 2.546875, "eval_runtime": 2.6863, "eval_samples_per_second": 29.036, "eval_steps_per_second": 1.117, "step": 15 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 1.7559, "step": 16 }, { "epoch": 1.23, "eval_accuracy": 0.06707907477138246, "eval_loss": 2.587890625, "eval_runtime": 2.9302, "eval_samples_per_second": 26.619, "eval_steps_per_second": 1.024, "step": 16 }, { "epoch": 1.31, "learning_rate": 5e-05, "loss": 1.7899, "step": 17 }, { "epoch": 1.31, "eval_accuracy": 0.0667383898153129, "eval_loss": 2.611328125, "eval_runtime": 2.6864, "eval_samples_per_second": 29.035, "eval_steps_per_second": 1.117, "step": 17 }, { "epoch": 1.38, "learning_rate": 5e-05, "loss": 1.6465, "step": 18 }, { "epoch": 1.38, "eval_accuracy": 0.06664873587950511, "eval_loss": 2.619140625, "eval_runtime": 2.9278, "eval_samples_per_second": 26.641, "eval_steps_per_second": 1.025, "step": 18 }, { "epoch": 1.46, "learning_rate": 5e-05, "loss": 1.5955, "step": 19 }, { "epoch": 1.46, "eval_accuracy": 0.0670611439842209, "eval_loss": 2.607421875, "eval_runtime": 2.4278, "eval_samples_per_second": 32.128, "eval_steps_per_second": 1.236, "step": 19 }, { "epoch": 1.54, "learning_rate": 5e-05, "loss": 1.5389, "step": 20 }, { "epoch": 1.54, "eval_accuracy": 0.06720459028151336, "eval_loss": 2.595703125, "eval_runtime": 2.6825, "eval_samples_per_second": 29.077, "eval_steps_per_second": 1.118, "step": 20 }, { "epoch": 1.62, "learning_rate": 5e-05, "loss": 1.5356, "step": 21 }, { "epoch": 1.62, "eval_accuracy": 0.06697149004841313, "eval_loss": 2.5859375, "eval_runtime": 2.6798, "eval_samples_per_second": 29.107, "eval_steps_per_second": 1.119, "step": 21 }, { "epoch": 1.69, "learning_rate": 5e-05, "loss": 1.386, "step": 22 }, { "epoch": 1.69, "eval_accuracy": 0.06722252106867492, "eval_loss": 2.58203125, "eval_runtime": 2.9465, "eval_samples_per_second": 26.472, "eval_steps_per_second": 1.018, "step": 22 }, { "epoch": 1.77, "learning_rate": 5e-05, "loss": 1.7698, "step": 23 }, { "epoch": 1.77, "eval_accuracy": 0.06695355926125157, "eval_loss": 2.57421875, "eval_runtime": 2.1897, "eval_samples_per_second": 35.622, "eval_steps_per_second": 1.37, "step": 23 }, { "epoch": 1.85, "learning_rate": 5e-05, "loss": 1.3923, "step": 24 }, { "epoch": 1.85, "eval_accuracy": 0.06686390532544378, "eval_loss": 2.580078125, "eval_runtime": 2.6567, "eval_samples_per_second": 29.36, "eval_steps_per_second": 1.129, "step": 24 }, { "epoch": 1.92, "learning_rate": 5e-05, "loss": 1.4723, "step": 25 }, { "epoch": 1.92, "eval_accuracy": 0.06722252106867492, "eval_loss": 2.58984375, "eval_runtime": 2.6764, "eval_samples_per_second": 29.144, "eval_steps_per_second": 1.121, "step": 25 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 1.5653, "step": 26 }, { "epoch": 2.0, "eval_accuracy": 0.06715079792002869, "eval_loss": 2.603515625, "eval_runtime": 2.4205, "eval_samples_per_second": 32.225, "eval_steps_per_second": 1.239, "step": 26 }, { "epoch": 2.0, "step": 26, "total_flos": 2728683110400.0, "train_loss": 2.0602510892427883, "train_runtime": 142.5326, "train_samples_per_second": 5.837, "train_steps_per_second": 0.182 } ], "max_steps": 26, "num_train_epochs": 2, "total_flos": 2728683110400.0, "trial_name": null, "trial_params": null }