{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 7320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.68, "learning_rate": 6.25e-05, "loss": 6.1051, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.39493334876226643, "eval_loss": 3.5367648601531982, "eval_runtime": 3.7763, "eval_samples_per_second": 1189.255, "eval_steps_per_second": 2.383, "step": 732 }, { "epoch": 1.37, "learning_rate": 0.000125, "loss": 3.4882, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.4336529577089437, "eval_loss": 3.115656852722168, "eval_runtime": 3.9524, "eval_samples_per_second": 1136.258, "eval_steps_per_second": 2.277, "step": 1464 }, { "epoch": 2.05, "learning_rate": 0.0001875, "loss": 3.1377, "step": 1500 }, { "epoch": 2.73, "learning_rate": 0.00025, "loss": 2.9431, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.45023730751090985, "eval_loss": 2.916651725769043, "eval_runtime": 3.9895, "eval_samples_per_second": 1125.698, "eval_steps_per_second": 2.256, "step": 2196 }, { "epoch": 3.42, "learning_rate": 0.0003125, "loss": 2.7991, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.461877385567285, "eval_loss": 2.7866480350494385, "eval_runtime": 4.1737, "eval_samples_per_second": 1076.013, "eval_steps_per_second": 2.156, "step": 2928 }, { "epoch": 4.1, "learning_rate": 0.000375, "loss": 2.706, "step": 3000 }, { "epoch": 4.78, "learning_rate": 0.00043750000000000006, "loss": 2.6152, "step": 3500 }, { "epoch": 5.0, "eval_accuracy": 0.47078584114861394, "eval_loss": 2.697930097579956, "eval_runtime": 4.1031, "eval_samples_per_second": 1094.526, "eval_steps_per_second": 2.193, "step": 3660 }, { "epoch": 5.46, "learning_rate": 0.0005, "loss": 2.5487, "step": 4000 }, { "epoch": 6.0, "eval_accuracy": 0.4759054416795095, "eval_loss": 2.6426050662994385, "eval_runtime": 4.0254, "eval_samples_per_second": 1115.658, "eval_steps_per_second": 2.236, "step": 4392 }, { "epoch": 6.15, "learning_rate": 0.0005625000000000001, "loss": 2.4959, "step": 4500 }, { "epoch": 6.83, "learning_rate": 0.000625, "loss": 2.4491, "step": 5000 }, { "epoch": 7.0, "eval_accuracy": 0.48190869928834046, "eval_loss": 2.6028361320495605, "eval_runtime": 3.9975, "eval_samples_per_second": 1123.445, "eval_steps_per_second": 2.251, "step": 5124 }, { "epoch": 7.51, "learning_rate": 0.0006875, "loss": 2.4034, "step": 5500 }, { "epoch": 8.0, "eval_accuracy": 0.48347263205325786, "eval_loss": 2.5807623863220215, "eval_runtime": 4.0838, "eval_samples_per_second": 1099.699, "eval_steps_per_second": 2.204, "step": 5856 }, { "epoch": 8.2, "learning_rate": 0.00075, "loss": 2.3808, "step": 6000 }, { "epoch": 8.88, "learning_rate": 0.0008125, "loss": 2.364, "step": 6500 }, { "epoch": 9.0, "eval_accuracy": 0.48510669633229714, "eval_loss": 2.565694570541382, "eval_runtime": 4.2034, "eval_samples_per_second": 1068.427, "eval_steps_per_second": 2.141, "step": 6588 }, { "epoch": 9.56, "learning_rate": 0.0008750000000000001, "loss": 2.3269, "step": 7000 }, { "epoch": 10.0, "eval_accuracy": 0.4868845302152862, "eval_loss": 2.5603630542755127, "eval_runtime": 4.0953, "eval_samples_per_second": 1096.619, "eval_steps_per_second": 2.198, "step": 7320 }, { "epoch": 10.0, "step": 7320, "total_flos": 2273237316403200.0, "train_loss": 2.8865941365559897, "train_runtime": 626.0526, "train_samples_per_second": 748.26, "train_steps_per_second": 11.692 } ], "logging_steps": 500, "max_steps": 7320, "num_train_epochs": 10, "save_steps": 2000, "total_flos": 2273237316403200.0, "trial_name": null, "trial_params": null }