{ "best_metric": 1.0927647352218628, "best_model_checkpoint": "/kaggle/output/checkpoint-12000", "epoch": 0.5296610169491526, "eval_steps": 1000, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7777777777777777e-11, "loss": 1.2184, "step": 1 }, { "epoch": 0.04, "learning_rate": 2.7750000000000004e-08, "loss": 1.1394, "step": 1000 }, { "epoch": 0.04, "eval_accuracy": 0.3327345309381238, "eval_loss": 1.1149410009384155, "eval_runtime": 20.6803, "eval_samples_per_second": 242.26, "eval_steps_per_second": 30.319, "step": 1000 }, { "epoch": 0.08, "learning_rate": 5.5527777777777784e-08, "loss": 1.1141, "step": 2000 }, { "epoch": 0.08, "eval_accuracy": 0.3401197604790419, "eval_loss": 1.104099988937378, "eval_runtime": 20.8477, "eval_samples_per_second": 240.314, "eval_steps_per_second": 30.075, "step": 2000 }, { "epoch": 0.12, "learning_rate": 8.330555555555556e-08, "loss": 1.116, "step": 3000 }, { "epoch": 0.12, "eval_accuracy": 0.3407185628742515, "eval_loss": 1.1040862798690796, "eval_runtime": 20.6818, "eval_samples_per_second": 242.242, "eval_steps_per_second": 30.317, "step": 3000 }, { "epoch": 0.16, "learning_rate": 1.1108333333333333e-07, "loss": 1.1158, "step": 4000 }, { "epoch": 0.16, "eval_accuracy": 0.32894211576846305, "eval_loss": 1.1020556688308716, "eval_runtime": 20.8541, "eval_samples_per_second": 240.241, "eval_steps_per_second": 30.066, "step": 4000 }, { "epoch": 0.2, "learning_rate": 1.3883333333333335e-07, "loss": 1.1135, "step": 5000 }, { "epoch": 0.2, "eval_accuracy": 0.34271457085828344, "eval_loss": 1.1008552312850952, "eval_runtime": 20.8055, "eval_samples_per_second": 240.802, "eval_steps_per_second": 30.136, "step": 5000 }, { "epoch": 0.24, "learning_rate": 1.6658333333333335e-07, "loss": 1.1121, "step": 6000 }, { "epoch": 0.24, "eval_accuracy": 0.3395209580838323, "eval_loss": 1.1004050970077515, "eval_runtime": 20.8985, "eval_samples_per_second": 239.731, "eval_steps_per_second": 30.002, "step": 6000 }, { "epoch": 0.29, "learning_rate": 1.9436111111111112e-07, "loss": 1.1089, "step": 7000 }, { "epoch": 0.29, "eval_accuracy": 0.35788423153692617, "eval_loss": 1.0985721349716187, "eval_runtime": 20.84, "eval_samples_per_second": 240.403, "eval_steps_per_second": 30.086, "step": 7000 }, { "epoch": 0.33, "learning_rate": 2.2213888888888891e-07, "loss": 1.1079, "step": 8000 }, { "epoch": 0.33, "eval_accuracy": 0.3331337325349301, "eval_loss": 1.098374843597412, "eval_runtime": 20.7886, "eval_samples_per_second": 240.998, "eval_steps_per_second": 30.161, "step": 8000 }, { "epoch": 0.37, "learning_rate": 2.4988888888888893e-07, "loss": 1.1087, "step": 9000 }, { "epoch": 0.37, "eval_accuracy": 0.34510978043912177, "eval_loss": 1.0993521213531494, "eval_runtime": 20.782, "eval_samples_per_second": 241.074, "eval_steps_per_second": 30.17, "step": 9000 }, { "epoch": 0.41, "learning_rate": 2.776666666666667e-07, "loss": 1.109, "step": 10000 }, { "epoch": 0.41, "eval_accuracy": 0.3475049900199601, "eval_loss": 1.0967597961425781, "eval_runtime": 20.6798, "eval_samples_per_second": 242.265, "eval_steps_per_second": 30.319, "step": 10000 }, { "epoch": 0.45, "learning_rate": 3.054444444444444e-07, "loss": 1.1052, "step": 11000 }, { "epoch": 0.45, "eval_accuracy": 0.37544910179640717, "eval_loss": 1.0941349267959595, "eval_runtime": 20.8641, "eval_samples_per_second": 240.126, "eval_steps_per_second": 30.052, "step": 11000 }, { "epoch": 0.49, "learning_rate": 3.3322222222222225e-07, "loss": 1.105, "step": 12000 }, { "epoch": 0.49, "eval_accuracy": 0.3834331337325349, "eval_loss": 1.0927647352218628, "eval_runtime": 20.6541, "eval_samples_per_second": 242.567, "eval_steps_per_second": 30.357, "step": 12000 }, { "epoch": 0.53, "learning_rate": 3.609722222222222e-07, "loss": 1.1016, "step": 13000 }, { "epoch": 0.53, "eval_accuracy": 0.3457085828343313, "eval_loss": 1.0942081212997437, "eval_runtime": 21.0733, "eval_samples_per_second": 237.742, "eval_steps_per_second": 29.753, "step": 13000 } ], "logging_steps": 1000, "max_steps": 10000000, "num_train_epochs": 408, "save_steps": 1000, "total_flos": 9058696298496000.0, "trial_name": null, "trial_params": null }