{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1027095148078134, "global_step": 140000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9921235034656587e-06, "loss": 0.2621, "step": 5000 }, { "epoch": 0.08, "learning_rate": 1.984247006931317e-06, "loss": 0.2548, "step": 10000 }, { "epoch": 0.12, "learning_rate": 1.9763705103969753e-06, "loss": 0.2451, "step": 15000 }, { "epoch": 0.16, "learning_rate": 1.9684940138626337e-06, "loss": 0.2479, "step": 20000 }, { "epoch": 0.2, "learning_rate": 1.9606175173282924e-06, "loss": 0.2477, "step": 25000 }, { "epoch": 0.2, "eval_accuracy": 0.9227449297904968, "eval_loss": 0.42459121346473694, "eval_runtime": 532.8547, "eval_samples_per_second": 53.054, "eval_steps_per_second": 13.264, "step": 25000 }, { "epoch": 0.24, "learning_rate": 1.9527410207939508e-06, "loss": 0.253, "step": 30000 }, { "epoch": 0.28, "learning_rate": 1.9448645242596095e-06, "loss": 0.2466, "step": 35000 }, { "epoch": 0.32, "learning_rate": 1.936988027725268e-06, "loss": 0.25, "step": 40000 }, { "epoch": 0.35, "learning_rate": 1.929111531190926e-06, "loss": 0.2402, "step": 45000 }, { "epoch": 0.39, "learning_rate": 1.9212350346565845e-06, "loss": 0.2515, "step": 50000 }, { "epoch": 0.39, "eval_accuracy": 0.926034688949585, "eval_loss": 0.3926495909690857, "eval_runtime": 506.3742, "eval_samples_per_second": 55.828, "eval_steps_per_second": 13.958, "step": 50000 }, { "epoch": 0.43, "learning_rate": 1.9133585381222433e-06, "loss": 0.2383, "step": 55000 }, { "epoch": 0.47, "learning_rate": 1.9054820415879016e-06, "loss": 0.2523, "step": 60000 }, { "epoch": 0.51, "learning_rate": 1.8976055450535602e-06, "loss": 0.2372, "step": 65000 }, { "epoch": 0.55, "learning_rate": 1.8897290485192185e-06, "loss": 0.2395, "step": 70000 }, { "epoch": 0.59, "learning_rate": 1.881852551984877e-06, "loss": 0.2376, "step": 75000 }, { "epoch": 0.59, "eval_accuracy": 0.9263883829116821, "eval_loss": 0.3989144265651703, "eval_runtime": 505.8843, "eval_samples_per_second": 55.882, "eval_steps_per_second": 13.972, "step": 75000 }, { "epoch": 0.63, "learning_rate": 1.8739760554505356e-06, "loss": 0.2331, "step": 80000 }, { "epoch": 0.67, "learning_rate": 1.8660995589161941e-06, "loss": 0.2426, "step": 85000 }, { "epoch": 0.71, "learning_rate": 1.8582230623818525e-06, "loss": 0.2493, "step": 90000 }, { "epoch": 0.75, "learning_rate": 1.850346565847511e-06, "loss": 0.2379, "step": 95000 }, { "epoch": 0.79, "learning_rate": 1.8424700693131694e-06, "loss": 0.2428, "step": 100000 }, { "epoch": 0.79, "eval_accuracy": 0.9267421364784241, "eval_loss": 0.3985295295715332, "eval_runtime": 549.265, "eval_samples_per_second": 51.469, "eval_steps_per_second": 12.868, "step": 100000 }, { "epoch": 0.83, "learning_rate": 1.834593572778828e-06, "loss": 0.2429, "step": 105000 }, { "epoch": 0.87, "learning_rate": 1.8267170762444864e-06, "loss": 0.238, "step": 110000 }, { "epoch": 0.91, "learning_rate": 1.818840579710145e-06, "loss": 0.2322, "step": 115000 }, { "epoch": 0.95, "learning_rate": 1.8109640831758033e-06, "loss": 0.2371, "step": 120000 }, { "epoch": 0.98, "learning_rate": 1.8030875866414619e-06, "loss": 0.2303, "step": 125000 }, { "epoch": 0.98, "eval_accuracy": 0.9282631874084473, "eval_loss": 0.40024659037590027, "eval_runtime": 548.3799, "eval_samples_per_second": 51.552, "eval_steps_per_second": 12.889, "step": 125000 }, { "epoch": 1.02, "learning_rate": 1.7952110901071202e-06, "loss": 0.221, "step": 130000 }, { "epoch": 1.06, "learning_rate": 1.7873345935727788e-06, "loss": 0.2199, "step": 135000 }, { "epoch": 1.1, "learning_rate": 1.779458097038437e-06, "loss": 0.2097, "step": 140000 } ], "max_steps": 1269600, "num_train_epochs": 10, "total_flos": 1.473443106221998e+17, "trial_name": null, "trial_params": null }