{ "best_metric": 0.9393565058708191, "best_model_checkpoint": "./checkpoint-220", "epoch": 5.930232558139535, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 5e-05, "loss": 6.7937, "step": 10 }, { "epoch": 0.27, "eval_loss": 6.501975059509277, "eval_runtime": 33.9782, "eval_samples_per_second": 59.685, "eval_steps_per_second": 1.884, "step": 10 }, { "epoch": 0.53, "learning_rate": 0.0001, "loss": 5.985, "step": 20 }, { "epoch": 0.53, "eval_loss": 5.326801776885986, "eval_runtime": 18.802, "eval_samples_per_second": 107.861, "eval_steps_per_second": 3.404, "step": 20 }, { "epoch": 0.8, "learning_rate": 0.00015, "loss": 4.6798, "step": 30 }, { "epoch": 0.8, "eval_loss": 3.829317331314087, "eval_runtime": 18.7742, "eval_samples_per_second": 108.021, "eval_steps_per_second": 3.409, "step": 30 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 3.0462, "step": 40 }, { "epoch": 1.08, "eval_loss": 1.9157757759094238, "eval_runtime": 18.7907, "eval_samples_per_second": 107.926, "eval_steps_per_second": 3.406, "step": 40 }, { "epoch": 1.35, "learning_rate": 0.00025, "loss": 1.5197, "step": 50 }, { "epoch": 1.35, "eval_loss": 1.2286747694015503, "eval_runtime": 18.812, "eval_samples_per_second": 107.804, "eval_steps_per_second": 3.402, "step": 50 }, { "epoch": 1.61, "learning_rate": 0.0003, "loss": 1.1349, "step": 60 }, { "epoch": 1.61, "eval_loss": 1.1046648025512695, "eval_runtime": 18.8413, "eval_samples_per_second": 107.636, "eval_steps_per_second": 3.397, "step": 60 }, { "epoch": 1.88, "learning_rate": 0.00035, "loss": 1.0718, "step": 70 }, { "epoch": 1.88, "eval_loss": 1.0654218196868896, "eval_runtime": 18.8231, "eval_samples_per_second": 107.74, "eval_steps_per_second": 3.4, "step": 70 }, { "epoch": 2.16, "learning_rate": 0.0004, "loss": 1.0987, "step": 80 }, { "epoch": 2.16, "eval_loss": 1.0404733419418335, "eval_runtime": 18.8082, "eval_samples_per_second": 107.825, "eval_steps_per_second": 3.403, "step": 80 }, { "epoch": 2.43, "learning_rate": 0.00045000000000000004, "loss": 1.0133, "step": 90 }, { "epoch": 2.43, "eval_loss": 1.0208371877670288, "eval_runtime": 18.8192, "eval_samples_per_second": 107.762, "eval_steps_per_second": 3.401, "step": 90 }, { "epoch": 2.69, "learning_rate": 0.0005, "loss": 0.9869, "step": 100 }, { "epoch": 2.69, "eval_loss": 1.0047191381454468, "eval_runtime": 18.852, "eval_samples_per_second": 107.575, "eval_steps_per_second": 3.395, "step": 100 }, { "epoch": 2.96, "learning_rate": 0.00045901639344262296, "loss": 0.9809, "step": 110 }, { "epoch": 2.96, "eval_loss": 0.9912181496620178, "eval_runtime": 18.8516, "eval_samples_per_second": 107.577, "eval_steps_per_second": 3.395, "step": 110 }, { "epoch": 3.24, "learning_rate": 0.0004180327868852459, "loss": 1.0275, "step": 120 }, { "epoch": 3.24, "eval_loss": 0.9803428649902344, "eval_runtime": 18.8293, "eval_samples_per_second": 107.705, "eval_steps_per_second": 3.399, "step": 120 }, { "epoch": 3.5, "learning_rate": 0.0003770491803278688, "loss": 0.9608, "step": 130 }, { "epoch": 3.5, "eval_loss": 0.9717100262641907, "eval_runtime": 18.8516, "eval_samples_per_second": 107.577, "eval_steps_per_second": 3.395, "step": 130 }, { "epoch": 3.77, "learning_rate": 0.0003360655737704918, "loss": 0.9431, "step": 140 }, { "epoch": 3.77, "eval_loss": 0.9643934965133667, "eval_runtime": 18.8147, "eval_samples_per_second": 107.788, "eval_steps_per_second": 3.402, "step": 140 }, { "epoch": 4.05, "learning_rate": 0.00029508196721311476, "loss": 0.991, "step": 150 }, { "epoch": 4.05, "eval_loss": 0.9581753015518188, "eval_runtime": 18.8481, "eval_samples_per_second": 107.597, "eval_steps_per_second": 3.396, "step": 150 }, { "epoch": 4.32, "learning_rate": 0.0002540983606557377, "loss": 0.9387, "step": 160 }, { "epoch": 4.32, "eval_loss": 0.9531411528587341, "eval_runtime": 18.8022, "eval_samples_per_second": 107.86, "eval_steps_per_second": 3.404, "step": 160 }, { "epoch": 4.58, "learning_rate": 0.00021311475409836064, "loss": 0.9203, "step": 170 }, { "epoch": 4.58, "eval_loss": 0.9489945769309998, "eval_runtime": 18.7924, "eval_samples_per_second": 107.916, "eval_steps_per_second": 3.406, "step": 170 }, { "epoch": 4.85, "learning_rate": 0.00017213114754098362, "loss": 0.9235, "step": 180 }, { "epoch": 4.85, "eval_loss": 0.9456363320350647, "eval_runtime": 18.8084, "eval_samples_per_second": 107.824, "eval_steps_per_second": 3.403, "step": 180 }, { "epoch": 5.13, "learning_rate": 0.00013114754098360657, "loss": 0.9746, "step": 190 }, { "epoch": 5.13, "eval_loss": 0.9429621696472168, "eval_runtime": 18.8357, "eval_samples_per_second": 107.668, "eval_steps_per_second": 3.398, "step": 190 }, { "epoch": 5.4, "learning_rate": 9.016393442622952e-05, "loss": 0.9176, "step": 200 }, { "epoch": 5.4, "eval_loss": 0.9410804510116577, "eval_runtime": 18.8338, "eval_samples_per_second": 107.679, "eval_steps_per_second": 3.398, "step": 200 }, { "epoch": 5.66, "learning_rate": 4.9180327868852456e-05, "loss": 0.9175, "step": 210 }, { "epoch": 5.66, "eval_loss": 0.9398788213729858, "eval_runtime": 18.8076, "eval_samples_per_second": 107.829, "eval_steps_per_second": 3.403, "step": 210 }, { "epoch": 5.93, "learning_rate": 8.19672131147541e-06, "loss": 0.91, "step": 220 }, { "epoch": 5.93, "eval_loss": 0.9393565058708191, "eval_runtime": 18.7996, "eval_samples_per_second": 107.874, "eval_steps_per_second": 3.404, "step": 220 } ], "max_steps": 222, "num_train_epochs": 6, "total_flos": 8.954608582656e+16, "trial_name": null, "trial_params": null }