{ "best_metric": 1.2128502130508423, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.987249544626594, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1918, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.024646282196045, "eval_runtime": 143.5031, "eval_samples_per_second": 43.72, "eval_steps_per_second": 5.47, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.9739, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9310301542282104, "eval_runtime": 143.4865, "eval_samples_per_second": 43.725, "eval_steps_per_second": 5.471, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.8997, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.869322419166565, "eval_runtime": 143.5199, "eval_samples_per_second": 43.715, "eval_steps_per_second": 5.47, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8476, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.8241251707077026, "eval_runtime": 143.5226, "eval_samples_per_second": 43.714, "eval_steps_per_second": 5.47, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7962, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.7821052074432373, "eval_runtime": 143.5402, "eval_samples_per_second": 43.709, "eval_steps_per_second": 5.469, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.7679, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7543551921844482, "eval_runtime": 143.714, "eval_samples_per_second": 43.656, "eval_steps_per_second": 5.462, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7293, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7226487398147583, "eval_runtime": 143.4831, "eval_samples_per_second": 43.726, "eval_steps_per_second": 5.471, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.7081, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.6933537721633911, "eval_runtime": 143.5765, "eval_samples_per_second": 43.698, "eval_steps_per_second": 5.467, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.6726, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6686474084854126, "eval_runtime": 143.516, "eval_samples_per_second": 43.716, "eval_steps_per_second": 5.47, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6577, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6451665163040161, "eval_runtime": 146.451, "eval_samples_per_second": 42.84, "eval_steps_per_second": 5.36, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6377, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.623993992805481, "eval_runtime": 143.5112, "eval_samples_per_second": 43.718, "eval_steps_per_second": 5.47, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.6028, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.6054009199142456, "eval_runtime": 143.561, "eval_samples_per_second": 43.703, "eval_steps_per_second": 5.468, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.6101, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.5822986364364624, "eval_runtime": 143.5287, "eval_samples_per_second": 43.713, "eval_steps_per_second": 5.469, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5635, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.5618135929107666, "eval_runtime": 143.5801, "eval_samples_per_second": 43.697, "eval_steps_per_second": 5.467, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5353, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.546177864074707, "eval_runtime": 143.5634, "eval_samples_per_second": 43.702, "eval_steps_per_second": 5.468, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5167, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.5285366773605347, "eval_runtime": 143.5631, "eval_samples_per_second": 43.702, "eval_steps_per_second": 5.468, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5143, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.511216640472412, "eval_runtime": 143.4634, "eval_samples_per_second": 43.732, "eval_steps_per_second": 5.472, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.4878, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.496885061264038, "eval_runtime": 143.4809, "eval_samples_per_second": 43.727, "eval_steps_per_second": 5.471, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4773, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.4805316925048828, "eval_runtime": 143.5169, "eval_samples_per_second": 43.716, "eval_steps_per_second": 5.47, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4603, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4675525426864624, "eval_runtime": 143.6108, "eval_samples_per_second": 43.688, "eval_steps_per_second": 5.466, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.446, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4523862600326538, "eval_runtime": 143.5584, "eval_samples_per_second": 43.703, "eval_steps_per_second": 5.468, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4205, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4386054277420044, "eval_runtime": 143.497, "eval_samples_per_second": 43.722, "eval_steps_per_second": 5.47, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4198, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4249650239944458, "eval_runtime": 143.5274, "eval_samples_per_second": 43.713, "eval_steps_per_second": 5.469, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4191, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.4121719598770142, "eval_runtime": 143.5251, "eval_samples_per_second": 43.714, "eval_steps_per_second": 5.469, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3902, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.3990552425384521, "eval_runtime": 143.4243, "eval_samples_per_second": 43.744, "eval_steps_per_second": 5.473, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3802, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3864269256591797, "eval_runtime": 144.1474, "eval_samples_per_second": 43.525, "eval_steps_per_second": 5.446, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3683, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3723174333572388, "eval_runtime": 143.4406, "eval_samples_per_second": 43.739, "eval_steps_per_second": 5.473, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.34, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3610557317733765, "eval_runtime": 143.5173, "eval_samples_per_second": 43.716, "eval_steps_per_second": 5.47, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3145, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.347936749458313, "eval_runtime": 143.5505, "eval_samples_per_second": 43.706, "eval_steps_per_second": 5.468, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.3152, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3374146223068237, "eval_runtime": 143.4898, "eval_samples_per_second": 43.724, "eval_steps_per_second": 5.471, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2956, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.325629472732544, "eval_runtime": 143.5978, "eval_samples_per_second": 43.691, "eval_steps_per_second": 5.467, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2991, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3144720792770386, "eval_runtime": 143.5018, "eval_samples_per_second": 43.721, "eval_steps_per_second": 5.47, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2803, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.3016444444656372, "eval_runtime": 143.5185, "eval_samples_per_second": 43.716, "eval_steps_per_second": 5.47, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2618, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2920358180999756, "eval_runtime": 143.6048, "eval_samples_per_second": 43.689, "eval_steps_per_second": 5.466, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2626, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.2805367708206177, "eval_runtime": 143.5377, "eval_samples_per_second": 43.71, "eval_steps_per_second": 5.469, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2507, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.267604947090149, "eval_runtime": 143.5375, "eval_samples_per_second": 43.71, "eval_steps_per_second": 5.469, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.2342, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.2559330463409424, "eval_runtime": 143.5745, "eval_samples_per_second": 43.699, "eval_steps_per_second": 5.468, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.2114, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.2479283809661865, "eval_runtime": 143.5861, "eval_samples_per_second": 43.695, "eval_steps_per_second": 5.467, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.2207, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.234686255455017, "eval_runtime": 143.5291, "eval_samples_per_second": 43.712, "eval_steps_per_second": 5.469, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.2032, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.2260726690292358, "eval_runtime": 143.5138, "eval_samples_per_second": 43.717, "eval_steps_per_second": 5.47, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.2012, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.2128502130508423, "eval_runtime": 143.5068, "eval_samples_per_second": 43.719, "eval_steps_per_second": 5.47, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.4438234279579648e+17, "trial_name": null, "trial_params": null }