|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.952286282306163, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-05, |
|
"loss": 9.1779, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 3.894397258758545, |
|
"eval_runtime": 33.2081, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.686, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9391, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.7052955627441406, |
|
"eval_runtime": 33.2014, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.687, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4896, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.597261667251587, |
|
"eval_runtime": 33.2105, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4e-05, |
|
"loss": 1.4279, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.559727668762207, |
|
"eval_runtime": 33.1972, |
|
"eval_samples_per_second": 11.778, |
|
"eval_steps_per_second": 1.687, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3676, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.5377521514892578, |
|
"eval_runtime": 33.1973, |
|
"eval_samples_per_second": 11.778, |
|
"eval_steps_per_second": 1.687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.965729952021933e-05, |
|
"loss": 1.3666, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.5202970504760742, |
|
"eval_runtime": 33.2, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.687, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.9314599040438655e-05, |
|
"loss": 1.3432, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 1.5091770887374878, |
|
"eval_runtime": 33.2069, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.8971898560657985e-05, |
|
"loss": 1.3226, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 1.5007234811782837, |
|
"eval_runtime": 33.2119, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.8629198080877314e-05, |
|
"loss": 1.3145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 1.4917149543762207, |
|
"eval_runtime": 33.2096, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.686, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.8286497601096644e-05, |
|
"loss": 1.2897, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.487337350845337, |
|
"eval_runtime": 33.2063, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.794379712131597e-05, |
|
"loss": 1.2956, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.481351613998413, |
|
"eval_runtime": 33.2118, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.76010966415353e-05, |
|
"loss": 1.2791, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.4768741130828857, |
|
"eval_runtime": 33.2069, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.725839616175463e-05, |
|
"loss": 1.2679, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 1.4751813411712646, |
|
"eval_runtime": 33.1963, |
|
"eval_samples_per_second": 11.778, |
|
"eval_steps_per_second": 1.687, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.6915695681973956e-05, |
|
"loss": 1.2583, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 1.4700709581375122, |
|
"eval_runtime": 33.2157, |
|
"eval_samples_per_second": 11.772, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.6572995202193286e-05, |
|
"loss": 1.2813, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.4654401540756226, |
|
"eval_runtime": 33.205, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.6230294722412615e-05, |
|
"loss": 1.25, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 1.4636775255203247, |
|
"eval_runtime": 33.207, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.5887594242631945e-05, |
|
"loss": 1.2393, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 1.4608376026153564, |
|
"eval_runtime": 33.2094, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.686, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.554489376285127e-05, |
|
"loss": 1.2599, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 1.4582923650741577, |
|
"eval_runtime": 33.2003, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.687, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.52021932830706e-05, |
|
"loss": 1.2336, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 1.455262303352356, |
|
"eval_runtime": 33.2047, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.687, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.485949280328993e-05, |
|
"loss": 1.2374, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 1.4536148309707642, |
|
"eval_runtime": 33.2036, |
|
"eval_samples_per_second": 11.776, |
|
"eval_steps_per_second": 1.687, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 4.451679232350926e-05, |
|
"loss": 1.2171, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 1.4514210224151611, |
|
"eval_runtime": 33.2199, |
|
"eval_samples_per_second": 11.77, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 4.417409184372858e-05, |
|
"loss": 1.2154, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 1.450243592262268, |
|
"eval_runtime": 33.2384, |
|
"eval_samples_per_second": 11.763, |
|
"eval_steps_per_second": 1.685, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.383139136394791e-05, |
|
"loss": 1.2247, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 1.4490052461624146, |
|
"eval_runtime": 33.2063, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.348869088416724e-05, |
|
"loss": 1.2234, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 1.4472484588623047, |
|
"eval_runtime": 33.2181, |
|
"eval_samples_per_second": 11.771, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 4.314599040438657e-05, |
|
"loss": 1.2152, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_loss": 1.4455540180206299, |
|
"eval_runtime": 33.205, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 4.280328992460589e-05, |
|
"loss": 1.1883, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_loss": 1.4456433057785034, |
|
"eval_runtime": 33.2111, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 4.246058944482523e-05, |
|
"loss": 1.1947, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_loss": 1.4448508024215698, |
|
"eval_runtime": 33.1978, |
|
"eval_samples_per_second": 11.778, |
|
"eval_steps_per_second": 1.687, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.211788896504456e-05, |
|
"loss": 1.2127, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 1.4433377981185913, |
|
"eval_runtime": 33.2104, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.177518848526388e-05, |
|
"loss": 1.1959, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 1.4412949085235596, |
|
"eval_runtime": 33.2197, |
|
"eval_samples_per_second": 11.77, |
|
"eval_steps_per_second": 1.686, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 4.143248800548321e-05, |
|
"loss": 1.1913, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 1.4401354789733887, |
|
"eval_runtime": 33.21, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 4.108978752570254e-05, |
|
"loss": 1.1966, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 1.4406956434249878, |
|
"eval_runtime": 33.2007, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.687, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 4.0747087045921863e-05, |
|
"loss": 1.1798, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 1.4382696151733398, |
|
"eval_runtime": 33.2065, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 4.040438656614119e-05, |
|
"loss": 1.1736, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 1.4385173320770264, |
|
"eval_runtime": 33.2237, |
|
"eval_samples_per_second": 11.769, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 4.006168608636052e-05, |
|
"loss": 1.1661, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 1.4370990991592407, |
|
"eval_runtime": 33.2081, |
|
"eval_samples_per_second": 11.774, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 3.971898560657985e-05, |
|
"loss": 1.1802, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_loss": 1.4352325201034546, |
|
"eval_runtime": 33.2067, |
|
"eval_samples_per_second": 11.775, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 3.9376285126799175e-05, |
|
"loss": 1.1649, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"eval_loss": 1.436591386795044, |
|
"eval_runtime": 33.2379, |
|
"eval_samples_per_second": 11.764, |
|
"eval_steps_per_second": 1.685, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 3.9033584647018505e-05, |
|
"loss": 1.1654, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"eval_loss": 1.4353548288345337, |
|
"eval_runtime": 33.2162, |
|
"eval_samples_per_second": 11.771, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 3.8690884167237835e-05, |
|
"loss": 1.1604, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 1.4346917867660522, |
|
"eval_runtime": 33.2322, |
|
"eval_samples_per_second": 11.766, |
|
"eval_steps_per_second": 1.685, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 3.8348183687457165e-05, |
|
"loss": 1.1555, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 1.4331263303756714, |
|
"eval_runtime": 33.2124, |
|
"eval_samples_per_second": 11.773, |
|
"eval_steps_per_second": 1.686, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 3.800548320767649e-05, |
|
"loss": 1.1617, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 1.4325422048568726, |
|
"eval_runtime": 33.2006, |
|
"eval_samples_per_second": 11.777, |
|
"eval_steps_per_second": 1.687, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 15090, |
|
"num_train_epochs": 30, |
|
"total_flos": 8.075550264813158e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|