|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 40080, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.875249500998003e-05, |
|
"loss": 7.8671, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.750499001996009e-05, |
|
"loss": 7.0941, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.625748502994012e-05, |
|
"loss": 6.7944, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.500998003992016e-05, |
|
"loss": 6.526, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.37624750499002e-05, |
|
"loss": 6.2583, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.251497005988024e-05, |
|
"loss": 6.0239, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.126746506986029e-05, |
|
"loss": 5.7969, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.001996007984033e-05, |
|
"loss": 5.5919, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.877245508982036e-05, |
|
"loss": 5.3796, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.752495009980041e-05, |
|
"loss": 5.1887, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.627744510978044e-05, |
|
"loss": 5.0026, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.502994011976048e-05, |
|
"loss": 4.843, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 8.378243512974053e-05, |
|
"loss": 4.6736, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.253493013972057e-05, |
|
"loss": 4.553, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 8.12874251497006e-05, |
|
"loss": 4.4224, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.003992015968065e-05, |
|
"loss": 4.3097, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.879241516966068e-05, |
|
"loss": 4.1975, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 7.754491017964072e-05, |
|
"loss": 4.0994, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.629740518962077e-05, |
|
"loss": 4.0258, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 7.50499001996008e-05, |
|
"loss": 3.9443, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 7.380239520958085e-05, |
|
"loss": 3.8616, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 7.255489021956088e-05, |
|
"loss": 3.8086, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 7.130738522954092e-05, |
|
"loss": 3.7464, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 7.005988023952096e-05, |
|
"loss": 3.6925, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 6.8812375249501e-05, |
|
"loss": 3.6216, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 6.756487025948103e-05, |
|
"loss": 3.585, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.631736526946109e-05, |
|
"loss": 3.5383, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 6.506986027944112e-05, |
|
"loss": 3.4925, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 6.382235528942116e-05, |
|
"loss": 3.4524, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 6.25748502994012e-05, |
|
"loss": 3.3985, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 6.132734530938125e-05, |
|
"loss": 3.3684, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 6.007984031936128e-05, |
|
"loss": 3.3273, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 5.8832335329341316e-05, |
|
"loss": 3.2898, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 5.758483033932136e-05, |
|
"loss": 3.2554, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 5.6337325349301393e-05, |
|
"loss": 3.2215, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.508982035928144e-05, |
|
"loss": 3.2006, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 5.384231536926148e-05, |
|
"loss": 3.1627, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.259481037924152e-05, |
|
"loss": 3.1391, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 5.1347305389221555e-05, |
|
"loss": 3.1167, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 5.0099800399201604e-05, |
|
"loss": 3.108, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 4.885229540918164e-05, |
|
"loss": 3.0782, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 4.7604790419161675e-05, |
|
"loss": 3.054, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 4.635728542914172e-05, |
|
"loss": 3.0302, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 4.510978043912176e-05, |
|
"loss": 3.0129, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 4.3862275449101795e-05, |
|
"loss": 2.9978, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.261477045908184e-05, |
|
"loss": 2.9658, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 4.136726546906188e-05, |
|
"loss": 2.9536, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 4.0119760479041915e-05, |
|
"loss": 2.937, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 3.887225548902196e-05, |
|
"loss": 2.9144, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 3.7624750499002e-05, |
|
"loss": 2.9045, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 3.637724550898204e-05, |
|
"loss": 2.8814, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 3.512974051896208e-05, |
|
"loss": 2.8749, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 3.388223552894212e-05, |
|
"loss": 2.8333, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.263473053892216e-05, |
|
"loss": 2.8402, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 3.13872255489022e-05, |
|
"loss": 2.8363, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 3.013972055888224e-05, |
|
"loss": 2.8235, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 2.8892215568862274e-05, |
|
"loss": 2.8038, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 2.7644710578842313e-05, |
|
"loss": 2.7948, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 2.6397205588822355e-05, |
|
"loss": 2.7762, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.5149700598802394e-05, |
|
"loss": 2.7711, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 2.3902195608782436e-05, |
|
"loss": 2.7616, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 2.2654690618762475e-05, |
|
"loss": 2.7325, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 2.1407185628742514e-05, |
|
"loss": 2.7418, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 2.0159680638722556e-05, |
|
"loss": 2.7358, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 1.8912175648702595e-05, |
|
"loss": 2.7215, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 1.7664670658682637e-05, |
|
"loss": 2.7199, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 1.6417165668662676e-05, |
|
"loss": 2.7016, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 1.5169660678642716e-05, |
|
"loss": 2.6966, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 1.3922155688622754e-05, |
|
"loss": 2.6957, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 17.47, |
|
"learning_rate": 1.2674650698602794e-05, |
|
"loss": 2.7005, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.1427145708582835e-05, |
|
"loss": 2.6819, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"learning_rate": 1.0179640718562875e-05, |
|
"loss": 2.6823, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 8.932135728542916e-06, |
|
"loss": 2.6879, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 7.684630738522954e-06, |
|
"loss": 2.6588, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 6.437125748502994e-06, |
|
"loss": 2.6521, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 5.189620758483034e-06, |
|
"loss": 2.6667, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 3.942115768463074e-06, |
|
"loss": 2.6727, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 2.6946107784431138e-06, |
|
"loss": 2.6702, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.4471057884231539e-06, |
|
"loss": 2.6484, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"learning_rate": 1.996007984031936e-07, |
|
"loss": 2.6556, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 40080, |
|
"total_flos": 2362586579435520.0, |
|
"train_loss": 3.5848853368245197, |
|
"train_runtime": 32321.6149, |
|
"train_samples_per_second": 158.707, |
|
"train_steps_per_second": 1.24 |
|
} |
|
], |
|
"max_steps": 40080, |
|
"num_train_epochs": 20, |
|
"total_flos": 2362586579435520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|