|
{ |
|
"best_metric": 0.9293885827064514, |
|
"best_model_checkpoint": "C:\\Users\\User\\Downloads\\cache2\\checkpoint-7320000", |
|
"epoch": 49.96556427327113, |
|
"global_step": 7400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.39938690902216e-07, |
|
"loss": 1.0744, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.0361614227294922, |
|
"eval_runtime": 731.8142, |
|
"eval_samples_per_second": 42.359, |
|
"eval_steps_per_second": 7.061, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0799178944241131e-06, |
|
"loss": 1.0786, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.0365289449691772, |
|
"eval_runtime": 731.8318, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.61988359373945e-06, |
|
"loss": 1.0767, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.0318435430526733, |
|
"eval_runtime": 731.7232, |
|
"eval_samples_per_second": 42.364, |
|
"eval_steps_per_second": 7.061, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.1598357888482263e-06, |
|
"loss": 1.0737, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.031322717666626, |
|
"eval_runtime": 731.8002, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.699828496576684e-06, |
|
"loss": 1.0708, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 1.0267112255096436, |
|
"eval_runtime": 738.3448, |
|
"eval_samples_per_second": 41.984, |
|
"eval_steps_per_second": 6.998, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.239807700098581e-06, |
|
"loss": 1.0707, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 1.0309606790542603, |
|
"eval_runtime": 731.7252, |
|
"eval_samples_per_second": 42.364, |
|
"eval_steps_per_second": 7.061, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.779773399413918e-06, |
|
"loss": 1.0699, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.0270657539367676, |
|
"eval_runtime": 731.6452, |
|
"eval_samples_per_second": 42.369, |
|
"eval_steps_per_second": 7.062, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.319739098729255e-06, |
|
"loss": 1.0683, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.0262391567230225, |
|
"eval_runtime": 735.9312, |
|
"eval_samples_per_second": 42.122, |
|
"eval_steps_per_second": 7.021, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.859718302251152e-06, |
|
"loss": 1.0686, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 1.0267152786254883, |
|
"eval_runtime": 731.9313, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.3997110099796094e-06, |
|
"loss": 1.0693, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.0279877185821533, |
|
"eval_runtime": 731.9449, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.939690213501507e-06, |
|
"loss": 1.0714, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 1.030158519744873, |
|
"eval_runtime": 738.5138, |
|
"eval_samples_per_second": 41.975, |
|
"eval_steps_per_second": 6.996, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 6.479655912816843e-06, |
|
"loss": 1.07, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 1.0300403833389282, |
|
"eval_runtime": 738.4095, |
|
"eval_samples_per_second": 41.981, |
|
"eval_steps_per_second": 6.997, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.019648620545301e-06, |
|
"loss": 1.0719, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 1.0280787944793701, |
|
"eval_runtime": 738.1732, |
|
"eval_samples_per_second": 41.994, |
|
"eval_steps_per_second": 7.0, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.5596143198606375e-06, |
|
"loss": 1.0735, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 1.0311589241027832, |
|
"eval_runtime": 741.9081, |
|
"eval_samples_per_second": 41.783, |
|
"eval_steps_per_second": 6.964, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.099607027589095e-06, |
|
"loss": 1.0781, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 1.035496473312378, |
|
"eval_runtime": 733.6394, |
|
"eval_samples_per_second": 42.254, |
|
"eval_steps_per_second": 7.043, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 8.639559222697871e-06, |
|
"loss": 1.0768, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_loss": 1.0344969034194946, |
|
"eval_runtime": 733.6669, |
|
"eval_samples_per_second": 42.252, |
|
"eval_steps_per_second": 7.043, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 9.179538426219769e-06, |
|
"loss": 1.0797, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 1.0367223024368286, |
|
"eval_runtime": 733.6495, |
|
"eval_samples_per_second": 42.253, |
|
"eval_steps_per_second": 7.043, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.719517629741665e-06, |
|
"loss": 1.0834, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 1.0388565063476562, |
|
"eval_runtime": 733.7938, |
|
"eval_samples_per_second": 42.245, |
|
"eval_steps_per_second": 7.041, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 9.971168518993666e-06, |
|
"loss": 1.0833, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 1.0380399227142334, |
|
"eval_runtime": 733.6877, |
|
"eval_samples_per_second": 42.251, |
|
"eval_steps_per_second": 7.043, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.911172330180852e-06, |
|
"loss": 1.0834, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 1.0390028953552246, |
|
"eval_runtime": 733.5168, |
|
"eval_samples_per_second": 42.261, |
|
"eval_steps_per_second": 7.044, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 9.851176141368038e-06, |
|
"loss": 1.0842, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_loss": 1.03898286819458, |
|
"eval_runtime": 733.1528, |
|
"eval_samples_per_second": 42.282, |
|
"eval_steps_per_second": 7.048, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 9.791179952555222e-06, |
|
"loss": 1.0854, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 1.0376673936843872, |
|
"eval_runtime": 733.2924, |
|
"eval_samples_per_second": 42.274, |
|
"eval_steps_per_second": 7.046, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 9.73117926234022e-06, |
|
"loss": 1.0797, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_loss": 1.0366719961166382, |
|
"eval_runtime": 733.1418, |
|
"eval_samples_per_second": 42.282, |
|
"eval_steps_per_second": 7.048, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 9.671183073527405e-06, |
|
"loss": 1.0788, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"eval_loss": 1.0325465202331543, |
|
"eval_runtime": 733.2281, |
|
"eval_samples_per_second": 42.277, |
|
"eval_steps_per_second": 7.047, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 9.611185384247194e-06, |
|
"loss": 1.0798, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 1.0343610048294067, |
|
"eval_runtime": 733.2347, |
|
"eval_samples_per_second": 42.277, |
|
"eval_steps_per_second": 7.047, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 9.551186194499587e-06, |
|
"loss": 1.0784, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_loss": 1.0340007543563843, |
|
"eval_runtime": 733.1036, |
|
"eval_samples_per_second": 42.285, |
|
"eval_steps_per_second": 7.048, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 9.491185504284586e-06, |
|
"loss": 1.0723, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_loss": 1.0315660238265991, |
|
"eval_runtime": 733.0253, |
|
"eval_samples_per_second": 42.289, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.43118931547177e-06, |
|
"loss": 1.0732, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_loss": 1.0322542190551758, |
|
"eval_runtime": 733.0537, |
|
"eval_samples_per_second": 42.287, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 9.371193126658954e-06, |
|
"loss": 1.0744, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 1.029881477355957, |
|
"eval_runtime": 733.0163, |
|
"eval_samples_per_second": 42.29, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.31119693784614e-06, |
|
"loss": 1.0707, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_loss": 1.029407262802124, |
|
"eval_runtime": 733.0808, |
|
"eval_samples_per_second": 42.286, |
|
"eval_steps_per_second": 7.048, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 9.25120224950072e-06, |
|
"loss": 1.0694, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 1.0290433168411255, |
|
"eval_runtime": 732.9639, |
|
"eval_samples_per_second": 42.293, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.191201559285718e-06, |
|
"loss": 1.0691, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_loss": 1.028395414352417, |
|
"eval_runtime": 732.9086, |
|
"eval_samples_per_second": 42.296, |
|
"eval_steps_per_second": 7.05, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 9.131205370472902e-06, |
|
"loss": 1.0699, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"eval_loss": 1.0277873277664185, |
|
"eval_runtime": 732.8691, |
|
"eval_samples_per_second": 42.298, |
|
"eval_steps_per_second": 7.05, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 9.071209181660088e-06, |
|
"loss": 1.0643, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 1.0250962972640991, |
|
"eval_runtime": 731.2424, |
|
"eval_samples_per_second": 42.392, |
|
"eval_steps_per_second": 7.066, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 9.011212992847273e-06, |
|
"loss": 1.0642, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"eval_loss": 1.0227874517440796, |
|
"eval_runtime": 731.2164, |
|
"eval_samples_per_second": 42.394, |
|
"eval_steps_per_second": 7.066, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 8.951216804034457e-06, |
|
"loss": 1.065, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 1.0216150283813477, |
|
"eval_runtime": 730.9956, |
|
"eval_samples_per_second": 42.407, |
|
"eval_steps_per_second": 7.068, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 8.891219114754247e-06, |
|
"loss": 1.0647, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_loss": 1.0240837335586548, |
|
"eval_runtime": 731.0731, |
|
"eval_samples_per_second": 42.402, |
|
"eval_steps_per_second": 7.068, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 8.831224426408827e-06, |
|
"loss": 1.0587, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"eval_loss": 1.0212384462356567, |
|
"eval_runtime": 733.0687, |
|
"eval_samples_per_second": 42.287, |
|
"eval_steps_per_second": 7.048, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 8.771226737128616e-06, |
|
"loss": 1.0611, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_loss": 1.0210351943969727, |
|
"eval_runtime": 732.9747, |
|
"eval_samples_per_second": 42.292, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 8.711230548315802e-06, |
|
"loss": 1.06, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_loss": 1.0200951099395752, |
|
"eval_runtime": 732.955, |
|
"eval_samples_per_second": 42.293, |
|
"eval_steps_per_second": 7.05, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 8.651234359502986e-06, |
|
"loss": 1.058, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"eval_loss": 1.018425703048706, |
|
"eval_runtime": 732.944, |
|
"eval_samples_per_second": 42.294, |
|
"eval_steps_per_second": 7.05, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 8.591233669287984e-06, |
|
"loss": 1.0543, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"eval_loss": 1.0207031965255737, |
|
"eval_runtime": 733.1758, |
|
"eval_samples_per_second": 42.28, |
|
"eval_steps_per_second": 7.047, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 8.531237480475169e-06, |
|
"loss": 1.057, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 1.0180833339691162, |
|
"eval_runtime": 733.0248, |
|
"eval_samples_per_second": 42.289, |
|
"eval_steps_per_second": 7.049, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 8.471236790260167e-06, |
|
"loss": 1.0546, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"eval_loss": 1.0137962102890015, |
|
"eval_runtime": 733.3395, |
|
"eval_samples_per_second": 42.271, |
|
"eval_steps_per_second": 7.046, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 8.411240601447351e-06, |
|
"loss": 1.0519, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"eval_loss": 1.0171297788619995, |
|
"eval_runtime": 731.3327, |
|
"eval_samples_per_second": 42.387, |
|
"eval_steps_per_second": 7.065, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 8.351245913101933e-06, |
|
"loss": 1.0505, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"eval_loss": 1.0128827095031738, |
|
"eval_runtime": 730.8675, |
|
"eval_samples_per_second": 42.414, |
|
"eval_steps_per_second": 7.07, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 8.291246723354326e-06, |
|
"loss": 1.0513, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"eval_loss": 1.0132421255111694, |
|
"eval_runtime": 730.8331, |
|
"eval_samples_per_second": 42.416, |
|
"eval_steps_per_second": 7.07, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 8.23125053454151e-06, |
|
"loss": 1.0514, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"eval_loss": 1.0138520002365112, |
|
"eval_runtime": 731.3619, |
|
"eval_samples_per_second": 42.385, |
|
"eval_steps_per_second": 7.065, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 8.1712528452613e-06, |
|
"loss": 1.0473, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"eval_loss": 1.0119198560714722, |
|
"eval_runtime": 731.3481, |
|
"eval_samples_per_second": 42.386, |
|
"eval_steps_per_second": 7.065, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 8.111255155981088e-06, |
|
"loss": 1.0461, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_loss": 1.0108563899993896, |
|
"eval_runtime": 731.5139, |
|
"eval_samples_per_second": 42.377, |
|
"eval_steps_per_second": 7.063, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 8.051255966233482e-06, |
|
"loss": 1.0496, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"eval_loss": 1.007481336593628, |
|
"eval_runtime": 731.5836, |
|
"eval_samples_per_second": 42.372, |
|
"eval_steps_per_second": 7.063, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 7.991261277888063e-06, |
|
"loss": 1.047, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"eval_loss": 1.0081983804702759, |
|
"eval_runtime": 735.3379, |
|
"eval_samples_per_second": 42.156, |
|
"eval_steps_per_second": 7.027, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 7.931263588607852e-06, |
|
"loss": 1.0435, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"eval_loss": 1.0105235576629639, |
|
"eval_runtime": 731.6633, |
|
"eval_samples_per_second": 42.368, |
|
"eval_steps_per_second": 7.062, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 7.871264398860246e-06, |
|
"loss": 1.0422, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"eval_loss": 1.009293556213379, |
|
"eval_runtime": 738.141, |
|
"eval_samples_per_second": 41.996, |
|
"eval_steps_per_second": 7.0, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 7.81126821004743e-06, |
|
"loss": 1.0438, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"eval_loss": 1.0082749128341675, |
|
"eval_runtime": 731.6793, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 7.751269020299824e-06, |
|
"loss": 1.0426, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"eval_loss": 1.0077896118164062, |
|
"eval_runtime": 731.8248, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 7.691274331954405e-06, |
|
"loss": 1.0402, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"eval_loss": 1.0057011842727661, |
|
"eval_runtime": 731.8433, |
|
"eval_samples_per_second": 42.357, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 7.631275142206797e-06, |
|
"loss": 1.0403, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 1.003925085067749, |
|
"eval_runtime": 732.0244, |
|
"eval_samples_per_second": 42.347, |
|
"eval_steps_per_second": 7.059, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 7.571277452926588e-06, |
|
"loss": 1.0398, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"eval_loss": 1.003111720085144, |
|
"eval_runtime": 732.0283, |
|
"eval_samples_per_second": 42.347, |
|
"eval_steps_per_second": 7.058, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 7.511281264113772e-06, |
|
"loss": 1.0368, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"eval_loss": 1.003316879272461, |
|
"eval_runtime": 731.9328, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 7.451283574833561e-06, |
|
"loss": 1.036, |
|
"step": 2440000 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"eval_loss": 1.0017095804214478, |
|
"eval_runtime": 733.4383, |
|
"eval_samples_per_second": 42.265, |
|
"eval_steps_per_second": 7.045, |
|
"step": 2440000 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 7.391287386020746e-06, |
|
"loss": 1.0369, |
|
"step": 2480000 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"eval_loss": 0.9995156526565552, |
|
"eval_runtime": 733.6542, |
|
"eval_samples_per_second": 42.253, |
|
"eval_steps_per_second": 7.043, |
|
"step": 2480000 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 7.33129119720793e-06, |
|
"loss": 1.0383, |
|
"step": 2520000 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"eval_loss": 1.0012413263320923, |
|
"eval_runtime": 733.4897, |
|
"eval_samples_per_second": 42.262, |
|
"eval_steps_per_second": 7.044, |
|
"step": 2520000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 7.2712965088625106e-06, |
|
"loss": 1.0312, |
|
"step": 2560000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"eval_loss": 0.9984557032585144, |
|
"eval_runtime": 733.8, |
|
"eval_samples_per_second": 42.244, |
|
"eval_steps_per_second": 7.041, |
|
"step": 2560000 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 7.211298819582301e-06, |
|
"loss": 1.0344, |
|
"step": 2600000 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"eval_loss": 1.0010262727737427, |
|
"eval_runtime": 733.4795, |
|
"eval_samples_per_second": 42.263, |
|
"eval_steps_per_second": 7.045, |
|
"step": 2600000 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 7.151301130302089e-06, |
|
"loss": 1.0333, |
|
"step": 2640000 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"eval_loss": 0.995519757270813, |
|
"eval_runtime": 733.5669, |
|
"eval_samples_per_second": 42.258, |
|
"eval_steps_per_second": 7.044, |
|
"step": 2640000 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 7.091303441021878e-06, |
|
"loss": 1.0333, |
|
"step": 2680000 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"eval_loss": 0.9943256974220276, |
|
"eval_runtime": 733.6185, |
|
"eval_samples_per_second": 42.255, |
|
"eval_steps_per_second": 7.043, |
|
"step": 2680000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 7.031307252209064e-06, |
|
"loss": 1.0302, |
|
"step": 2720000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"eval_loss": 0.993632972240448, |
|
"eval_runtime": 731.903, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2720000 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 6.971312563863644e-06, |
|
"loss": 1.0301, |
|
"step": 2760000 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"eval_loss": 0.9943361282348633, |
|
"eval_runtime": 731.9076, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2760000 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 6.911311873648642e-06, |
|
"loss": 1.0312, |
|
"step": 2800000 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"eval_loss": 0.9974587559700012, |
|
"eval_runtime": 731.8693, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 2800000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 6.8513171853032225e-06, |
|
"loss": 1.0265, |
|
"step": 2840000 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"eval_loss": 0.9932205677032471, |
|
"eval_runtime": 731.8147, |
|
"eval_samples_per_second": 42.359, |
|
"eval_steps_per_second": 7.061, |
|
"step": 2840000 |
|
}, |
|
{ |
|
"epoch": 19.45, |
|
"learning_rate": 6.791320996490407e-06, |
|
"loss": 1.0271, |
|
"step": 2880000 |
|
}, |
|
{ |
|
"epoch": 19.45, |
|
"eval_loss": 0.9906313419342041, |
|
"eval_runtime": 732.268, |
|
"eval_samples_per_second": 42.333, |
|
"eval_steps_per_second": 7.056, |
|
"step": 2880000 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 6.7313233072101965e-06, |
|
"loss": 1.0275, |
|
"step": 2920000 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"eval_loss": 0.9913482666015625, |
|
"eval_runtime": 731.9864, |
|
"eval_samples_per_second": 42.349, |
|
"eval_steps_per_second": 7.059, |
|
"step": 2920000 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 6.6713271183973816e-06, |
|
"loss": 1.0267, |
|
"step": 2960000 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_loss": 0.9909727573394775, |
|
"eval_runtime": 731.9562, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 2960000 |
|
}, |
|
{ |
|
"epoch": 20.26, |
|
"learning_rate": 6.611332430051962e-06, |
|
"loss": 1.0238, |
|
"step": 3000000 |
|
}, |
|
{ |
|
"epoch": 20.26, |
|
"eval_loss": 0.9900842308998108, |
|
"eval_runtime": 732.034, |
|
"eval_samples_per_second": 42.346, |
|
"eval_steps_per_second": 7.058, |
|
"step": 3000000 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 6.551330239369564e-06, |
|
"loss": 1.0236, |
|
"step": 3040000 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"eval_loss": 0.9898263812065125, |
|
"eval_runtime": 731.9968, |
|
"eval_samples_per_second": 42.349, |
|
"eval_steps_per_second": 7.059, |
|
"step": 3040000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 6.491334050556749e-06, |
|
"loss": 1.0237, |
|
"step": 3080000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_loss": 0.991007387638092, |
|
"eval_runtime": 731.9959, |
|
"eval_samples_per_second": 42.349, |
|
"eval_steps_per_second": 7.059, |
|
"step": 3080000 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"learning_rate": 6.4313378617439335e-06, |
|
"loss": 1.0238, |
|
"step": 3120000 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"eval_loss": 0.9906073808670044, |
|
"eval_runtime": 731.8574, |
|
"eval_samples_per_second": 42.357, |
|
"eval_steps_per_second": 7.06, |
|
"step": 3120000 |
|
}, |
|
{ |
|
"epoch": 21.34, |
|
"learning_rate": 6.3713401724637225e-06, |
|
"loss": 1.0193, |
|
"step": 3160000 |
|
}, |
|
{ |
|
"epoch": 21.34, |
|
"eval_loss": 0.9887716770172119, |
|
"eval_runtime": 731.675, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3160000 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"learning_rate": 6.311342483183512e-06, |
|
"loss": 1.0197, |
|
"step": 3200000 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"eval_loss": 0.9868865013122559, |
|
"eval_runtime": 741.4509, |
|
"eval_samples_per_second": 41.809, |
|
"eval_steps_per_second": 6.969, |
|
"step": 3200000 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"learning_rate": 6.251343293435905e-06, |
|
"loss": 1.0211, |
|
"step": 3240000 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"eval_loss": 0.9857591986656189, |
|
"eval_runtime": 731.6308, |
|
"eval_samples_per_second": 42.37, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3240000 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"learning_rate": 6.191347104623091e-06, |
|
"loss": 1.0186, |
|
"step": 3280000 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"eval_loss": 0.9871023893356323, |
|
"eval_runtime": 731.6768, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3280000 |
|
}, |
|
{ |
|
"epoch": 22.42, |
|
"learning_rate": 6.131346414408088e-06, |
|
"loss": 1.0155, |
|
"step": 3320000 |
|
}, |
|
{ |
|
"epoch": 22.42, |
|
"eval_loss": 0.9836686849594116, |
|
"eval_runtime": 731.7409, |
|
"eval_samples_per_second": 42.363, |
|
"eval_steps_per_second": 7.061, |
|
"step": 3320000 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"learning_rate": 6.071350225595274e-06, |
|
"loss": 1.0179, |
|
"step": 3360000 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"eval_loss": 0.9840074777603149, |
|
"eval_runtime": 731.7805, |
|
"eval_samples_per_second": 42.361, |
|
"eval_steps_per_second": 7.061, |
|
"step": 3360000 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"learning_rate": 6.011354036782459e-06, |
|
"loss": 1.0193, |
|
"step": 3400000 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_loss": 0.9879838824272156, |
|
"eval_runtime": 731.7957, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 3400000 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 5.951354847034852e-06, |
|
"loss": 1.0132, |
|
"step": 3440000 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"eval_loss": 0.9860868453979492, |
|
"eval_runtime": 731.3704, |
|
"eval_samples_per_second": 42.385, |
|
"eval_steps_per_second": 7.065, |
|
"step": 3440000 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 5.8913571577546415e-06, |
|
"loss": 1.0131, |
|
"step": 3480000 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"eval_loss": 0.980789840221405, |
|
"eval_runtime": 731.4402, |
|
"eval_samples_per_second": 42.381, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3480000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"learning_rate": 5.8313594684744305e-06, |
|
"loss": 1.0122, |
|
"step": 3520000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"eval_loss": 0.97965407371521, |
|
"eval_runtime": 731.5077, |
|
"eval_samples_per_second": 42.377, |
|
"eval_steps_per_second": 7.063, |
|
"step": 3520000 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 5.771361779194219e-06, |
|
"loss": 1.0157, |
|
"step": 3560000 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"eval_loss": 0.9817570447921753, |
|
"eval_runtime": 731.419, |
|
"eval_samples_per_second": 42.382, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3560000 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 5.711364089914008e-06, |
|
"loss": 1.0106, |
|
"step": 3600000 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"eval_loss": 0.9800501465797424, |
|
"eval_runtime": 731.5621, |
|
"eval_samples_per_second": 42.374, |
|
"eval_steps_per_second": 7.063, |
|
"step": 3600000 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 5.65136940156859e-06, |
|
"loss": 1.011, |
|
"step": 3640000 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"eval_loss": 0.9801167249679565, |
|
"eval_runtime": 731.533, |
|
"eval_samples_per_second": 42.375, |
|
"eval_steps_per_second": 7.063, |
|
"step": 3640000 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"learning_rate": 5.591368711353587e-06, |
|
"loss": 1.0121, |
|
"step": 3680000 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"eval_loss": 0.9798985719680786, |
|
"eval_runtime": 731.4872, |
|
"eval_samples_per_second": 42.378, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3680000 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"learning_rate": 5.531371022073376e-06, |
|
"loss": 1.0094, |
|
"step": 3720000 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"eval_loss": 0.9805187582969666, |
|
"eval_runtime": 731.6262, |
|
"eval_samples_per_second": 42.37, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3720000 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 5.471374833260561e-06, |
|
"loss": 1.0127, |
|
"step": 3760000 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"eval_loss": 0.9804235696792603, |
|
"eval_runtime": 731.7948, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 3760000 |
|
}, |
|
{ |
|
"epoch": 25.66, |
|
"learning_rate": 5.411377143980351e-06, |
|
"loss": 1.0087, |
|
"step": 3800000 |
|
}, |
|
{ |
|
"epoch": 25.66, |
|
"eval_loss": 0.9776991009712219, |
|
"eval_runtime": 731.6622, |
|
"eval_samples_per_second": 42.368, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3800000 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"learning_rate": 5.351380955167535e-06, |
|
"loss": 1.009, |
|
"step": 3840000 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"eval_loss": 0.9773043990135193, |
|
"eval_runtime": 731.5756, |
|
"eval_samples_per_second": 42.373, |
|
"eval_steps_per_second": 7.063, |
|
"step": 3840000 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"learning_rate": 5.291383265887324e-06, |
|
"loss": 1.005, |
|
"step": 3880000 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"eval_loss": 0.9772471785545349, |
|
"eval_runtime": 731.6739, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 3880000 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"learning_rate": 5.2313885775419045e-06, |
|
"loss": 1.0049, |
|
"step": 3920000 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"eval_loss": 0.9808975458145142, |
|
"eval_runtime": 731.4892, |
|
"eval_samples_per_second": 42.378, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3920000 |
|
}, |
|
{ |
|
"epoch": 26.74, |
|
"learning_rate": 5.171389387794299e-06, |
|
"loss": 1.0059, |
|
"step": 3960000 |
|
}, |
|
{ |
|
"epoch": 26.74, |
|
"eval_loss": 0.9739375710487366, |
|
"eval_runtime": 731.4074, |
|
"eval_samples_per_second": 42.383, |
|
"eval_steps_per_second": 7.064, |
|
"step": 3960000 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 5.111393198981483e-06, |
|
"loss": 1.0052, |
|
"step": 4000000 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"eval_loss": 0.9740833640098572, |
|
"eval_runtime": 730.8502, |
|
"eval_samples_per_second": 42.415, |
|
"eval_steps_per_second": 7.07, |
|
"step": 4000000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"learning_rate": 5.051397010168668e-06, |
|
"loss": 1.0011, |
|
"step": 4040000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"eval_loss": 0.9713684916496277, |
|
"eval_runtime": 730.9391, |
|
"eval_samples_per_second": 42.41, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4040000 |
|
}, |
|
{ |
|
"epoch": 27.55, |
|
"learning_rate": 4.991400821355853e-06, |
|
"loss": 1.0007, |
|
"step": 4080000 |
|
}, |
|
{ |
|
"epoch": 27.55, |
|
"eval_loss": 0.9728115200996399, |
|
"eval_runtime": 730.9125, |
|
"eval_samples_per_second": 42.411, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4080000 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 4.931401631608246e-06, |
|
"loss": 1.0029, |
|
"step": 4120000 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"eval_loss": 0.9748100638389587, |
|
"eval_runtime": 730.8598, |
|
"eval_samples_per_second": 42.414, |
|
"eval_steps_per_second": 7.07, |
|
"step": 4120000 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 4.871408443730222e-06, |
|
"loss": 1.0009, |
|
"step": 4160000 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"eval_loss": 0.969658374786377, |
|
"eval_runtime": 730.837, |
|
"eval_samples_per_second": 42.416, |
|
"eval_steps_per_second": 7.07, |
|
"step": 4160000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"learning_rate": 4.811412254917407e-06, |
|
"loss": 0.9986, |
|
"step": 4200000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"eval_loss": 0.9716975092887878, |
|
"eval_runtime": 730.9826, |
|
"eval_samples_per_second": 42.407, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4200000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"learning_rate": 4.751413065169801e-06, |
|
"loss": 0.9983, |
|
"step": 4240000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"eval_loss": 0.9703741073608398, |
|
"eval_runtime": 730.8801, |
|
"eval_samples_per_second": 42.413, |
|
"eval_steps_per_second": 7.07, |
|
"step": 4240000 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"learning_rate": 4.69141537588959e-06, |
|
"loss": 1.0006, |
|
"step": 4280000 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_loss": 0.9680435657501221, |
|
"eval_runtime": 730.9241, |
|
"eval_samples_per_second": 42.411, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4280000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"learning_rate": 4.631419187076775e-06, |
|
"loss": 0.9981, |
|
"step": 4320000 |
|
}, |
|
{ |
|
"epoch": 29.17, |
|
"eval_loss": 0.9698230028152466, |
|
"eval_runtime": 730.9102, |
|
"eval_samples_per_second": 42.412, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4320000 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 4.571419997329168e-06, |
|
"loss": 0.9955, |
|
"step": 4360000 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"eval_loss": 0.9697496891021729, |
|
"eval_runtime": 730.9617, |
|
"eval_samples_per_second": 42.409, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4360000 |
|
}, |
|
{ |
|
"epoch": 29.71, |
|
"learning_rate": 4.5114238085163535e-06, |
|
"loss": 0.9965, |
|
"step": 4400000 |
|
}, |
|
{ |
|
"epoch": 29.71, |
|
"eval_loss": 0.9706552028656006, |
|
"eval_runtime": 730.9543, |
|
"eval_samples_per_second": 42.409, |
|
"eval_steps_per_second": 7.069, |
|
"step": 4400000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"learning_rate": 4.4514261192361425e-06, |
|
"loss": 0.9978, |
|
"step": 4440000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"eval_loss": 0.9674049615859985, |
|
"eval_runtime": 731.0402, |
|
"eval_samples_per_second": 42.404, |
|
"eval_steps_per_second": 7.068, |
|
"step": 4440000 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"learning_rate": 4.391428429955931e-06, |
|
"loss": 0.9921, |
|
"step": 4480000 |
|
}, |
|
{ |
|
"epoch": 30.25, |
|
"eval_loss": 0.9650031328201294, |
|
"eval_runtime": 730.9913, |
|
"eval_samples_per_second": 42.407, |
|
"eval_steps_per_second": 7.068, |
|
"step": 4480000 |
|
}, |
|
{ |
|
"epoch": 30.52, |
|
"learning_rate": 4.331429240208325e-06, |
|
"loss": 0.9934, |
|
"step": 4520000 |
|
}, |
|
{ |
|
"epoch": 30.52, |
|
"eval_loss": 0.9674308896064758, |
|
"eval_runtime": 732.3441, |
|
"eval_samples_per_second": 42.328, |
|
"eval_steps_per_second": 7.055, |
|
"step": 4520000 |
|
}, |
|
{ |
|
"epoch": 30.79, |
|
"learning_rate": 4.271430050460719e-06, |
|
"loss": 0.9943, |
|
"step": 4560000 |
|
}, |
|
{ |
|
"epoch": 30.79, |
|
"eval_loss": 0.9649813771247864, |
|
"eval_runtime": 731.9631, |
|
"eval_samples_per_second": 42.35, |
|
"eval_steps_per_second": 7.059, |
|
"step": 4560000 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"learning_rate": 4.2114308607131125e-06, |
|
"loss": 0.9946, |
|
"step": 4600000 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"eval_loss": 0.9624907374382019, |
|
"eval_runtime": 731.9597, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 4600000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 4.151434671900298e-06, |
|
"loss": 0.9917, |
|
"step": 4640000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"eval_loss": 0.9647811055183411, |
|
"eval_runtime": 731.9469, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 4640000 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 4.091436982620087e-06, |
|
"loss": 0.9902, |
|
"step": 4680000 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_loss": 0.962338387966156, |
|
"eval_runtime": 731.9973, |
|
"eval_samples_per_second": 42.349, |
|
"eval_steps_per_second": 7.059, |
|
"step": 4680000 |
|
}, |
|
{ |
|
"epoch": 31.87, |
|
"learning_rate": 4.031442294274667e-06, |
|
"loss": 0.9918, |
|
"step": 4720000 |
|
}, |
|
{ |
|
"epoch": 31.87, |
|
"eval_loss": 0.9640004634857178, |
|
"eval_runtime": 723.6956, |
|
"eval_samples_per_second": 42.834, |
|
"eval_steps_per_second": 7.14, |
|
"step": 4720000 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"learning_rate": 3.9714416040596645e-06, |
|
"loss": 0.9897, |
|
"step": 4760000 |
|
}, |
|
{ |
|
"epoch": 32.14, |
|
"eval_loss": 0.961030125617981, |
|
"eval_runtime": 723.561, |
|
"eval_samples_per_second": 42.842, |
|
"eval_steps_per_second": 7.141, |
|
"step": 4760000 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"learning_rate": 3.91144541524685e-06, |
|
"loss": 0.9886, |
|
"step": 4800000 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"eval_loss": 0.9641813039779663, |
|
"eval_runtime": 723.7468, |
|
"eval_samples_per_second": 42.831, |
|
"eval_steps_per_second": 7.139, |
|
"step": 4800000 |
|
}, |
|
{ |
|
"epoch": 32.68, |
|
"learning_rate": 3.8514477259666385e-06, |
|
"loss": 0.9892, |
|
"step": 4840000 |
|
}, |
|
{ |
|
"epoch": 32.68, |
|
"eval_loss": 0.9588494896888733, |
|
"eval_runtime": 723.6649, |
|
"eval_samples_per_second": 42.836, |
|
"eval_steps_per_second": 7.14, |
|
"step": 4840000 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"learning_rate": 3.7914485362190323e-06, |
|
"loss": 0.9885, |
|
"step": 4880000 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_loss": 0.959790050983429, |
|
"eval_runtime": 731.8929, |
|
"eval_samples_per_second": 42.355, |
|
"eval_steps_per_second": 7.06, |
|
"step": 4880000 |
|
}, |
|
{ |
|
"epoch": 33.22, |
|
"learning_rate": 3.7314508469388216e-06, |
|
"loss": 0.9867, |
|
"step": 4920000 |
|
}, |
|
{ |
|
"epoch": 33.22, |
|
"eval_loss": 0.9605200290679932, |
|
"eval_runtime": 731.9065, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 4920000 |
|
}, |
|
{ |
|
"epoch": 33.49, |
|
"learning_rate": 3.6714546581260063e-06, |
|
"loss": 0.9851, |
|
"step": 4960000 |
|
}, |
|
{ |
|
"epoch": 33.49, |
|
"eval_loss": 0.9623289704322815, |
|
"eval_runtime": 731.9518, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 4960000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 3.6114539679110043e-06, |
|
"loss": 0.9865, |
|
"step": 5000000 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"eval_loss": 0.9566452503204346, |
|
"eval_runtime": 731.9347, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 5000000 |
|
}, |
|
{ |
|
"epoch": 34.03, |
|
"learning_rate": 3.5514562786307937e-06, |
|
"loss": 0.9847, |
|
"step": 5040000 |
|
}, |
|
{ |
|
"epoch": 34.03, |
|
"eval_loss": 0.9604076743125916, |
|
"eval_runtime": 731.8939, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5040000 |
|
}, |
|
{ |
|
"epoch": 34.3, |
|
"learning_rate": 3.4914570888831874e-06, |
|
"loss": 0.9813, |
|
"step": 5080000 |
|
}, |
|
{ |
|
"epoch": 34.3, |
|
"eval_loss": 0.9602544903755188, |
|
"eval_runtime": 731.8897, |
|
"eval_samples_per_second": 42.355, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5080000 |
|
}, |
|
{ |
|
"epoch": 34.57, |
|
"learning_rate": 3.431457899135581e-06, |
|
"loss": 0.9838, |
|
"step": 5120000 |
|
}, |
|
{ |
|
"epoch": 34.57, |
|
"eval_loss": 0.9588419198989868, |
|
"eval_runtime": 731.8728, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5120000 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 3.37146020985537e-06, |
|
"loss": 0.9842, |
|
"step": 5160000 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"eval_loss": 0.956282377243042, |
|
"eval_runtime": 731.8168, |
|
"eval_samples_per_second": 42.359, |
|
"eval_steps_per_second": 7.061, |
|
"step": 5160000 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 3.311464021042555e-06, |
|
"loss": 0.982, |
|
"step": 5200000 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"eval_loss": 0.9570952653884888, |
|
"eval_runtime": 731.9288, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 5200000 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"learning_rate": 3.251464831294949e-06, |
|
"loss": 0.9803, |
|
"step": 5240000 |
|
}, |
|
{ |
|
"epoch": 35.38, |
|
"eval_loss": 0.9592018127441406, |
|
"eval_runtime": 731.9144, |
|
"eval_samples_per_second": 42.353, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5240000 |
|
}, |
|
{ |
|
"epoch": 35.65, |
|
"learning_rate": 3.1914671420147383e-06, |
|
"loss": 0.9804, |
|
"step": 5280000 |
|
}, |
|
{ |
|
"epoch": 35.65, |
|
"eval_loss": 0.9540479779243469, |
|
"eval_runtime": 731.9484, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 5280000 |
|
}, |
|
{ |
|
"epoch": 35.92, |
|
"learning_rate": 3.131469452734527e-06, |
|
"loss": 0.9799, |
|
"step": 5320000 |
|
}, |
|
{ |
|
"epoch": 35.92, |
|
"eval_loss": 0.9547669887542725, |
|
"eval_runtime": 731.8909, |
|
"eval_samples_per_second": 42.355, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5320000 |
|
}, |
|
{ |
|
"epoch": 36.19, |
|
"learning_rate": 3.0714732639217123e-06, |
|
"loss": 0.9793, |
|
"step": 5360000 |
|
}, |
|
{ |
|
"epoch": 36.19, |
|
"eval_loss": 0.9536721706390381, |
|
"eval_runtime": 731.8644, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5360000 |
|
}, |
|
{ |
|
"epoch": 36.46, |
|
"learning_rate": 3.011475574641501e-06, |
|
"loss": 0.9783, |
|
"step": 5400000 |
|
}, |
|
{ |
|
"epoch": 36.46, |
|
"eval_loss": 0.9515364766120911, |
|
"eval_runtime": 731.9101, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5400000 |
|
}, |
|
{ |
|
"epoch": 36.73, |
|
"learning_rate": 2.951476384893895e-06, |
|
"loss": 0.9772, |
|
"step": 5440000 |
|
}, |
|
{ |
|
"epoch": 36.73, |
|
"eval_loss": 0.95276939868927, |
|
"eval_runtime": 731.8184, |
|
"eval_samples_per_second": 42.359, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5440000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 2.8914801960810796e-06, |
|
"loss": 0.9804, |
|
"step": 5480000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.9519599676132202, |
|
"eval_runtime": 731.9107, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5480000 |
|
}, |
|
{ |
|
"epoch": 37.27, |
|
"learning_rate": 2.83148550773566e-06, |
|
"loss": 0.9745, |
|
"step": 5520000 |
|
}, |
|
{ |
|
"epoch": 37.27, |
|
"eval_loss": 0.9534033536911011, |
|
"eval_runtime": 731.9292, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 5520000 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"learning_rate": 2.7714863179880536e-06, |
|
"loss": 0.9757, |
|
"step": 5560000 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"eval_loss": 0.9531847238540649, |
|
"eval_runtime": 731.8616, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5560000 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"learning_rate": 2.7114901291752383e-06, |
|
"loss": 0.9774, |
|
"step": 5600000 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"eval_loss": 0.9502018690109253, |
|
"eval_runtime": 731.8317, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5600000 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"learning_rate": 2.6514909394276316e-06, |
|
"loss": 0.9753, |
|
"step": 5640000 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"eval_loss": 0.9478103518486023, |
|
"eval_runtime": 731.8606, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5640000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"learning_rate": 2.591494750614817e-06, |
|
"loss": 0.9718, |
|
"step": 5680000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"eval_loss": 0.95122891664505, |
|
"eval_runtime": 731.8255, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5680000 |
|
}, |
|
{ |
|
"epoch": 38.62, |
|
"learning_rate": 2.5314970613346056e-06, |
|
"loss": 0.9734, |
|
"step": 5720000 |
|
}, |
|
{ |
|
"epoch": 38.62, |
|
"eval_loss": 0.9479803442955017, |
|
"eval_runtime": 731.8289, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5720000 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"learning_rate": 2.4715023729891864e-06, |
|
"loss": 0.9745, |
|
"step": 5760000 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"eval_loss": 0.9491930603981018, |
|
"eval_runtime": 731.856, |
|
"eval_samples_per_second": 42.357, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5760000 |
|
}, |
|
{ |
|
"epoch": 39.16, |
|
"learning_rate": 2.4115046837089757e-06, |
|
"loss": 0.9712, |
|
"step": 5800000 |
|
}, |
|
{ |
|
"epoch": 39.16, |
|
"eval_loss": 0.949318528175354, |
|
"eval_runtime": 731.8365, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 5800000 |
|
}, |
|
{ |
|
"epoch": 39.43, |
|
"learning_rate": 2.3515084948961604e-06, |
|
"loss": 0.97, |
|
"step": 5840000 |
|
}, |
|
{ |
|
"epoch": 39.43, |
|
"eval_loss": 0.9494725465774536, |
|
"eval_runtime": 731.7796, |
|
"eval_samples_per_second": 42.361, |
|
"eval_steps_per_second": 7.061, |
|
"step": 5840000 |
|
}, |
|
{ |
|
"epoch": 39.7, |
|
"learning_rate": 2.291512306083345e-06, |
|
"loss": 0.9719, |
|
"step": 5880000 |
|
}, |
|
{ |
|
"epoch": 39.7, |
|
"eval_loss": 0.9493642449378967, |
|
"eval_runtime": 731.7838, |
|
"eval_samples_per_second": 42.361, |
|
"eval_steps_per_second": 7.061, |
|
"step": 5880000 |
|
}, |
|
{ |
|
"epoch": 39.97, |
|
"learning_rate": 2.2315131163357387e-06, |
|
"loss": 0.9725, |
|
"step": 5920000 |
|
}, |
|
{ |
|
"epoch": 39.97, |
|
"eval_loss": 0.945613443851471, |
|
"eval_runtime": 731.8091, |
|
"eval_samples_per_second": 42.359, |
|
"eval_steps_per_second": 7.061, |
|
"step": 5920000 |
|
}, |
|
{ |
|
"epoch": 40.24, |
|
"learning_rate": 2.171515427055528e-06, |
|
"loss": 0.9692, |
|
"step": 5960000 |
|
}, |
|
{ |
|
"epoch": 40.24, |
|
"eval_loss": 0.9440425634384155, |
|
"eval_runtime": 731.7685, |
|
"eval_samples_per_second": 42.362, |
|
"eval_steps_per_second": 7.061, |
|
"step": 5960000 |
|
}, |
|
{ |
|
"epoch": 40.51, |
|
"learning_rate": 2.111517737775317e-06, |
|
"loss": 0.9686, |
|
"step": 6000000 |
|
}, |
|
{ |
|
"epoch": 40.51, |
|
"eval_loss": 0.9432018399238586, |
|
"eval_runtime": 731.8797, |
|
"eval_samples_per_second": 42.355, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6000000 |
|
}, |
|
{ |
|
"epoch": 40.78, |
|
"learning_rate": 2.051523049429898e-06, |
|
"loss": 0.9691, |
|
"step": 6040000 |
|
}, |
|
{ |
|
"epoch": 40.78, |
|
"eval_loss": 0.9459187388420105, |
|
"eval_runtime": 731.8926, |
|
"eval_samples_per_second": 42.355, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6040000 |
|
}, |
|
{ |
|
"epoch": 41.05, |
|
"learning_rate": 1.991523859682291e-06, |
|
"loss": 0.9677, |
|
"step": 6080000 |
|
}, |
|
{ |
|
"epoch": 41.05, |
|
"eval_loss": 0.9420450329780579, |
|
"eval_runtime": 731.8945, |
|
"eval_samples_per_second": 42.354, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6080000 |
|
}, |
|
{ |
|
"epoch": 41.32, |
|
"learning_rate": 1.931527670869476e-06, |
|
"loss": 0.9677, |
|
"step": 6120000 |
|
}, |
|
{ |
|
"epoch": 41.32, |
|
"eval_loss": 0.945288360118866, |
|
"eval_runtime": 731.7905, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 6120000 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 1.8715299815892652e-06, |
|
"loss": 0.9661, |
|
"step": 6160000 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"eval_loss": 0.9455643892288208, |
|
"eval_runtime": 731.7869, |
|
"eval_samples_per_second": 42.361, |
|
"eval_steps_per_second": 7.061, |
|
"step": 6160000 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"learning_rate": 1.8115352932438457e-06, |
|
"loss": 0.9664, |
|
"step": 6200000 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"eval_loss": 0.9422553181648254, |
|
"eval_runtime": 731.8325, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6200000 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 1.7515361034962392e-06, |
|
"loss": 0.9653, |
|
"step": 6240000 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"eval_loss": 0.9434319734573364, |
|
"eval_runtime": 731.8054, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 6240000 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 1.6915429156182154e-06, |
|
"loss": 0.9633, |
|
"step": 6280000 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"eval_loss": 0.9410218000411987, |
|
"eval_runtime": 731.7099, |
|
"eval_samples_per_second": 42.365, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6280000 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 1.6315452263380044e-06, |
|
"loss": 0.9699, |
|
"step": 6320000 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"eval_loss": 0.9450401663780212, |
|
"eval_runtime": 731.4804, |
|
"eval_samples_per_second": 42.378, |
|
"eval_steps_per_second": 7.064, |
|
"step": 6320000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 1.571546036590398e-06, |
|
"loss": 0.9673, |
|
"step": 6360000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"eval_loss": 0.9430739283561707, |
|
"eval_runtime": 731.5218, |
|
"eval_samples_per_second": 42.376, |
|
"eval_steps_per_second": 7.063, |
|
"step": 6360000 |
|
}, |
|
{ |
|
"epoch": 43.21, |
|
"learning_rate": 1.5115513482449784e-06, |
|
"loss": 0.9635, |
|
"step": 6400000 |
|
}, |
|
{ |
|
"epoch": 43.21, |
|
"eval_loss": 0.9403471350669861, |
|
"eval_runtime": 731.5739, |
|
"eval_samples_per_second": 42.373, |
|
"eval_steps_per_second": 7.063, |
|
"step": 6400000 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"learning_rate": 1.4515536589647678e-06, |
|
"loss": 0.9643, |
|
"step": 6440000 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"eval_loss": 0.9411180019378662, |
|
"eval_runtime": 731.5857, |
|
"eval_samples_per_second": 42.372, |
|
"eval_steps_per_second": 7.063, |
|
"step": 6440000 |
|
}, |
|
{ |
|
"epoch": 43.75, |
|
"learning_rate": 1.3915604710867436e-06, |
|
"loss": 0.9629, |
|
"step": 6480000 |
|
}, |
|
{ |
|
"epoch": 43.75, |
|
"eval_loss": 0.942000150680542, |
|
"eval_runtime": 731.7059, |
|
"eval_samples_per_second": 42.365, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6480000 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"learning_rate": 1.3315612813391373e-06, |
|
"loss": 0.9636, |
|
"step": 6520000 |
|
}, |
|
{ |
|
"epoch": 44.02, |
|
"eval_loss": 0.9400261640548706, |
|
"eval_runtime": 731.6205, |
|
"eval_samples_per_second": 42.37, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6520000 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"learning_rate": 1.2715620915915308e-06, |
|
"loss": 0.9603, |
|
"step": 6560000 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"eval_loss": 0.9395481944084167, |
|
"eval_runtime": 731.6787, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6560000 |
|
}, |
|
{ |
|
"epoch": 44.56, |
|
"learning_rate": 1.2115659027787157e-06, |
|
"loss": 0.9623, |
|
"step": 6600000 |
|
}, |
|
{ |
|
"epoch": 44.56, |
|
"eval_loss": 0.9395203590393066, |
|
"eval_runtime": 731.6894, |
|
"eval_samples_per_second": 42.366, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6600000 |
|
}, |
|
{ |
|
"epoch": 44.83, |
|
"learning_rate": 1.1515682134985048e-06, |
|
"loss": 0.9631, |
|
"step": 6640000 |
|
}, |
|
{ |
|
"epoch": 44.83, |
|
"eval_loss": 0.9380507469177246, |
|
"eval_runtime": 731.5615, |
|
"eval_samples_per_second": 42.374, |
|
"eval_steps_per_second": 7.063, |
|
"step": 6640000 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"learning_rate": 1.0915720246856897e-06, |
|
"loss": 0.9592, |
|
"step": 6680000 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"eval_loss": 0.9388097524642944, |
|
"eval_runtime": 731.6793, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6680000 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"learning_rate": 1.0315758358728746e-06, |
|
"loss": 0.9597, |
|
"step": 6720000 |
|
}, |
|
{ |
|
"epoch": 45.37, |
|
"eval_loss": 0.9375278949737549, |
|
"eval_runtime": 731.6622, |
|
"eval_samples_per_second": 42.368, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6720000 |
|
}, |
|
{ |
|
"epoch": 45.64, |
|
"learning_rate": 9.715796470600592e-07, |
|
"loss": 0.9597, |
|
"step": 6760000 |
|
}, |
|
{ |
|
"epoch": 45.64, |
|
"eval_loss": 0.938642680644989, |
|
"eval_runtime": 731.6777, |
|
"eval_samples_per_second": 42.367, |
|
"eval_steps_per_second": 7.062, |
|
"step": 6760000 |
|
}, |
|
{ |
|
"epoch": 45.91, |
|
"learning_rate": 9.115819577798485e-07, |
|
"loss": 0.9572, |
|
"step": 6800000 |
|
}, |
|
{ |
|
"epoch": 45.91, |
|
"eval_loss": 0.9379501342773438, |
|
"eval_runtime": 731.8028, |
|
"eval_samples_per_second": 42.36, |
|
"eval_steps_per_second": 7.061, |
|
"step": 6800000 |
|
}, |
|
{ |
|
"epoch": 46.18, |
|
"learning_rate": 8.515827680322421e-07, |
|
"loss": 0.9591, |
|
"step": 6840000 |
|
}, |
|
{ |
|
"epoch": 46.18, |
|
"eval_loss": 0.9362252950668335, |
|
"eval_runtime": 731.8252, |
|
"eval_samples_per_second": 42.358, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6840000 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 7.915865792194268e-07, |
|
"loss": 0.9566, |
|
"step": 6880000 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"eval_loss": 0.9348224997520447, |
|
"eval_runtime": 731.8651, |
|
"eval_samples_per_second": 42.356, |
|
"eval_steps_per_second": 7.06, |
|
"step": 6880000 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"learning_rate": 7.315903904066117e-07, |
|
"loss": 0.9574, |
|
"step": 6920000 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"eval_loss": 0.9346414804458618, |
|
"eval_runtime": 731.9242, |
|
"eval_samples_per_second": 42.353, |
|
"eval_steps_per_second": 7.059, |
|
"step": 6920000 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"learning_rate": 6.715957020611921e-07, |
|
"loss": 0.9563, |
|
"step": 6960000 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_loss": 0.9362134337425232, |
|
"eval_runtime": 731.9416, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 6960000 |
|
}, |
|
{ |
|
"epoch": 47.26, |
|
"learning_rate": 6.116025141831681e-07, |
|
"loss": 0.9546, |
|
"step": 7000000 |
|
}, |
|
{ |
|
"epoch": 47.26, |
|
"eval_loss": 0.9321693778038025, |
|
"eval_runtime": 731.9394, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7000000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"learning_rate": 5.516048249029573e-07, |
|
"loss": 0.955, |
|
"step": 7040000 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"eval_loss": 0.9357685446739197, |
|
"eval_runtime": 731.9226, |
|
"eval_samples_per_second": 42.353, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7040000 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"learning_rate": 4.916071356227465e-07, |
|
"loss": 0.9558, |
|
"step": 7080000 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"eval_loss": 0.9322512149810791, |
|
"eval_runtime": 731.9386, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7080000 |
|
}, |
|
{ |
|
"epoch": 48.07, |
|
"learning_rate": 4.3160794587514014e-07, |
|
"loss": 0.9547, |
|
"step": 7120000 |
|
}, |
|
{ |
|
"epoch": 48.07, |
|
"eval_loss": 0.9324430227279663, |
|
"eval_runtime": 731.9445, |
|
"eval_samples_per_second": 42.352, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7120000 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"learning_rate": 3.7161025659492937e-07, |
|
"loss": 0.9536, |
|
"step": 7160000 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"eval_loss": 0.9327294230461121, |
|
"eval_runtime": 731.9579, |
|
"eval_samples_per_second": 42.351, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7160000 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"learning_rate": 3.116140677821142e-07, |
|
"loss": 0.9533, |
|
"step": 7200000 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"eval_loss": 0.933174192905426, |
|
"eval_runtime": 732.1291, |
|
"eval_samples_per_second": 42.341, |
|
"eval_steps_per_second": 7.057, |
|
"step": 7200000 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"learning_rate": 2.5161637850190335e-07, |
|
"loss": 0.9536, |
|
"step": 7240000 |
|
}, |
|
{ |
|
"epoch": 48.89, |
|
"eval_loss": 0.9329264163970947, |
|
"eval_runtime": 732.2008, |
|
"eval_samples_per_second": 42.337, |
|
"eval_steps_per_second": 7.057, |
|
"step": 7240000 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"learning_rate": 1.9162018968908816e-07, |
|
"loss": 0.9517, |
|
"step": 7280000 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"eval_loss": 0.9311585426330566, |
|
"eval_runtime": 732.166, |
|
"eval_samples_per_second": 42.339, |
|
"eval_steps_per_second": 7.057, |
|
"step": 7280000 |
|
}, |
|
{ |
|
"epoch": 49.43, |
|
"learning_rate": 1.3162550134366856e-07, |
|
"loss": 0.9525, |
|
"step": 7320000 |
|
}, |
|
{ |
|
"epoch": 49.43, |
|
"eval_loss": 0.9293885827064514, |
|
"eval_runtime": 732.1103, |
|
"eval_samples_per_second": 42.342, |
|
"eval_steps_per_second": 7.058, |
|
"step": 7320000 |
|
}, |
|
{ |
|
"epoch": 49.7, |
|
"learning_rate": 7.162931253085337e-08, |
|
"loss": 0.9519, |
|
"step": 7360000 |
|
}, |
|
{ |
|
"epoch": 49.7, |
|
"eval_loss": 0.9308800101280212, |
|
"eval_runtime": 732.0979, |
|
"eval_samples_per_second": 42.343, |
|
"eval_steps_per_second": 7.058, |
|
"step": 7360000 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"learning_rate": 1.1630122783246982e-08, |
|
"loss": 0.9518, |
|
"step": 7400000 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"eval_loss": 0.9334636926651001, |
|
"eval_runtime": 731.9724, |
|
"eval_samples_per_second": 42.35, |
|
"eval_steps_per_second": 7.059, |
|
"step": 7400000 |
|
} |
|
], |
|
"max_steps": 7405100, |
|
"num_train_epochs": 50, |
|
"total_flos": 7.798931788086835e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|