|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 78735, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009936495840477551, |
|
"loss": 1.2096, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009872991680955102, |
|
"loss": 1.2958, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009809487521432654, |
|
"loss": 1.3278, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009745983361910206, |
|
"loss": 1.3214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009682479202387756, |
|
"loss": 1.3299, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0009618975042865308, |
|
"loss": 1.3244, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0009555470883342859, |
|
"loss": 1.3279, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.000949196672382041, |
|
"loss": 1.3269, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0009428462564297961, |
|
"loss": 1.3179, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0009364958404775514, |
|
"loss": 1.3057, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.58219165847995, |
|
"eval_loss": 2.046381950378418, |
|
"eval_runtime": 1873.4069, |
|
"eval_samples_per_second": 89.652, |
|
"eval_steps_per_second": 0.701, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0009301454245253064, |
|
"loss": 1.1726, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0009237950085730616, |
|
"loss": 1.0732, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0009174445926208167, |
|
"loss": 1.0922, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0009110941766685719, |
|
"loss": 1.1075, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0009047437607163269, |
|
"loss": 1.112, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0008983933447640821, |
|
"loss": 1.1417, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0008920429288118372, |
|
"loss": 1.1343, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0008856925128595922, |
|
"loss": 1.1319, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0008793420969073475, |
|
"loss": 1.1326, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0008729916809551026, |
|
"loss": 1.1387, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6084486916138251, |
|
"eval_loss": 1.9456982612609863, |
|
"eval_runtime": 4861.9841, |
|
"eval_samples_per_second": 34.545, |
|
"eval_steps_per_second": 0.27, |
|
"step": 10498 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0008666412650028577, |
|
"loss": 1.124, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0008602908490506128, |
|
"loss": 0.9, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.000853940433098368, |
|
"loss": 0.9401, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.000847590017146123, |
|
"loss": 0.967, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0008412396011938782, |
|
"loss": 0.9734, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0008348891852416333, |
|
"loss": 0.9657, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0008285387692893885, |
|
"loss": 0.9883, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0008221883533371436, |
|
"loss": 0.9781, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0008158379373848988, |
|
"loss": 0.9822, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0008094875214326538, |
|
"loss": 0.992, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.000803137105480409, |
|
"loss": 0.988, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6124319014021613, |
|
"eval_loss": 1.965035319328308, |
|
"eval_runtime": 4587.2857, |
|
"eval_samples_per_second": 36.613, |
|
"eval_steps_per_second": 0.286, |
|
"step": 15747 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0007967866895281641, |
|
"loss": 0.8822, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0007904362735759192, |
|
"loss": 0.8149, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0007840858576236743, |
|
"loss": 0.8325, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0007777354416714295, |
|
"loss": 0.8412, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.0007713850257191847, |
|
"loss": 0.8421, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0007650346097669397, |
|
"loss": 0.8487, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0007586841938146949, |
|
"loss": 0.855, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.00075233377786245, |
|
"loss": 0.8623, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0007459833619102051, |
|
"loss": 0.8668, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0007396329459579602, |
|
"loss": 0.8653, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.634402071983567, |
|
"eval_loss": 1.9380866289138794, |
|
"eval_runtime": 1885.7719, |
|
"eval_samples_per_second": 89.064, |
|
"eval_steps_per_second": 0.696, |
|
"step": 20996 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0007332825300057155, |
|
"loss": 0.864, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0007269321140534705, |
|
"loss": 0.7073, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0007205816981012257, |
|
"loss": 0.7257, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0007142312821489808, |
|
"loss": 0.7415, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0007078808661967359, |
|
"loss": 0.7397, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.000701530450244491, |
|
"loss": 0.7635, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0006951800342922462, |
|
"loss": 0.7736, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.0006888296183400012, |
|
"loss": 0.7694, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0006824792023877565, |
|
"loss": 0.759, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0006761287864355116, |
|
"loss": 0.7728, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0006697783704832666, |
|
"loss": 0.7662, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6335327915215385, |
|
"eval_loss": 1.9391114711761475, |
|
"eval_runtime": 1717.5004, |
|
"eval_samples_per_second": 97.79, |
|
"eval_steps_per_second": 0.764, |
|
"step": 26245 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0006634279545310218, |
|
"loss": 0.7034, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0006570775385787769, |
|
"loss": 0.6446, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.000650727122626532, |
|
"loss": 0.6511, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0006443767066742871, |
|
"loss": 0.6562, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0006380262907220423, |
|
"loss": 0.6673, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0006316758747697975, |
|
"loss": 0.6719, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0006253254588175526, |
|
"loss": 0.6818, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.0006189750428653077, |
|
"loss": 0.6745, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0006126246269130629, |
|
"loss": 0.6778, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0006062742109608179, |
|
"loss": 0.6882, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6443749813938258, |
|
"eval_loss": 1.9590805768966675, |
|
"eval_runtime": 1673.9405, |
|
"eval_samples_per_second": 100.335, |
|
"eval_steps_per_second": 0.784, |
|
"step": 31494 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0005999237950085731, |
|
"loss": 0.6787, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0005935733790563282, |
|
"loss": 0.5613, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0005872229631040833, |
|
"loss": 0.5867, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0005808725471518384, |
|
"loss": 0.5839, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.0005745221311995937, |
|
"loss": 0.588, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0005681717152473487, |
|
"loss": 0.6022, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.0005618212992951039, |
|
"loss": 0.596, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.000555470883342859, |
|
"loss": 0.6072, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.000549120467390614, |
|
"loss": 0.615, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.0005427700514383692, |
|
"loss": 0.6103, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0005364196354861243, |
|
"loss": 0.601, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6510374802774552, |
|
"eval_loss": 1.9506298303604126, |
|
"eval_runtime": 4758.5372, |
|
"eval_samples_per_second": 35.296, |
|
"eval_steps_per_second": 0.276, |
|
"step": 36743 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0005300692195338794, |
|
"loss": 0.5518, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0005237188035816346, |
|
"loss": 0.5144, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.0005173683876293898, |
|
"loss": 0.5214, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0005110179716771448, |
|
"loss": 0.522, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0005046675557249, |
|
"loss": 0.5331, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.0004983171397726551, |
|
"loss": 0.5357, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.0004919667238204102, |
|
"loss": 0.53, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00048561630786816534, |
|
"loss": 0.5397, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0004792658919159205, |
|
"loss": 0.5319, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0004729154759636756, |
|
"loss": 0.5363, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6616772349736536, |
|
"eval_loss": 1.9555561542510986, |
|
"eval_runtime": 4754.1095, |
|
"eval_samples_per_second": 35.328, |
|
"eval_steps_per_second": 0.276, |
|
"step": 41992 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0004665650600114307, |
|
"loss": 0.3811, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0004602146440591859, |
|
"loss": 0.4593, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.000453864228106941, |
|
"loss": 0.4769, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.00044751381215469617, |
|
"loss": 0.4676, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.0004411633962024513, |
|
"loss": 0.4816, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0004348129802502064, |
|
"loss": 0.4765, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.00042846256429796155, |
|
"loss": 0.4835, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00042211214834571666, |
|
"loss": 0.4767, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.0004157617323934718, |
|
"loss": 0.4877, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.00040941131644122694, |
|
"loss": 0.4706, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.00040306090048898205, |
|
"loss": 0.4871, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6740853204727457, |
|
"eval_loss": 1.9037331342697144, |
|
"eval_runtime": 5063.3946, |
|
"eval_samples_per_second": 33.17, |
|
"eval_steps_per_second": 0.259, |
|
"step": 47241 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.00039671048453673716, |
|
"loss": 0.4473, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.0003903600685844923, |
|
"loss": 0.4062, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 0.0003840096526322474, |
|
"loss": 0.4208, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.00037765923668000255, |
|
"loss": 0.4233, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.00037130882072775766, |
|
"loss": 0.4211, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 0.0003649584047755128, |
|
"loss": 0.4273, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 0.00035860798882326794, |
|
"loss": 0.4259, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 0.00035225757287102305, |
|
"loss": 0.4157, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 0.00034590715691877816, |
|
"loss": 0.4294, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.00033955674096653333, |
|
"loss": 0.4338, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6806287398410289, |
|
"eval_loss": 1.9794013500213623, |
|
"eval_runtime": 4548.1376, |
|
"eval_samples_per_second": 36.928, |
|
"eval_steps_per_second": 0.289, |
|
"step": 52490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00033320632501428844, |
|
"loss": 0.4278, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.00032685590906204355, |
|
"loss": 0.3655, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.0003205054931097987, |
|
"loss": 0.3792, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.00031415507715755383, |
|
"loss": 0.3765, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 0.000307804661205309, |
|
"loss": 0.3777, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.0003014542452530641, |
|
"loss": 0.3766, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0002951038293008192, |
|
"loss": 0.3847, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 0.0002887534133485744, |
|
"loss": 0.3802, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 0.00028240299739632944, |
|
"loss": 0.3836, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.00027605258144408455, |
|
"loss": 0.3696, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 0.0002697021654918397, |
|
"loss": 0.3738, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6849275103450329, |
|
"eval_loss": 2.0053181648254395, |
|
"eval_runtime": 3187.3586, |
|
"eval_samples_per_second": 52.694, |
|
"eval_steps_per_second": 0.412, |
|
"step": 57739 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 0.0002633517495395948, |
|
"loss": 0.3603, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.00025700133358734994, |
|
"loss": 0.3371, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 0.0002506509176351051, |
|
"loss": 0.3334, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 0.0002443005016828602, |
|
"loss": 0.3342, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 0.00023795008573061535, |
|
"loss": 0.3392, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 0.0002315996697783705, |
|
"loss": 0.3323, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 0.00022524925382612563, |
|
"loss": 0.3444, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.00021889883787388074, |
|
"loss": 0.3395, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 0.00021254842192163588, |
|
"loss": 0.3321, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 0.00020619800596939102, |
|
"loss": 0.3338, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6955315411866274, |
|
"eval_loss": 2.0140492916107178, |
|
"eval_runtime": 1705.2241, |
|
"eval_samples_per_second": 98.494, |
|
"eval_steps_per_second": 0.77, |
|
"step": 62988 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001998475900171461, |
|
"loss": 0.3326, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.00019349717406490124, |
|
"loss": 0.3104, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 12.19, |
|
"learning_rate": 0.00018714675811265638, |
|
"loss": 0.2943, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 0.00018079634216041152, |
|
"loss": 0.3063, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 0.00017444592620816663, |
|
"loss": 0.302, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.00016809551025592177, |
|
"loss": 0.295, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 0.0001617450943036769, |
|
"loss": 0.2945, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 0.00015539467835143204, |
|
"loss": 0.3072, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 0.00014904426239918715, |
|
"loss": 0.2913, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.00014269384644694227, |
|
"loss": 0.3055, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 0.0001363434304946974, |
|
"loss": 0.2989, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6974427674079366, |
|
"eval_loss": 2.083035707473755, |
|
"eval_runtime": 1683.3073, |
|
"eval_samples_per_second": 99.777, |
|
"eval_steps_per_second": 0.78, |
|
"step": 68237 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 0.00012999301454245252, |
|
"loss": 0.2808, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 0.00012364259859020765, |
|
"loss": 0.2789, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.00011729218263796279, |
|
"loss": 0.2743, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.00011094176668571792, |
|
"loss": 0.2767, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 0.00010459135073347304, |
|
"loss": 0.2648, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 9.824093478122817e-05, |
|
"loss": 0.2724, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 9.18905188289833e-05, |
|
"loss": 0.2753, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 8.554010287673843e-05, |
|
"loss": 0.2743, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 7.918968692449357e-05, |
|
"loss": 0.2658, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 7.283927097224868e-05, |
|
"loss": 0.267, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7040516805096603, |
|
"eval_loss": 2.0823681354522705, |
|
"eval_runtime": 2701.2349, |
|
"eval_samples_per_second": 62.177, |
|
"eval_steps_per_second": 0.486, |
|
"step": 73486 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.64888550200038e-05, |
|
"loss": 0.2592, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 6.013843906775894e-05, |
|
"loss": 0.2473, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 5.378802311551407e-05, |
|
"loss": 0.2471, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 4.74376071632692e-05, |
|
"loss": 0.2592, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 4.1087191211024324e-05, |
|
"loss": 0.2493, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 3.473677525877945e-05, |
|
"loss": 0.2484, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 2.838635930653458e-05, |
|
"loss": 0.2468, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 2.203594335428971e-05, |
|
"loss": 0.2432, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 1.5685527402044834e-05, |
|
"loss": 0.2466, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 9.335111449799962e-06, |
|
"loss": 0.2472, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 2.98469549755509e-06, |
|
"loss": 0.236, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.708475484504778, |
|
"eval_loss": 2.0700299739837646, |
|
"eval_runtime": 1790.1892, |
|
"eval_samples_per_second": 93.82, |
|
"eval_steps_per_second": 0.733, |
|
"step": 78735 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 78735, |
|
"total_flos": 6.872831806674565e+20, |
|
"train_loss": 0.0, |
|
"train_runtime": 39.8476, |
|
"train_samples_per_second": 252894.596, |
|
"train_steps_per_second": 1975.901 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 78735, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 6.872831806674565e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|