|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 76518, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9869311795917304e-05, |
|
"loss": 1.2439, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9738623591834603e-05, |
|
"loss": 0.6806, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9607935387751905e-05, |
|
"loss": 0.6678, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9477247183669204e-05, |
|
"loss": 0.6594, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9346558979586506e-05, |
|
"loss": 0.6537, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9215870775503805e-05, |
|
"loss": 0.6493, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9085182571421107e-05, |
|
"loss": 0.646, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8954494367338406e-05, |
|
"loss": 0.6475, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8823806163255708e-05, |
|
"loss": 0.6358, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8693117959173007e-05, |
|
"loss": 0.6404, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.856242975509031e-05, |
|
"loss": 0.636, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8431741551007608e-05, |
|
"loss": 0.6307, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.830105334692491e-05, |
|
"loss": 0.6292, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.817036514284221e-05, |
|
"loss": 0.6254, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.803967693875951e-05, |
|
"loss": 0.632, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.790898873467681e-05, |
|
"loss": 0.6276, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7778300530594112e-05, |
|
"loss": 0.6295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.764761232651141e-05, |
|
"loss": 0.625, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.751692412242871e-05, |
|
"loss": 0.6272, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7386235918346012e-05, |
|
"loss": 0.6241, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.725554771426331e-05, |
|
"loss": 0.6208, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7124859510180613e-05, |
|
"loss": 0.6201, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.699417130609791e-05, |
|
"loss": 0.6271, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6863483102015214e-05, |
|
"loss": 0.6173, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6732794897932513e-05, |
|
"loss": 0.6172, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6602106693849815e-05, |
|
"loss": 0.6135, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6471418489767114e-05, |
|
"loss": 0.6115, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6340730285684416e-05, |
|
"loss": 0.6191, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6210042081601715e-05, |
|
"loss": 0.6177, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6079353877519017e-05, |
|
"loss": 0.6171, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5948665673436316e-05, |
|
"loss": 0.6225, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5817977469353618e-05, |
|
"loss": 0.6078, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5687289265270917e-05, |
|
"loss": 0.6115, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.555660106118822e-05, |
|
"loss": 0.6085, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5425912857105518e-05, |
|
"loss": 0.6011, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.529522465302282e-05, |
|
"loss": 0.6096, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5164536448940119e-05, |
|
"loss": 0.6144, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5033848244857421e-05, |
|
"loss": 0.6042, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.490316004077472e-05, |
|
"loss": 0.6149, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4772471836692022e-05, |
|
"loss": 0.6024, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.464178363260932e-05, |
|
"loss": 0.603, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4511095428526623e-05, |
|
"loss": 0.6062, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4380407224443922e-05, |
|
"loss": 0.6073, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4249719020361224e-05, |
|
"loss": 0.6058, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4119030816278523e-05, |
|
"loss": 0.6066, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3988342612195825e-05, |
|
"loss": 0.6008, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3857654408113126e-05, |
|
"loss": 0.622, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3726966204030424e-05, |
|
"loss": 0.6013, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3596277999947727e-05, |
|
"loss": 0.6073, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3465589795865025e-05, |
|
"loss": 0.5973, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3334901591782328e-05, |
|
"loss": 0.6065, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 18.7995, |
|
"eval_loss": 0.547476589679718, |
|
"eval_rouge1": 34.0929, |
|
"eval_rouge2": 12.225, |
|
"eval_rougeL": 27.465, |
|
"eval_rougeLsum": 27.4736, |
|
"eval_runtime": 1500.9794, |
|
"eval_samples_per_second": 7.55, |
|
"eval_steps_per_second": 1.887, |
|
"step": 25506 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3204213387699626e-05, |
|
"loss": 0.5931, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.3073525183616929e-05, |
|
"loss": 0.5981, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2942836979534227e-05, |
|
"loss": 0.5865, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.281214877545153e-05, |
|
"loss": 0.5894, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.2681460571368828e-05, |
|
"loss": 0.5844, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.255077236728613e-05, |
|
"loss": 0.5928, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.242008416320343e-05, |
|
"loss": 0.5891, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2289395959120732e-05, |
|
"loss": 0.5868, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.215870775503803e-05, |
|
"loss": 0.5831, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.2028019550955333e-05, |
|
"loss": 0.5894, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1897331346872632e-05, |
|
"loss": 0.5926, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1766643142789934e-05, |
|
"loss": 0.5891, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1635954938707233e-05, |
|
"loss": 0.5925, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1505266734624533e-05, |
|
"loss": 0.582, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1374578530541834e-05, |
|
"loss": 0.5904, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1243890326459134e-05, |
|
"loss": 0.596, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1113202122376435e-05, |
|
"loss": 0.5851, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0982513918293735e-05, |
|
"loss": 0.5852, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0851825714211037e-05, |
|
"loss": 0.5952, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0721137510128336e-05, |
|
"loss": 0.5857, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0590449306045638e-05, |
|
"loss": 0.5875, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0459761101962937e-05, |
|
"loss": 0.5901, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.032907289788024e-05, |
|
"loss": 0.5883, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0198384693797538e-05, |
|
"loss": 0.5893, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.006769648971484e-05, |
|
"loss": 0.5853, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.93700828563214e-06, |
|
"loss": 0.5839, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.80632008154944e-06, |
|
"loss": 0.5877, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.67563187746674e-06, |
|
"loss": 0.5904, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.54494367338404e-06, |
|
"loss": 0.5753, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.414255469301343e-06, |
|
"loss": 0.5855, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.283567265218643e-06, |
|
"loss": 0.5917, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.152879061135942e-06, |
|
"loss": 0.5818, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.022190857053243e-06, |
|
"loss": 0.578, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.891502652970543e-06, |
|
"loss": 0.5831, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.760814448887844e-06, |
|
"loss": 0.583, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.630126244805144e-06, |
|
"loss": 0.5869, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.499438040722445e-06, |
|
"loss": 0.5802, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.368749836639745e-06, |
|
"loss": 0.5776, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.238061632557046e-06, |
|
"loss": 0.5833, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.107373428474346e-06, |
|
"loss": 0.5844, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.976685224391647e-06, |
|
"loss": 0.5834, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.845997020308947e-06, |
|
"loss": 0.5829, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.715308816226248e-06, |
|
"loss": 0.5849, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.584620612143548e-06, |
|
"loss": 0.5874, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.453932408060849e-06, |
|
"loss": 0.5879, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.323244203978149e-06, |
|
"loss": 0.577, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.19255599989545e-06, |
|
"loss": 0.5807, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.06186779581275e-06, |
|
"loss": 0.5865, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.931179591730051e-06, |
|
"loss": 0.5765, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.8004913876473514e-06, |
|
"loss": 0.5883, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.669803183564651e-06, |
|
"loss": 0.5787, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 18.8155, |
|
"eval_loss": 0.5370269417762756, |
|
"eval_rouge1": 34.4751, |
|
"eval_rouge2": 12.6984, |
|
"eval_rougeL": 27.9605, |
|
"eval_rougeLsum": 27.9586, |
|
"eval_runtime": 1516.6364, |
|
"eval_samples_per_second": 7.472, |
|
"eval_steps_per_second": 1.868, |
|
"step": 51012 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.539114979481952e-06, |
|
"loss": 0.5674, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.408426775399254e-06, |
|
"loss": 0.5708, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.277738571316554e-06, |
|
"loss": 0.5746, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.147050367233854e-06, |
|
"loss": 0.5726, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.0163621631511545e-06, |
|
"loss": 0.5789, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.885673959068455e-06, |
|
"loss": 0.5693, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.7549857549857555e-06, |
|
"loss": 0.5677, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.624297550903056e-06, |
|
"loss": 0.5724, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.4936093468203565e-06, |
|
"loss": 0.5737, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.362921142737657e-06, |
|
"loss": 0.5758, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.2322329386549575e-06, |
|
"loss": 0.5693, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.101544734572258e-06, |
|
"loss": 0.5772, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.9708565304895586e-06, |
|
"loss": 0.5793, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.840168326406859e-06, |
|
"loss": 0.576, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.70948012232416e-06, |
|
"loss": 0.5768, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.57879191824146e-06, |
|
"loss": 0.5685, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.448103714158761e-06, |
|
"loss": 0.5706, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.31741551007606e-06, |
|
"loss": 0.5796, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.186727305993362e-06, |
|
"loss": 0.5763, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.056039101910662e-06, |
|
"loss": 0.5707, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.925350897827963e-06, |
|
"loss": 0.5748, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.794662693745263e-06, |
|
"loss": 0.5762, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6639744896625632e-06, |
|
"loss": 0.5693, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5332862855798637e-06, |
|
"loss": 0.5754, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4025980814971643e-06, |
|
"loss": 0.5832, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2719098774144648e-06, |
|
"loss": 0.5653, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.1412216733317653e-06, |
|
"loss": 0.5635, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.0105334692490658e-06, |
|
"loss": 0.5743, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8798452651663663e-06, |
|
"loss": 0.5633, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7491570610836672e-06, |
|
"loss": 0.5741, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.6184688570009677e-06, |
|
"loss": 0.5691, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.487780652918268e-06, |
|
"loss": 0.5758, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3570924488355683e-06, |
|
"loss": 0.5637, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.226404244752869e-06, |
|
"loss": 0.5753, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0957160406701694e-06, |
|
"loss": 0.5763, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.96502783658747e-06, |
|
"loss": 0.5738, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.8343396325047702e-06, |
|
"loss": 0.5698, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7036514284220707e-06, |
|
"loss": 0.5725, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.5729632243393714e-06, |
|
"loss": 0.5733, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.442275020256672e-06, |
|
"loss": 0.578, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.3115868161739722e-06, |
|
"loss": 0.5755, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.1808986120912727e-06, |
|
"loss": 0.5806, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0502104080085732e-06, |
|
"loss": 0.5736, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.195222039258737e-07, |
|
"loss": 0.5665, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.888339998431741e-07, |
|
"loss": 0.5711, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.581457957604747e-07, |
|
"loss": 0.5699, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.274575916777753e-07, |
|
"loss": 0.5727, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.967693875950757e-07, |
|
"loss": 0.5702, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.660811835123762e-07, |
|
"loss": 0.5697, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.3539297942967668e-07, |
|
"loss": 0.5658, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.704775346977182e-09, |
|
"loss": 0.5692, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 18.8131, |
|
"eval_loss": 0.5340152382850647, |
|
"eval_rouge1": 34.6791, |
|
"eval_rouge2": 12.8236, |
|
"eval_rougeL": 28.1201, |
|
"eval_rougeLsum": 28.1241, |
|
"eval_runtime": 1504.5288, |
|
"eval_samples_per_second": 7.532, |
|
"eval_steps_per_second": 1.883, |
|
"step": 76518 |
|
} |
|
], |
|
"max_steps": 76518, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.727644383379456e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|