{
  "best_metric": 1.341736078262329,
  "best_model_checkpoint": "./dual/flan-t5-base-dual/checkpoint-52010",
  "epoch": 10.0,
  "global_step": 52010,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.1, "learning_rate": 4.951932320707556e-05, "loss": 2.0525, "step": 500 },
    { "epoch": 0.19, "learning_rate": 4.903864641415113e-05, "loss": 1.9359, "step": 1000 },
    { "epoch": 0.29, "learning_rate": 4.855796962122669e-05, "loss": 1.892, "step": 1500 },
    { "epoch": 0.38, "learning_rate": 4.807729282830225e-05, "loss": 1.8455, "step": 2000 },
    { "epoch": 0.48, "learning_rate": 4.7596616035377816e-05, "loss": 1.8273, "step": 2500 },
    { "epoch": 0.58, "learning_rate": 4.7115939242453377e-05, "loss": 1.8317, "step": 3000 },
    { "epoch": 0.67, "learning_rate": 4.663526244952894e-05, "loss": 1.8103, "step": 3500 },
    { "epoch": 0.77, "learning_rate": 4.61545856566045e-05, "loss": 1.8032, "step": 4000 },
    { "epoch": 0.87, "learning_rate": 4.5673908863680064e-05, "loss": 1.7842, "step": 4500 },
    { "epoch": 0.96, "learning_rate": 4.5193232070755624e-05, "loss": 1.7674, "step": 5000 },
    { "epoch": 1.0, "eval_gen_len": 16.8, "eval_loss": 1.4587862491607666, "eval_rouge1": 43.5356, "eval_rouge2": 25.8338, "eval_rougeL": 41.1764, "eval_rougeLsum": 41.816, "eval_runtime": 629.4032, "eval_samples_per_second": 16.524, "eval_steps_per_second": 1.033, "step": 5201 },
    { "epoch": 1.06, "learning_rate": 4.4712555277831184e-05, "loss": 1.7405, "step": 5500 },
    { "epoch": 1.15, "learning_rate": 4.423187848490675e-05, "loss": 1.7135, "step": 6000 },
    { "epoch": 1.25, "learning_rate": 4.375120169198231e-05, "loss": 1.7161, "step": 6500 },
    { "epoch": 1.35, "learning_rate": 4.327052489905787e-05, "loss": 1.7208, "step": 7000 },
    { "epoch": 1.44, "learning_rate": 4.278984810613344e-05, "loss": 1.6999, "step": 7500 },
    { "epoch": 1.54, "learning_rate": 4.2309171313209e-05, "loss": 1.694, "step": 8000 },
    { "epoch": 1.63, "learning_rate": 4.182849452028456e-05, "loss": 1.6837, "step": 8500 },
    { "epoch": 1.73, "learning_rate": 4.1347817727360125e-05, "loss": 1.6892, "step": 9000 },
    { "epoch": 1.83, "learning_rate": 4.0867140934435685e-05, "loss": 1.6811, "step": 9500 },
    { "epoch": 1.92, "learning_rate": 4.038646414151125e-05, "loss": 1.7004, "step": 10000 },
    { "epoch": 2.0, "eval_gen_len": 16.87192307692308, "eval_loss": 1.4108598232269287, "eval_rouge1": 44.1953, "eval_rouge2": 26.6443, "eval_rougeL": 41.7387, "eval_rougeLsum": 42.3745, "eval_runtime": 674.3077, "eval_samples_per_second": 15.423, "eval_steps_per_second": 0.964, "step": 10402 },
    { "epoch": 2.02, "learning_rate": 3.990578734858681e-05, "loss": 1.6572, "step": 10500 },
    { "epoch": 2.11, "learning_rate": 3.942511055566238e-05, "loss": 1.6449, "step": 11000 },
    { "epoch": 2.21, "learning_rate": 3.894443376273794e-05, "loss": 1.6235, "step": 11500 },
    { "epoch": 2.31, "learning_rate": 3.84637569698135e-05, "loss": 1.6573, "step": 12000 },
    { "epoch": 2.4, "learning_rate": 3.7983080176889066e-05, "loss": 1.6262, "step": 12500 },
    { "epoch": 2.5, "learning_rate": 3.7502403383964626e-05, "loss": 1.6377, "step": 13000 },
    { "epoch": 2.6, "learning_rate": 3.7021726591040186e-05, "loss": 1.6407, "step": 13500 },
    { "epoch": 2.69, "learning_rate": 3.654104979811575e-05, "loss": 1.6343, "step": 14000 },
    { "epoch": 2.79, "learning_rate": 3.606037300519131e-05, "loss": 1.6221, "step": 14500 },
    { "epoch": 2.88, "learning_rate": 3.557969621226687e-05, "loss": 1.6127, "step": 15000 },
    { "epoch": 2.98, "learning_rate": 3.509901941934244e-05, "loss": 1.622, "step": 15500 },
    { "epoch": 3.0, "eval_gen_len": 16.911923076923078, "eval_loss": 1.387160301208496, "eval_rouge1": 44.6617, "eval_rouge2": 27.2456, "eval_rougeL": 42.2185, "eval_rougeLsum": 42.8309, "eval_runtime": 678.3822, "eval_samples_per_second": 15.331, "eval_steps_per_second": 0.958, "step": 15603 },
    { "epoch": 3.08, "learning_rate": 3.4618342626418e-05, "loss": 1.5886, "step": 16000 },
    { "epoch": 3.17, "learning_rate": 3.413766583349356e-05, "loss": 1.5913, "step": 16500 },
    { "epoch": 3.27, "learning_rate": 3.365698904056912e-05, "loss": 1.5693, "step": 17000 },
    { "epoch": 3.36, "learning_rate": 3.317631224764469e-05, "loss": 1.5781, "step": 17500 },
    { "epoch": 3.46, "learning_rate": 3.269563545472025e-05, "loss": 1.5944, "step": 18000 },
    { "epoch": 3.56, "learning_rate": 3.221495866179581e-05, "loss": 1.5671, "step": 18500 },
    { "epoch": 3.65, "learning_rate": 3.1734281868871374e-05, "loss": 1.5979, "step": 19000 },
    { "epoch": 3.75, "learning_rate": 3.1253605075946935e-05, "loss": 1.6014, "step": 19500 },
    { "epoch": 3.85, "learning_rate": 3.0772928283022495e-05, "loss": 1.5929, "step": 20000 },
    { "epoch": 3.94, "learning_rate": 3.029225149009806e-05, "loss": 1.5822, "step": 20500 },
    { "epoch": 4.0, "eval_gen_len": 16.976153846153846, "eval_loss": 1.3675929307937622, "eval_rouge1": 44.7885, "eval_rouge2": 27.4914, "eval_rougeL": 42.3527, "eval_rougeLsum": 42.9959, "eval_runtime": 672.7849, "eval_samples_per_second": 15.458, "eval_steps_per_second": 0.966, "step": 20804 },
    { "epoch": 4.04, "learning_rate": 2.981157469717362e-05, "loss": 1.5658, "step": 21000 },
    { "epoch": 4.13, "learning_rate": 2.9330897904249182e-05, "loss": 1.5656, "step": 21500 },
    { "epoch": 4.23, "learning_rate": 2.885022111132475e-05, "loss": 1.5643, "step": 22000 },
    { "epoch": 4.33, "learning_rate": 2.836954431840031e-05, "loss": 1.5484, "step": 22500 },
    { "epoch": 4.42, "learning_rate": 2.788886752547587e-05, "loss": 1.5504, "step": 23000 },
    { "epoch": 4.52, "learning_rate": 2.7408190732551436e-05, "loss": 1.5546, "step": 23500 },
    { "epoch": 4.61, "learning_rate": 2.6927513939626996e-05, "loss": 1.556, "step": 24000 },
    { "epoch": 4.71, "learning_rate": 2.644683714670256e-05, "loss": 1.5448, "step": 24500 },
    { "epoch": 4.81, "learning_rate": 2.596616035377812e-05, "loss": 1.5519, "step": 25000 },
    { "epoch": 4.9, "learning_rate": 2.5485483560853686e-05, "loss": 1.5606, "step": 25500 },
    { "epoch": 5.0, "learning_rate": 2.5004806767929246e-05, "loss": 1.5541, "step": 26000 },
    { "epoch": 5.0, "eval_gen_len": 17.028846153846153, "eval_loss": 1.3574897050857544, "eval_rouge1": 44.7589, "eval_rouge2": 27.4697, "eval_rougeL": 42.3549, "eval_rougeLsum": 42.9704, "eval_runtime": 665.8005, "eval_samples_per_second": 15.62, "eval_steps_per_second": 0.976, "step": 26005 },
    { "epoch": 5.1, "learning_rate": 2.452412997500481e-05, "loss": 1.5154, "step": 26500 },
    { "epoch": 5.19, "learning_rate": 2.404345318208037e-05, "loss": 1.5163, "step": 27000 },
    { "epoch": 5.29, "learning_rate": 2.3562776389155933e-05, "loss": 1.5176, "step": 27500 },
    { "epoch": 5.38, "learning_rate": 2.3082099596231497e-05, "loss": 1.5293, "step": 28000 },
    { "epoch": 5.48, "learning_rate": 2.2601422803307057e-05, "loss": 1.5237, "step": 28500 },
    { "epoch": 5.58, "learning_rate": 2.212074601038262e-05, "loss": 1.5422, "step": 29000 },
    { "epoch": 5.67, "learning_rate": 2.164006921745818e-05, "loss": 1.5309, "step": 29500 },
    { "epoch": 5.77, "learning_rate": 2.1159392424533744e-05, "loss": 1.5296, "step": 30000 },
    { "epoch": 5.86, "learning_rate": 2.0678715631609308e-05, "loss": 1.5137, "step": 30500 },
    { "epoch": 5.96, "learning_rate": 2.0198038838684868e-05, "loss": 1.5116, "step": 31000 },
    { "epoch": 6.0, "eval_gen_len": 16.991923076923076, "eval_loss": 1.3511042594909668, "eval_rouge1": 45.0017, "eval_rouge2": 27.6906, "eval_rougeL": 42.5834, "eval_rougeLsum": 43.2073, "eval_runtime": 658.0426, "eval_samples_per_second": 15.804, "eval_steps_per_second": 0.988, "step": 31206 },
    { "epoch": 6.06, "learning_rate": 1.971736204576043e-05, "loss": 1.5018, "step": 31500 },
    { "epoch": 6.15, "learning_rate": 1.9236685252835995e-05, "loss": 1.5037, "step": 32000 },
    { "epoch": 6.25, "learning_rate": 1.8756008459911555e-05, "loss": 1.4991, "step": 32500 },
    { "epoch": 6.34, "learning_rate": 1.8275331666987118e-05, "loss": 1.4977, "step": 33000 },
    { "epoch": 6.44, "learning_rate": 1.779465487406268e-05, "loss": 1.5024, "step": 33500 },
    { "epoch": 6.54, "learning_rate": 1.7313978081138242e-05, "loss": 1.5043, "step": 34000 },
    { "epoch": 6.63, "learning_rate": 1.6833301288213805e-05, "loss": 1.506, "step": 34500 },
    { "epoch": 6.73, "learning_rate": 1.635262449528937e-05, "loss": 1.497, "step": 35000 },
    { "epoch": 6.83, "learning_rate": 1.5871947702364932e-05, "loss": 1.5132, "step": 35500 },
    { "epoch": 6.92, "learning_rate": 1.5391270909440492e-05, "loss": 1.5079, "step": 36000 },
    { "epoch": 7.0, "eval_gen_len": 16.982019230769232, "eval_loss": 1.347075343132019, "eval_rouge1": 44.9759, "eval_rouge2": 27.7179, "eval_rougeL": 42.5719, "eval_rougeLsum": 43.1803, "eval_runtime": 667.8543, "eval_samples_per_second": 15.572, "eval_steps_per_second": 0.973, "step": 36407 },
    { "epoch": 7.02, "learning_rate": 1.4910594116516054e-05, "loss": 1.5017, "step": 36500 },
    { "epoch": 7.11, "learning_rate": 1.4429917323591618e-05, "loss": 1.4946, "step": 37000 },
    { "epoch": 7.21, "learning_rate": 1.394924053066718e-05, "loss": 1.4941, "step": 37500 },
    { "epoch": 7.31, "learning_rate": 1.3468563737742743e-05, "loss": 1.5029, "step": 38000 },
    { "epoch": 7.4, "learning_rate": 1.2987886944818307e-05, "loss": 1.4855, "step": 38500 },
    { "epoch": 7.5, "learning_rate": 1.2507210151893867e-05, "loss": 1.4726, "step": 39000 },
    { "epoch": 7.59, "learning_rate": 1.202653335896943e-05, "loss": 1.4687, "step": 39500 },
    { "epoch": 7.69, "learning_rate": 1.1545856566044992e-05, "loss": 1.4915, "step": 40000 },
    { "epoch": 7.79, "learning_rate": 1.1065179773120554e-05, "loss": 1.4793, "step": 40500 },
    { "epoch": 7.88, "learning_rate": 1.0584502980196116e-05, "loss": 1.4818, "step": 41000 },
    { "epoch": 7.98, "learning_rate": 1.0103826187271679e-05, "loss": 1.4771, "step": 41500 },
    { "epoch": 8.0, "eval_gen_len": 16.986923076923077, "eval_loss": 1.3443900346755981, "eval_rouge1": 45.2057, "eval_rouge2": 27.9779, "eval_rougeL": 42.7648, "eval_rougeLsum": 43.3885, "eval_runtime": 559.194, "eval_samples_per_second": 18.598, "eval_steps_per_second": 1.162, "step": 41608 },
    { "epoch": 8.08, "learning_rate": 9.623149394347242e-06, "loss": 1.4658, "step": 42000 },
    { "epoch": 8.17, "learning_rate": 9.142472601422804e-06, "loss": 1.469, "step": 42500 },
    { "epoch": 8.27, "learning_rate": 8.661795808498366e-06, "loss": 1.4966, "step": 43000 },
    { "epoch": 8.36, "learning_rate": 8.181119015573928e-06, "loss": 1.4691, "step": 43500 },
    { "epoch": 8.46, "learning_rate": 7.700442222649491e-06, "loss": 1.4767, "step": 44000 },
    { "epoch": 8.56, "learning_rate": 7.219765429725053e-06, "loss": 1.4875, "step": 44500 },
    { "epoch": 8.65, "learning_rate": 6.739088636800615e-06, "loss": 1.473, "step": 45000 },
    { "epoch": 8.75, "learning_rate": 6.258411843876178e-06, "loss": 1.4826, "step": 45500 },
    { "epoch": 8.84, "learning_rate": 5.77773505095174e-06, "loss": 1.4683, "step": 46000 },
    { "epoch": 8.94, "learning_rate": 5.297058258027303e-06, "loss": 1.4691, "step": 46500 },
    { "epoch": 9.0, "eval_gen_len": 16.951634615384616, "eval_loss": 1.3431836366653442, "eval_rouge1": 45.197, "eval_rouge2": 27.8923, "eval_rougeL": 42.7387, "eval_rougeLsum": 43.3577, "eval_runtime": 562.3582, "eval_samples_per_second": 18.494, "eval_steps_per_second": 1.156, "step": 46809 },
    { "epoch": 9.04, "learning_rate": 4.816381465102865e-06, "loss": 1.4663, "step": 47000 },
    { "epoch": 9.13, "learning_rate": 4.335704672178427e-06, "loss": 1.4658, "step": 47500 },
    { "epoch": 9.23, "learning_rate": 3.85502787925399e-06, "loss": 1.4679, "step": 48000 },
    { "epoch": 9.33, "learning_rate": 3.3743510863295526e-06, "loss": 1.4573, "step": 48500 },
    { "epoch": 9.42, "learning_rate": 2.8936742934051144e-06, "loss": 1.465, "step": 49000 },
    { "epoch": 9.52, "learning_rate": 2.412997500480677e-06, "loss": 1.4582, "step": 49500 },
    { "epoch": 9.61, "learning_rate": 1.9323207075562393e-06, "loss": 1.4736, "step": 50000 },
    { "epoch": 9.71, "learning_rate": 1.4516439146318017e-06, "loss": 1.4768, "step": 50500 },
    { "epoch": 9.81, "learning_rate": 9.70967121707364e-07, "loss": 1.4634, "step": 51000 },
    { "epoch": 9.9, "learning_rate": 4.902903287829264e-07, "loss": 1.4832, "step": 51500 },
    { "epoch": 10.0, "learning_rate": 9.613535858488752e-09, "loss": 1.4719, "step": 52000 },
    { "epoch": 10.0, "eval_gen_len": 16.988557692307694, "eval_loss": 1.341736078262329, "eval_rouge1": 45.2143, "eval_rouge2": 27.9673, "eval_rougeL": 42.7712, "eval_rougeLsum": 43.3892, "eval_runtime": 603.039, "eval_samples_per_second": 17.246, "eval_steps_per_second": 1.078, "step": 52010 }
  ],
  "max_steps": 52010,
  "num_train_epochs": 10,
  "total_flos": 5.697455075308339e+17,
  "trial_name": null,
  "trial_params": null
}