|
{ |
|
"best_metric": 49.33, |
|
"best_model_checkpoint": "outputs/UniPoll-t5/checkpoint-60760", |
|
"epoch": 10.0, |
|
"global_step": 60760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9753127057274523e-05, |
|
"loss": 16.5215, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9506254114549046e-05, |
|
"loss": 3.5706, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.9259381171823572e-05, |
|
"loss": 2.9107, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.901250822909809e-05, |
|
"loss": 2.6763, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.8765635286372614e-05, |
|
"loss": 2.4391, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8518762343647136e-05, |
|
"loss": 2.2868, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.827188940092166e-05, |
|
"loss": 2.2185, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.802501645819618e-05, |
|
"loss": 2.0841, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7778143515470708e-05, |
|
"loss": 2.013, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.7531270572745227e-05, |
|
"loss": 1.9524, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.728439763001975e-05, |
|
"loss": 1.9129, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7037524687294272e-05, |
|
"loss": 1.8501, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_choices_bleu1": 34.2743, |
|
"eval_choices_bleu2": 27.071, |
|
"eval_choices_bleu3": 23.8714, |
|
"eval_choices_bleu4": 19.9582, |
|
"eval_choices_rouge1": 43.056, |
|
"eval_choices_rouge2": 30.797, |
|
"eval_choices_rougeL": 40.79, |
|
"eval_choices_rougeLsum": 30.362, |
|
"eval_loss": 1.7218139171600342, |
|
"eval_mean_rouge1": 44.783500000000004, |
|
"eval_runtime": 613.1086, |
|
"eval_samples_per_second": 3.303, |
|
"eval_steps_per_second": 0.414, |
|
"eval_title_bleu1": 39.1598, |
|
"eval_title_bleu2": 24.8472, |
|
"eval_title_bleu3": 17.1189, |
|
"eval_title_bleu4": 11.4654, |
|
"eval_title_rouge1": 46.511, |
|
"eval_title_rouge2": 28.336, |
|
"eval_title_rougeL": 43.814, |
|
"eval_title_rougeLsum": 28.185, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.6790651744568795e-05, |
|
"loss": 1.7659, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.654377880184332e-05, |
|
"loss": 1.7064, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.6296905859117843e-05, |
|
"loss": 1.6739, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.6050032916392366e-05, |
|
"loss": 1.6589, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.5803159973666885e-05, |
|
"loss": 1.6193, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5556287030941408e-05, |
|
"loss": 1.6175, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.530941408821593e-05, |
|
"loss": 1.6042, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.5062541145490456e-05, |
|
"loss": 1.5438, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.481566820276498e-05, |
|
"loss": 1.5275, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.45687952600395e-05, |
|
"loss": 1.5144, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.432192231731402e-05, |
|
"loss": 1.4911, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.4075049374588544e-05, |
|
"loss": 1.4301, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_choices_bleu1": 36.2304, |
|
"eval_choices_bleu2": 29.2436, |
|
"eval_choices_bleu3": 26.0301, |
|
"eval_choices_bleu4": 21.8164, |
|
"eval_choices_rouge1": 45.164, |
|
"eval_choices_rouge2": 33.104, |
|
"eval_choices_rougeL": 42.835, |
|
"eval_choices_rougeLsum": 32.498, |
|
"eval_loss": 1.6042917966842651, |
|
"eval_mean_rouge1": 47.2485, |
|
"eval_runtime": 613.8133, |
|
"eval_samples_per_second": 3.299, |
|
"eval_steps_per_second": 0.414, |
|
"eval_title_bleu1": 41.7754, |
|
"eval_title_bleu2": 27.1102, |
|
"eval_title_bleu3": 18.344, |
|
"eval_title_bleu4": 12.3234, |
|
"eval_title_rouge1": 49.333, |
|
"eval_title_rouge2": 30.676, |
|
"eval_title_rougeL": 46.561, |
|
"eval_title_rougeLsum": 30.748, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.3828176431863066e-05, |
|
"loss": 1.4002, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.3581303489137592e-05, |
|
"loss": 1.3791, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.3334430546412115e-05, |
|
"loss": 1.3332, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3087557603686637e-05, |
|
"loss": 1.3577, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.284068466096116e-05, |
|
"loss": 1.3483, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.259381171823568e-05, |
|
"loss": 1.3584, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.2346938775510205e-05, |
|
"loss": 1.3186, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.2100065832784728e-05, |
|
"loss": 1.3283, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.185319289005925e-05, |
|
"loss": 1.2867, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.1606319947333773e-05, |
|
"loss": 1.2766, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1359447004608296e-05, |
|
"loss": 1.3025, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.111257406188282e-05, |
|
"loss": 1.2736, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_choices_bleu1": 36.9274, |
|
"eval_choices_bleu2": 30.039, |
|
"eval_choices_bleu3": 26.4107, |
|
"eval_choices_bleu4": 22.0831, |
|
"eval_choices_rouge1": 46.229, |
|
"eval_choices_rouge2": 33.898, |
|
"eval_choices_rougeL": 43.746, |
|
"eval_choices_rougeLsum": 33.141, |
|
"eval_loss": 1.5690504312515259, |
|
"eval_mean_rouge1": 48.167500000000004, |
|
"eval_runtime": 614.3575, |
|
"eval_samples_per_second": 3.296, |
|
"eval_steps_per_second": 0.413, |
|
"eval_title_bleu1": 42.3694, |
|
"eval_title_bleu2": 28.0839, |
|
"eval_title_bleu3": 19.3916, |
|
"eval_title_bleu4": 13.1408, |
|
"eval_title_rouge1": 50.106, |
|
"eval_title_rouge2": 31.608, |
|
"eval_title_rougeL": 47.1, |
|
"eval_title_rougeLsum": 31.399, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.086570111915734e-05, |
|
"loss": 1.2176, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.0618828176431864e-05, |
|
"loss": 1.2037, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.0371955233706386e-05, |
|
"loss": 1.2104, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 2.012508229098091e-05, |
|
"loss": 1.1522, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.987820934825543e-05, |
|
"loss": 1.1969, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.9631336405529957e-05, |
|
"loss": 1.1861, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.9384463462804477e-05, |
|
"loss": 1.1536, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.9137590520079e-05, |
|
"loss": 1.1394, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.8890717577353522e-05, |
|
"loss": 1.1229, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.8643844634628044e-05, |
|
"loss": 1.1233, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.8396971691902567e-05, |
|
"loss": 1.1265, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.8150098749177093e-05, |
|
"loss": 1.1438, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_choices_bleu1": 38.2715, |
|
"eval_choices_bleu2": 30.7803, |
|
"eval_choices_bleu3": 26.9954, |
|
"eval_choices_bleu4": 22.6626, |
|
"eval_choices_rouge1": 46.767, |
|
"eval_choices_rouge2": 34.301, |
|
"eval_choices_rougeL": 44.062, |
|
"eval_choices_rougeLsum": 33.605, |
|
"eval_loss": 1.5627468824386597, |
|
"eval_mean_rouge1": 48.6515, |
|
"eval_runtime": 625.5501, |
|
"eval_samples_per_second": 3.237, |
|
"eval_steps_per_second": 0.406, |
|
"eval_title_bleu1": 42.7056, |
|
"eval_title_bleu2": 28.0993, |
|
"eval_title_bleu3": 19.498, |
|
"eval_title_bleu4": 12.7399, |
|
"eval_title_rouge1": 50.536, |
|
"eval_title_rouge2": 31.852, |
|
"eval_title_rougeL": 47.453, |
|
"eval_title_rougeLsum": 31.678, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.7903225806451616e-05, |
|
"loss": 1.0962, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.7656352863726135e-05, |
|
"loss": 1.0664, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.7409479921000658e-05, |
|
"loss": 1.0874, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.716260697827518e-05, |
|
"loss": 1.0608, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.6915734035549703e-05, |
|
"loss": 1.0367, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.666886109282423e-05, |
|
"loss": 1.0397, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.642198815009875e-05, |
|
"loss": 1.0378, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.617511520737327e-05, |
|
"loss": 1.0143, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 1.5928242264647793e-05, |
|
"loss": 1.0186, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.5681369321922316e-05, |
|
"loss": 1.0243, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 1.5434496379196842e-05, |
|
"loss": 1.038, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.5187623436471364e-05, |
|
"loss": 1.0026, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_choices_bleu1": 37.513, |
|
"eval_choices_bleu2": 30.5119, |
|
"eval_choices_bleu3": 26.8236, |
|
"eval_choices_bleu4": 22.3797, |
|
"eval_choices_rouge1": 46.999, |
|
"eval_choices_rouge2": 34.451, |
|
"eval_choices_rougeL": 44.427, |
|
"eval_choices_rougeLsum": 33.652, |
|
"eval_loss": 1.5795127153396606, |
|
"eval_mean_rouge1": 48.658500000000004, |
|
"eval_runtime": 619.9047, |
|
"eval_samples_per_second": 3.267, |
|
"eval_steps_per_second": 0.41, |
|
"eval_title_bleu1": 42.3561, |
|
"eval_title_bleu2": 27.804, |
|
"eval_title_bleu3": 19.0851, |
|
"eval_title_bleu4": 12.6803, |
|
"eval_title_rouge1": 50.318, |
|
"eval_title_rouge2": 31.494, |
|
"eval_title_rougeL": 47.297, |
|
"eval_title_rougeLsum": 31.461, |
|
"step": 30380 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 1.4940750493745885e-05, |
|
"loss": 1.0107, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1.4693877551020408e-05, |
|
"loss": 0.9664, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.4447004608294932e-05, |
|
"loss": 0.9342, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 1.4200131665569453e-05, |
|
"loss": 0.9632, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 1.3953258722843976e-05, |
|
"loss": 0.9448, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 1.37063857801185e-05, |
|
"loss": 0.9669, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 1.3459512837393023e-05, |
|
"loss": 0.9361, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1.3212639894667544e-05, |
|
"loss": 0.9445, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.2965766951942068e-05, |
|
"loss": 0.9515, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.271889400921659e-05, |
|
"loss": 0.9488, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.2472021066491112e-05, |
|
"loss": 0.9145, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1.2225148123765636e-05, |
|
"loss": 0.9493, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_choices_bleu1": 38.7421, |
|
"eval_choices_bleu2": 31.1954, |
|
"eval_choices_bleu3": 27.0903, |
|
"eval_choices_bleu4": 22.4639, |
|
"eval_choices_rouge1": 47.617, |
|
"eval_choices_rouge2": 34.86, |
|
"eval_choices_rougeL": 44.679, |
|
"eval_choices_rougeLsum": 34.003, |
|
"eval_loss": 1.5864702463150024, |
|
"eval_mean_rouge1": 48.966499999999996, |
|
"eval_runtime": 615.3469, |
|
"eval_samples_per_second": 3.291, |
|
"eval_steps_per_second": 0.413, |
|
"eval_title_bleu1": 42.4031, |
|
"eval_title_bleu2": 28.2011, |
|
"eval_title_bleu3": 19.3663, |
|
"eval_title_bleu4": 12.8818, |
|
"eval_title_rouge1": 50.316, |
|
"eval_title_rouge2": 31.841, |
|
"eval_title_rougeL": 47.179, |
|
"eval_title_rougeLsum": 31.509, |
|
"step": 36456 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.1978275181040158e-05, |
|
"loss": 0.9359, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.1731402238314681e-05, |
|
"loss": 0.9096, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.1484529295589204e-05, |
|
"loss": 0.9069, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.1237656352863726e-05, |
|
"loss": 0.89, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 1.099078341013825e-05, |
|
"loss": 0.8761, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.0743910467412772e-05, |
|
"loss": 0.8919, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1.0497037524687294e-05, |
|
"loss": 0.8869, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1.0250164581961818e-05, |
|
"loss": 0.8912, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.000329163923634e-05, |
|
"loss": 0.8885, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 9.756418696510862e-06, |
|
"loss": 0.8691, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 9.509545753785386e-06, |
|
"loss": 0.8882, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 9.262672811059907e-06, |
|
"loss": 0.8822, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 9.01579986833443e-06, |
|
"loss": 0.8622, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_choices_bleu1": 38.5191, |
|
"eval_choices_bleu2": 31.1743, |
|
"eval_choices_bleu3": 27.2188, |
|
"eval_choices_bleu4": 22.5257, |
|
"eval_choices_rouge1": 47.345, |
|
"eval_choices_rouge2": 34.733, |
|
"eval_choices_rougeL": 44.637, |
|
"eval_choices_rougeLsum": 33.956, |
|
"eval_loss": 1.6173765659332275, |
|
"eval_mean_rouge1": 49.0085, |
|
"eval_runtime": 624.2904, |
|
"eval_samples_per_second": 3.244, |
|
"eval_steps_per_second": 0.407, |
|
"eval_title_bleu1": 42.7395, |
|
"eval_title_bleu2": 28.2813, |
|
"eval_title_bleu3": 19.4836, |
|
"eval_title_bleu4": 12.7587, |
|
"eval_title_rouge1": 50.672, |
|
"eval_title_rouge2": 32.013, |
|
"eval_title_rougeL": 47.817, |
|
"eval_title_rougeLsum": 31.899, |
|
"step": 42532 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 8.768926925608954e-06, |
|
"loss": 0.8468, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 8.522053982883475e-06, |
|
"loss": 0.8209, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 8.275181040158e-06, |
|
"loss": 0.8379, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 8.028308097432522e-06, |
|
"loss": 0.8544, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 7.781435154707045e-06, |
|
"loss": 0.8394, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 7.534562211981568e-06, |
|
"loss": 0.8285, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 7.28768926925609e-06, |
|
"loss": 0.8333, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 7.0408163265306125e-06, |
|
"loss": 0.8359, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 6.793943383805135e-06, |
|
"loss": 0.8176, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.547070441079658e-06, |
|
"loss": 0.842, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.30019749835418e-06, |
|
"loss": 0.8351, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 6.053324555628703e-06, |
|
"loss": 0.8367, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_choices_bleu1": 38.7622, |
|
"eval_choices_bleu2": 31.0867, |
|
"eval_choices_bleu3": 26.9654, |
|
"eval_choices_bleu4": 22.4556, |
|
"eval_choices_rouge1": 47.531, |
|
"eval_choices_rouge2": 34.756, |
|
"eval_choices_rougeL": 44.649, |
|
"eval_choices_rougeLsum": 33.959, |
|
"eval_loss": 1.6293848752975464, |
|
"eval_mean_rouge1": 49.2605, |
|
"eval_runtime": 622.1469, |
|
"eval_samples_per_second": 3.255, |
|
"eval_steps_per_second": 0.408, |
|
"eval_title_bleu1": 43.0901, |
|
"eval_title_bleu2": 28.6471, |
|
"eval_title_bleu3": 19.7616, |
|
"eval_title_bleu4": 12.9726, |
|
"eval_title_rouge1": 50.99, |
|
"eval_title_rouge2": 32.44, |
|
"eval_title_rougeL": 47.948, |
|
"eval_title_rougeLsum": 32.24, |
|
"step": 48608 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.8064516129032256e-06, |
|
"loss": 0.8004, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.559578670177749e-06, |
|
"loss": 0.789, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 5.312705727452272e-06, |
|
"loss": 0.7843, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 5.065832784726793e-06, |
|
"loss": 0.8102, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 4.818959842001317e-06, |
|
"loss": 0.7979, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 4.5720868992758395e-06, |
|
"loss": 0.8262, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 4.325213956550362e-06, |
|
"loss": 0.8149, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 4.078341013824885e-06, |
|
"loss": 0.8178, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 3.831468071099407e-06, |
|
"loss": 0.7796, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 3.5845951283739304e-06, |
|
"loss": 0.8036, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 3.337722185648453e-06, |
|
"loss": 0.8169, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 3.0908492429229756e-06, |
|
"loss": 0.8138, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_choices_bleu1": 38.8037, |
|
"eval_choices_bleu2": 31.1809, |
|
"eval_choices_bleu3": 27.0585, |
|
"eval_choices_bleu4": 22.3774, |
|
"eval_choices_rouge1": 47.629, |
|
"eval_choices_rouge2": 34.778, |
|
"eval_choices_rougeL": 44.722, |
|
"eval_choices_rougeLsum": 33.944, |
|
"eval_loss": 1.6443856954574585, |
|
"eval_mean_rouge1": 49.32, |
|
"eval_runtime": 624.8802, |
|
"eval_samples_per_second": 3.241, |
|
"eval_steps_per_second": 0.406, |
|
"eval_title_bleu1": 43.0453, |
|
"eval_title_bleu2": 28.6763, |
|
"eval_title_bleu3": 19.7491, |
|
"eval_title_bleu4": 13.004, |
|
"eval_title_rouge1": 51.011, |
|
"eval_title_rouge2": 32.398, |
|
"eval_title_rougeL": 47.974, |
|
"eval_title_rougeLsum": 32.251, |
|
"step": 54684 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 2.8439763001974987e-06, |
|
"loss": 0.7764, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.5971033574720213e-06, |
|
"loss": 0.7939, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 2.3502304147465435e-06, |
|
"loss": 0.8069, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 2.1033574720210665e-06, |
|
"loss": 0.7649, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1.8564845292955893e-06, |
|
"loss": 0.7977, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 1.609611586570112e-06, |
|
"loss": 0.803, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 1.3627386438446346e-06, |
|
"loss": 0.7813, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.1158657011191574e-06, |
|
"loss": 0.7645, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 8.689927583936801e-07, |
|
"loss": 0.7865, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 6.221198156682028e-07, |
|
"loss": 0.7818, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 3.752468729427255e-07, |
|
"loss": 0.7572, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 1.283739302172482e-07, |
|
"loss": 0.7788, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_choices_bleu1": 38.8515, |
|
"eval_choices_bleu2": 31.2992, |
|
"eval_choices_bleu3": 27.2073, |
|
"eval_choices_bleu4": 22.4633, |
|
"eval_choices_rouge1": 47.688, |
|
"eval_choices_rouge2": 34.913, |
|
"eval_choices_rougeL": 44.77, |
|
"eval_choices_rougeLsum": 34.038, |
|
"eval_loss": 1.6531261205673218, |
|
"eval_mean_rouge1": 49.33, |
|
"eval_runtime": 620.225, |
|
"eval_samples_per_second": 3.265, |
|
"eval_steps_per_second": 0.41, |
|
"eval_title_bleu1": 43.0602, |
|
"eval_title_bleu2": 28.6495, |
|
"eval_title_bleu3": 19.8146, |
|
"eval_title_bleu4": 13.144, |
|
"eval_title_rouge1": 50.972, |
|
"eval_title_rouge2": 32.375, |
|
"eval_title_rougeL": 47.919, |
|
"eval_title_rougeLsum": 32.201, |
|
"step": 60760 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 60760, |
|
"total_flos": 1.262557342857001e+17, |
|
"train_loss": 1.288209796626761, |
|
"train_runtime": 17551.2786, |
|
"train_samples_per_second": 27.692, |
|
"train_steps_per_second": 3.462 |
|
} |
|
], |
|
"max_steps": 60760, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.262557342857001e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|