|
{ |
|
"best_metric": 54.3016, |
|
"best_model_checkpoint": "/uoa/home/s02sd1/Desktop/Project data/pytorch_project/facebook_28.03/checkpoint-43000", |
|
"epoch": 64.88011283497885, |
|
"global_step": 46000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.978843441466855e-06, |
|
"loss": 0.6224, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9576868829337095e-06, |
|
"loss": 0.5049, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_gen_len": 167.2707, |
|
"eval_loss": 0.75015789270401, |
|
"eval_rouge1": 61.2286, |
|
"eval_rouge2": 43.2506, |
|
"eval_rougeL": 42.2051, |
|
"eval_rougeLsum": 46.3795, |
|
"eval_runtime": 5804.0101, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.9365303244005645e-06, |
|
"loss": 0.4992, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.915373765867419e-06, |
|
"loss": 0.4523, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_gen_len": 169.4872, |
|
"eval_loss": 0.7313751578330994, |
|
"eval_rouge1": 63.5466, |
|
"eval_rouge2": 45.7831, |
|
"eval_rougeL": 44.4885, |
|
"eval_rougeLsum": 48.6313, |
|
"eval_runtime": 4918.4156, |
|
"eval_samples_per_second": 0.071, |
|
"eval_steps_per_second": 0.004, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.894217207334274e-06, |
|
"loss": 0.4422, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.8730606488011285e-06, |
|
"loss": 0.4036, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_gen_len": 160.4074, |
|
"eval_loss": 0.7353909611701965, |
|
"eval_rouge1": 64.7439, |
|
"eval_rouge2": 47.1723, |
|
"eval_rougeL": 45.9479, |
|
"eval_rougeLsum": 50.2341, |
|
"eval_runtime": 4241.5946, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.851904090267983e-06, |
|
"loss": 0.4028, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 2.830747531734838e-06, |
|
"loss": 0.3722, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_gen_len": 162.0085, |
|
"eval_loss": 0.72761070728302, |
|
"eval_rouge1": 65.6958, |
|
"eval_rouge2": 48.5007, |
|
"eval_rougeL": 46.6737, |
|
"eval_rougeLsum": 50.9835, |
|
"eval_runtime": 4232.4341, |
|
"eval_samples_per_second": 0.083, |
|
"eval_steps_per_second": 0.004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 2.8095909732016925e-06, |
|
"loss": 0.3749, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 2.7884344146685474e-06, |
|
"loss": 0.3482, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_gen_len": 163.9231, |
|
"eval_loss": 0.7417466640472412, |
|
"eval_rouge1": 65.933, |
|
"eval_rouge2": 48.921, |
|
"eval_rougeL": 47.5877, |
|
"eval_rougeLsum": 52.0114, |
|
"eval_runtime": 4836.5569, |
|
"eval_samples_per_second": 0.073, |
|
"eval_steps_per_second": 0.004, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 2.767277856135402e-06, |
|
"loss": 0.3312, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 2.746121297602257e-06, |
|
"loss": 0.3174, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_gen_len": 163.5527, |
|
"eval_loss": 0.7546955943107605, |
|
"eval_rouge1": 65.9445, |
|
"eval_rouge2": 48.9117, |
|
"eval_rougeL": 48.312, |
|
"eval_rougeLsum": 52.4923, |
|
"eval_runtime": 5417.8435, |
|
"eval_samples_per_second": 0.065, |
|
"eval_steps_per_second": 0.003, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 2.7249647390691114e-06, |
|
"loss": 0.3074, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 2.7038081805359663e-06, |
|
"loss": 0.2922, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_gen_len": 163.1311, |
|
"eval_loss": 0.754240095615387, |
|
"eval_rouge1": 66.6619, |
|
"eval_rouge2": 49.9256, |
|
"eval_rougeL": 48.7757, |
|
"eval_rougeLsum": 53.1916, |
|
"eval_runtime": 5340.7776, |
|
"eval_samples_per_second": 0.066, |
|
"eval_steps_per_second": 0.003, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 2.6826516220028213e-06, |
|
"loss": 0.281, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 2.6614950634696754e-06, |
|
"loss": 0.276, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"eval_gen_len": 165.0826, |
|
"eval_loss": 0.7793148756027222, |
|
"eval_rouge1": 66.8911, |
|
"eval_rouge2": 50.331, |
|
"eval_rougeL": 49.8396, |
|
"eval_rougeLsum": 54.0617, |
|
"eval_runtime": 6037.4348, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 2.6403385049365303e-06, |
|
"loss": 0.2557, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 2.6191819464033853e-06, |
|
"loss": 0.2464, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"eval_gen_len": 164.9943, |
|
"eval_loss": 0.7877324819564819, |
|
"eval_rouge1": 67.1466, |
|
"eval_rouge2": 50.939, |
|
"eval_rougeL": 50.2255, |
|
"eval_rougeLsum": 54.5145, |
|
"eval_runtime": 5837.726, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 2.5980253878702398e-06, |
|
"loss": 0.2369, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 2.5768688293370947e-06, |
|
"loss": 0.2265, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_gen_len": 162.1966, |
|
"eval_loss": 0.806422233581543, |
|
"eval_rouge1": 67.5137, |
|
"eval_rouge2": 51.2801, |
|
"eval_rougeL": 51.2323, |
|
"eval_rougeLsum": 55.426, |
|
"eval_runtime": 5856.0164, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 2.5557122708039492e-06, |
|
"loss": 0.2215, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 2.534555712270804e-06, |
|
"loss": 0.2004, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"eval_gen_len": 161.2365, |
|
"eval_loss": 0.8361101746559143, |
|
"eval_rouge1": 67.9515, |
|
"eval_rouge2": 51.5857, |
|
"eval_rougeL": 51.5796, |
|
"eval_rougeLsum": 55.9738, |
|
"eval_runtime": 5461.4431, |
|
"eval_samples_per_second": 0.064, |
|
"eval_steps_per_second": 0.003, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 2.5133991537376587e-06, |
|
"loss": 0.2039, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 2.4922425952045136e-06, |
|
"loss": 0.1903, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"eval_gen_len": 163.8234, |
|
"eval_loss": 0.8543212413787842, |
|
"eval_rouge1": 67.8403, |
|
"eval_rouge2": 51.7839, |
|
"eval_rougeL": 51.8869, |
|
"eval_rougeLsum": 56.2681, |
|
"eval_runtime": 5935.4542, |
|
"eval_samples_per_second": 0.059, |
|
"eval_steps_per_second": 0.003, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 2.471086036671368e-06, |
|
"loss": 0.1816, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 2.4499294781382227e-06, |
|
"loss": 0.1782, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"eval_gen_len": 161.2137, |
|
"eval_loss": 0.8672023415565491, |
|
"eval_rouge1": 68.2574, |
|
"eval_rouge2": 52.2649, |
|
"eval_rougeL": 52.3755, |
|
"eval_rougeLsum": 56.4509, |
|
"eval_runtime": 5698.6716, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 2.4287729196050776e-06, |
|
"loss": 0.1686, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"learning_rate": 2.407616361071932e-06, |
|
"loss": 0.1581, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"eval_gen_len": 163.0057, |
|
"eval_loss": 0.9022919535636902, |
|
"eval_rouge1": 67.8924, |
|
"eval_rouge2": 51.8802, |
|
"eval_rougeL": 52.5074, |
|
"eval_rougeLsum": 56.5946, |
|
"eval_runtime": 5838.2781, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 2.386459802538787e-06, |
|
"loss": 0.1525, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"learning_rate": 2.3653032440056416e-06, |
|
"loss": 0.1516, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"eval_gen_len": 163.0883, |
|
"eval_loss": 0.9338411092758179, |
|
"eval_rouge1": 68.0101, |
|
"eval_rouge2": 51.7535, |
|
"eval_rougeL": 52.2777, |
|
"eval_rougeLsum": 56.692, |
|
"eval_runtime": 6063.036, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 2.3441466854724966e-06, |
|
"loss": 0.143, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 2.3229901269393515e-06, |
|
"loss": 0.1358, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"eval_gen_len": 162.3504, |
|
"eval_loss": 0.9403561353683472, |
|
"eval_rouge1": 68.4997, |
|
"eval_rouge2": 52.385, |
|
"eval_rougeL": 52.7137, |
|
"eval_rougeLsum": 57.2165, |
|
"eval_runtime": 5678.9898, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 2.301833568406206e-06, |
|
"loss": 0.1268, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 23.98, |
|
"learning_rate": 2.280677009873061e-06, |
|
"loss": 0.1256, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 23.98, |
|
"eval_gen_len": 163.6467, |
|
"eval_loss": 0.9707176685333252, |
|
"eval_rouge1": 68.3426, |
|
"eval_rouge2": 52.1326, |
|
"eval_rougeL": 52.6279, |
|
"eval_rougeLsum": 57.0803, |
|
"eval_runtime": 6169.909, |
|
"eval_samples_per_second": 0.057, |
|
"eval_steps_per_second": 0.003, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 24.68, |
|
"learning_rate": 2.2595204513399155e-06, |
|
"loss": 0.1158, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 2.23836389280677e-06, |
|
"loss": 0.1119, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"eval_gen_len": 160.9316, |
|
"eval_loss": 1.0022860765457153, |
|
"eval_rouge1": 69.1266, |
|
"eval_rouge2": 52.9311, |
|
"eval_rougeL": 53.9465, |
|
"eval_rougeLsum": 58.0804, |
|
"eval_runtime": 5772.1926, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 2.217207334273625e-06, |
|
"loss": 0.1125, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 2.1960507757404795e-06, |
|
"loss": 0.1038, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_gen_len": 161.8746, |
|
"eval_loss": 1.019147515296936, |
|
"eval_rouge1": 68.6629, |
|
"eval_rouge2": 52.6518, |
|
"eval_rougeL": 53.1635, |
|
"eval_rougeLsum": 57.4514, |
|
"eval_runtime": 5939.4826, |
|
"eval_samples_per_second": 0.059, |
|
"eval_steps_per_second": 0.003, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 2.1748942172073344e-06, |
|
"loss": 0.098, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 2.153737658674189e-06, |
|
"loss": 0.0944, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"eval_gen_len": 161.4359, |
|
"eval_loss": 1.0427296161651611, |
|
"eval_rouge1": 69.1063, |
|
"eval_rouge2": 53.0226, |
|
"eval_rougeL": 53.5979, |
|
"eval_rougeLsum": 57.8602, |
|
"eval_runtime": 6011.7516, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 28.91, |
|
"learning_rate": 2.132581100141044e-06, |
|
"loss": 0.0929, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"learning_rate": 2.1114245416078984e-06, |
|
"loss": 0.0843, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 29.62, |
|
"eval_gen_len": 161.849, |
|
"eval_loss": 1.0581331253051758, |
|
"eval_rouge1": 68.0982, |
|
"eval_rouge2": 52.0153, |
|
"eval_rougeL": 53.1935, |
|
"eval_rougeLsum": 57.443, |
|
"eval_runtime": 5905.725, |
|
"eval_samples_per_second": 0.059, |
|
"eval_steps_per_second": 0.003, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 2.0902679830747533e-06, |
|
"loss": 0.0846, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 31.03, |
|
"learning_rate": 2.069111424541608e-06, |
|
"loss": 0.082, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 31.03, |
|
"eval_gen_len": 161.0684, |
|
"eval_loss": 1.0757286548614502, |
|
"eval_rouge1": 68.6318, |
|
"eval_rouge2": 52.8093, |
|
"eval_rougeL": 53.7214, |
|
"eval_rougeLsum": 57.775, |
|
"eval_runtime": 6004.4709, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 31.73, |
|
"learning_rate": 2.047954866008463e-06, |
|
"loss": 0.0735, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 32.44, |
|
"learning_rate": 2.0267983074753173e-06, |
|
"loss": 0.0756, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 32.44, |
|
"eval_gen_len": 162.4245, |
|
"eval_loss": 1.0798823833465576, |
|
"eval_rouge1": 68.557, |
|
"eval_rouge2": 52.4644, |
|
"eval_rougeL": 53.7606, |
|
"eval_rougeLsum": 57.8967, |
|
"eval_runtime": 6130.0883, |
|
"eval_samples_per_second": 0.057, |
|
"eval_steps_per_second": 0.003, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 33.15, |
|
"learning_rate": 2.005641748942172e-06, |
|
"loss": 0.0706, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"learning_rate": 1.9844851904090268e-06, |
|
"loss": 0.0674, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_gen_len": 162.963, |
|
"eval_loss": 1.1047182083129883, |
|
"eval_rouge1": 68.6765, |
|
"eval_rouge2": 52.713, |
|
"eval_rougeL": 53.5339, |
|
"eval_rougeLsum": 57.7006, |
|
"eval_runtime": 6102.6451, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 34.56, |
|
"learning_rate": 1.9633286318758817e-06, |
|
"loss": 0.0647, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 1.9421720733427362e-06, |
|
"loss": 0.063, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"eval_gen_len": 161.9003, |
|
"eval_loss": 1.1172865629196167, |
|
"eval_rouge1": 69.1911, |
|
"eval_rouge2": 53.1382, |
|
"eval_rougeL": 53.8971, |
|
"eval_rougeLsum": 58.2079, |
|
"eval_runtime": 5980.899, |
|
"eval_samples_per_second": 0.059, |
|
"eval_steps_per_second": 0.003, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 35.97, |
|
"learning_rate": 1.921015514809591e-06, |
|
"loss": 0.0599, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 36.67, |
|
"learning_rate": 1.8998589562764457e-06, |
|
"loss": 0.0566, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 36.67, |
|
"eval_gen_len": 162.3362, |
|
"eval_loss": 1.131187081336975, |
|
"eval_rouge1": 69.0624, |
|
"eval_rouge2": 53.101, |
|
"eval_rougeL": 54.2012, |
|
"eval_rougeLsum": 58.5665, |
|
"eval_runtime": 5792.9256, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 37.38, |
|
"learning_rate": 1.8787023977433004e-06, |
|
"loss": 0.0557, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"learning_rate": 1.8575458392101554e-06, |
|
"loss": 0.0532, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"eval_gen_len": 161.7009, |
|
"eval_loss": 1.1492334604263306, |
|
"eval_rouge1": 68.8232, |
|
"eval_rouge2": 52.7547, |
|
"eval_rougeL": 53.914, |
|
"eval_rougeLsum": 58.1061, |
|
"eval_runtime": 6017.4685, |
|
"eval_samples_per_second": 0.058, |
|
"eval_steps_per_second": 0.003, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 38.79, |
|
"learning_rate": 1.8363892806770101e-06, |
|
"loss": 0.0518, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 39.49, |
|
"learning_rate": 1.8152327221438644e-06, |
|
"loss": 0.0482, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 39.49, |
|
"eval_gen_len": 161.4046, |
|
"eval_loss": 1.1425426006317139, |
|
"eval_rouge1": 68.8641, |
|
"eval_rouge2": 52.7579, |
|
"eval_rougeL": 54.0604, |
|
"eval_rougeLsum": 58.3338, |
|
"eval_runtime": 5870.7466, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 1.7940761636107192e-06, |
|
"loss": 0.0459, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 40.9, |
|
"learning_rate": 1.772919605077574e-06, |
|
"loss": 0.0458, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 40.9, |
|
"eval_gen_len": 156.8718, |
|
"eval_loss": 1.1709474325180054, |
|
"eval_rouge1": 69.9022, |
|
"eval_rouge2": 53.8817, |
|
"eval_rougeL": 55.3029, |
|
"eval_rougeLsum": 59.6658, |
|
"eval_runtime": 5805.0163, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 41.61, |
|
"learning_rate": 1.7517630465444288e-06, |
|
"loss": 0.0417, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 42.31, |
|
"learning_rate": 1.7306064880112836e-06, |
|
"loss": 0.0421, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 42.31, |
|
"eval_gen_len": 161.1595, |
|
"eval_loss": 1.1962451934814453, |
|
"eval_rouge1": 69.8385, |
|
"eval_rouge2": 53.9521, |
|
"eval_rougeL": 55.043, |
|
"eval_rougeLsum": 59.4319, |
|
"eval_runtime": 5621.1651, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 43.02, |
|
"learning_rate": 1.7094499294781383e-06, |
|
"loss": 0.0415, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 43.72, |
|
"learning_rate": 1.688293370944993e-06, |
|
"loss": 0.0378, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 43.72, |
|
"eval_gen_len": 161.1368, |
|
"eval_loss": 1.1894584894180298, |
|
"eval_rouge1": 69.2784, |
|
"eval_rouge2": 53.3896, |
|
"eval_rougeL": 54.7309, |
|
"eval_rougeLsum": 58.9909, |
|
"eval_runtime": 5848.8931, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 1.6671368124118478e-06, |
|
"loss": 0.0382, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"learning_rate": 1.6459802538787025e-06, |
|
"loss": 0.0351, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"eval_gen_len": 161.0627, |
|
"eval_loss": 1.2170298099517822, |
|
"eval_rouge1": 69.2182, |
|
"eval_rouge2": 53.2021, |
|
"eval_rougeL": 54.8192, |
|
"eval_rougeLsum": 58.858, |
|
"eval_runtime": 5747.8165, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 45.84, |
|
"learning_rate": 1.6248236953455572e-06, |
|
"loss": 0.0348, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 46.54, |
|
"learning_rate": 1.6036671368124117e-06, |
|
"loss": 0.0328, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 46.54, |
|
"eval_gen_len": 162.3647, |
|
"eval_loss": 1.2242525815963745, |
|
"eval_rouge1": 69.2151, |
|
"eval_rouge2": 53.5006, |
|
"eval_rougeL": 54.6811, |
|
"eval_rougeLsum": 58.899, |
|
"eval_runtime": 5815.6916, |
|
"eval_samples_per_second": 0.06, |
|
"eval_steps_per_second": 0.003, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 47.25, |
|
"learning_rate": 1.5825105782792665e-06, |
|
"loss": 0.0334, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 47.95, |
|
"learning_rate": 1.5613540197461212e-06, |
|
"loss": 0.0316, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 47.95, |
|
"eval_gen_len": 163.4872, |
|
"eval_loss": 1.2345460653305054, |
|
"eval_rouge1": 68.9079, |
|
"eval_rouge2": 52.7923, |
|
"eval_rougeL": 54.7024, |
|
"eval_rougeLsum": 59.1333, |
|
"eval_runtime": 5738.4019, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 48.66, |
|
"learning_rate": 1.540197461212976e-06, |
|
"loss": 0.0303, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 49.37, |
|
"learning_rate": 1.5190409026798307e-06, |
|
"loss": 0.0289, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 49.37, |
|
"eval_gen_len": 161.2165, |
|
"eval_loss": 1.2448240518569946, |
|
"eval_rouge1": 69.3435, |
|
"eval_rouge2": 53.5007, |
|
"eval_rougeL": 54.6771, |
|
"eval_rougeLsum": 59.067, |
|
"eval_runtime": 5523.8322, |
|
"eval_samples_per_second": 0.064, |
|
"eval_steps_per_second": 0.003, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 50.07, |
|
"learning_rate": 1.4978843441466856e-06, |
|
"loss": 0.0287, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 50.78, |
|
"learning_rate": 1.4767277856135403e-06, |
|
"loss": 0.0279, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 50.78, |
|
"eval_gen_len": 163.7664, |
|
"eval_loss": 1.254445195198059, |
|
"eval_rouge1": 68.8472, |
|
"eval_rouge2": 53.1054, |
|
"eval_rougeL": 54.3767, |
|
"eval_rougeLsum": 58.4479, |
|
"eval_runtime": 5626.7741, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 51.48, |
|
"learning_rate": 1.455571227080395e-06, |
|
"loss": 0.0266, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 52.19, |
|
"learning_rate": 1.4344146685472496e-06, |
|
"loss": 0.0266, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 52.19, |
|
"eval_gen_len": 163.3675, |
|
"eval_loss": 1.267912745475769, |
|
"eval_rouge1": 69.1523, |
|
"eval_rouge2": 53.3283, |
|
"eval_rougeL": 54.8059, |
|
"eval_rougeLsum": 58.9981, |
|
"eval_runtime": 5635.0806, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 52.89, |
|
"learning_rate": 1.4132581100141043e-06, |
|
"loss": 0.025, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 1.392101551480959e-06, |
|
"loss": 0.0237, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"eval_gen_len": 161.943, |
|
"eval_loss": 1.278545618057251, |
|
"eval_rouge1": 69.7623, |
|
"eval_rouge2": 53.9089, |
|
"eval_rougeL": 55.4188, |
|
"eval_rougeLsum": 59.7174, |
|
"eval_runtime": 5517.6551, |
|
"eval_samples_per_second": 0.064, |
|
"eval_steps_per_second": 0.003, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 54.3, |
|
"learning_rate": 1.3709449929478138e-06, |
|
"loss": 0.0243, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 1.3497884344146687e-06, |
|
"loss": 0.0225, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"eval_gen_len": 162.8234, |
|
"eval_loss": 1.2910034656524658, |
|
"eval_rouge1": 69.4568, |
|
"eval_rouge2": 53.5993, |
|
"eval_rougeL": 55.3547, |
|
"eval_rougeLsum": 59.7055, |
|
"eval_runtime": 5493.2949, |
|
"eval_samples_per_second": 0.064, |
|
"eval_steps_per_second": 0.003, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 55.71, |
|
"learning_rate": 1.3286318758815233e-06, |
|
"loss": 0.0222, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 56.42, |
|
"learning_rate": 1.307475317348378e-06, |
|
"loss": 0.0207, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 56.42, |
|
"eval_gen_len": 161.9145, |
|
"eval_loss": 1.302707314491272, |
|
"eval_rouge1": 69.3521, |
|
"eval_rouge2": 53.657, |
|
"eval_rougeL": 55.4158, |
|
"eval_rougeLsum": 59.7595, |
|
"eval_runtime": 5493.618, |
|
"eval_samples_per_second": 0.064, |
|
"eval_steps_per_second": 0.003, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 57.12, |
|
"learning_rate": 1.2863187588152327e-06, |
|
"loss": 0.0216, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 57.83, |
|
"learning_rate": 1.2651622002820875e-06, |
|
"loss": 0.0207, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 57.83, |
|
"eval_gen_len": 161.4017, |
|
"eval_loss": 1.2961533069610596, |
|
"eval_rouge1": 69.5378, |
|
"eval_rouge2": 53.6568, |
|
"eval_rougeL": 55.4833, |
|
"eval_rougeLsum": 59.8609, |
|
"eval_runtime": 5729.7323, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 58.53, |
|
"learning_rate": 1.2440056417489422e-06, |
|
"loss": 0.0202, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 59.24, |
|
"learning_rate": 1.222849083215797e-06, |
|
"loss": 0.019, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 59.24, |
|
"eval_gen_len": 161.5954, |
|
"eval_loss": 1.3159691095352173, |
|
"eval_rouge1": 69.6692, |
|
"eval_rouge2": 53.8824, |
|
"eval_rougeL": 56.153, |
|
"eval_rougeLsum": 60.5724, |
|
"eval_runtime": 5770.1856, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 59.94, |
|
"learning_rate": 1.2016925246826517e-06, |
|
"loss": 0.0193, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 60.65, |
|
"learning_rate": 1.1805359661495064e-06, |
|
"loss": 0.0183, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 60.65, |
|
"eval_gen_len": 159.6097, |
|
"eval_loss": 1.3009204864501953, |
|
"eval_rouge1": 69.8056, |
|
"eval_rouge2": 54.3016, |
|
"eval_rougeL": 56.2054, |
|
"eval_rougeLsum": 60.4544, |
|
"eval_runtime": 5689.5789, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 61.35, |
|
"learning_rate": 1.1593794076163611e-06, |
|
"loss": 0.0186, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 62.06, |
|
"learning_rate": 1.1382228490832158e-06, |
|
"loss": 0.0179, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 62.06, |
|
"eval_gen_len": 159.4046, |
|
"eval_loss": 1.3091576099395752, |
|
"eval_rouge1": 69.9596, |
|
"eval_rouge2": 54.1221, |
|
"eval_rougeL": 56.3196, |
|
"eval_rougeLsum": 60.6711, |
|
"eval_runtime": 5735.0694, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 62.76, |
|
"learning_rate": 1.1170662905500706e-06, |
|
"loss": 0.0179, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 63.47, |
|
"learning_rate": 1.0959097320169253e-06, |
|
"loss": 0.0162, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 63.47, |
|
"eval_gen_len": 161.7721, |
|
"eval_loss": 1.3298344612121582, |
|
"eval_rouge1": 69.7464, |
|
"eval_rouge2": 54.0211, |
|
"eval_rougeL": 56.0314, |
|
"eval_rougeLsum": 60.4478, |
|
"eval_runtime": 5746.6407, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.003, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 64.17, |
|
"learning_rate": 1.07475317348378e-06, |
|
"loss": 0.0177, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 64.88, |
|
"learning_rate": 1.0535966149506348e-06, |
|
"loss": 0.0161, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 64.88, |
|
"eval_gen_len": 161.3932, |
|
"eval_loss": 1.3238565921783447, |
|
"eval_rouge1": 69.6076, |
|
"eval_rouge2": 53.7339, |
|
"eval_rougeL": 55.9025, |
|
"eval_rougeLsum": 60.3293, |
|
"eval_runtime": 5658.4636, |
|
"eval_samples_per_second": 0.062, |
|
"eval_steps_per_second": 0.003, |
|
"step": 46000 |
|
} |
|
], |
|
"max_steps": 70900, |
|
"num_train_epochs": 100, |
|
"total_flos": 7.747033211322532e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|