Centrum-multinews / trainer_state.json
ratishsp
initial commit
8b0bf1c
raw
history blame
No virus
45 kB
{
"best_metric": 24.366,
"best_model_checkpoint": "/home/hpcpudu1/rds/hpc-work/data/pretrain-mds/led_pretrain/ver2/gen_model/Centrum_base_multinews_23-7-22.2/checkpoint-23000",
"epoch": 8.896557858222895,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.2000000000000002e-08,
"loss": 3.7881,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 1.2000000000000002e-06,
"loss": 3.5568,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 2.4000000000000003e-06,
"loss": 3.5213,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 3.6e-06,
"loss": 3.49,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 4.800000000000001e-06,
"loss": 3.4612,
"step": 400
},
{
"epoch": 0.18,
"learning_rate": 6e-06,
"loss": 3.4476,
"step": 500
},
{
"epoch": 0.21,
"learning_rate": 7.2e-06,
"loss": 3.4241,
"step": 600
},
{
"epoch": 0.25,
"learning_rate": 8.400000000000001e-06,
"loss": 3.4164,
"step": 700
},
{
"epoch": 0.28,
"learning_rate": 9.600000000000001e-06,
"loss": 3.4208,
"step": 800
},
{
"epoch": 0.32,
"learning_rate": 1.08e-05,
"loss": 3.4008,
"step": 900
},
{
"epoch": 0.36,
"learning_rate": 1.2e-05,
"loss": 3.4391,
"step": 1000
},
{
"epoch": 0.39,
"learning_rate": 1.32e-05,
"loss": 3.3908,
"step": 1100
},
{
"epoch": 0.43,
"learning_rate": 1.44e-05,
"loss": 3.3961,
"step": 1200
},
{
"epoch": 0.46,
"learning_rate": 1.56e-05,
"loss": 3.4042,
"step": 1300
},
{
"epoch": 0.5,
"learning_rate": 1.6800000000000002e-05,
"loss": 3.3847,
"step": 1400
},
{
"epoch": 0.53,
"learning_rate": 1.8e-05,
"loss": 3.3763,
"step": 1500
},
{
"epoch": 0.57,
"learning_rate": 1.9200000000000003e-05,
"loss": 3.3873,
"step": 1600
},
{
"epoch": 0.6,
"learning_rate": 2.04e-05,
"loss": 3.3817,
"step": 1700
},
{
"epoch": 0.64,
"learning_rate": 2.16e-05,
"loss": 3.363,
"step": 1800
},
{
"epoch": 0.68,
"learning_rate": 2.2800000000000002e-05,
"loss": 3.3631,
"step": 1900
},
{
"epoch": 0.71,
"learning_rate": 2.4e-05,
"loss": 3.3734,
"step": 2000
},
{
"epoch": 0.75,
"learning_rate": 2.52e-05,
"loss": 3.3414,
"step": 2100
},
{
"epoch": 0.78,
"learning_rate": 2.64e-05,
"loss": 3.384,
"step": 2200
},
{
"epoch": 0.82,
"learning_rate": 2.7600000000000003e-05,
"loss": 3.3718,
"step": 2300
},
{
"epoch": 0.85,
"learning_rate": 2.88e-05,
"loss": 3.3669,
"step": 2400
},
{
"epoch": 0.89,
"learning_rate": 3e-05,
"loss": 3.3649,
"step": 2500
},
{
"epoch": 0.93,
"learning_rate": 2.9866666666666666e-05,
"loss": 3.3537,
"step": 2600
},
{
"epoch": 0.96,
"learning_rate": 2.9733333333333336e-05,
"loss": 3.393,
"step": 2700
},
{
"epoch": 1.0,
"learning_rate": 2.96e-05,
"loss": 3.3729,
"step": 2800
},
{
"epoch": 1.03,
"learning_rate": 2.9466666666666667e-05,
"loss": 3.3593,
"step": 2900
},
{
"epoch": 1.07,
"learning_rate": 2.9333333333333333e-05,
"loss": 3.3256,
"step": 3000
},
{
"epoch": 1.1,
"learning_rate": 2.92e-05,
"loss": 3.2975,
"step": 3100
},
{
"epoch": 1.14,
"learning_rate": 2.9066666666666667e-05,
"loss": 3.3259,
"step": 3200
},
{
"epoch": 1.17,
"learning_rate": 2.8933333333333333e-05,
"loss": 3.3079,
"step": 3300
},
{
"epoch": 1.21,
"learning_rate": 2.88e-05,
"loss": 3.302,
"step": 3400
},
{
"epoch": 1.25,
"learning_rate": 2.8666666666666668e-05,
"loss": 3.2971,
"step": 3500
},
{
"epoch": 1.28,
"learning_rate": 2.8533333333333333e-05,
"loss": 3.3297,
"step": 3600
},
{
"epoch": 1.32,
"learning_rate": 2.84e-05,
"loss": 3.3068,
"step": 3700
},
{
"epoch": 1.35,
"learning_rate": 2.8268e-05,
"loss": 3.31,
"step": 3800
},
{
"epoch": 1.39,
"learning_rate": 2.8134666666666667e-05,
"loss": 3.2908,
"step": 3900
},
{
"epoch": 1.42,
"learning_rate": 2.8001333333333332e-05,
"loss": 3.2954,
"step": 4000
},
{
"epoch": 1.46,
"learning_rate": 2.7868e-05,
"loss": 3.3029,
"step": 4100
},
{
"epoch": 1.49,
"learning_rate": 2.7734666666666667e-05,
"loss": 3.3064,
"step": 4200
},
{
"epoch": 1.53,
"learning_rate": 2.7601333333333333e-05,
"loss": 3.2857,
"step": 4300
},
{
"epoch": 1.57,
"learning_rate": 2.7468e-05,
"loss": 3.2781,
"step": 4400
},
{
"epoch": 1.6,
"learning_rate": 2.7334666666666668e-05,
"loss": 3.3019,
"step": 4500
},
{
"epoch": 1.64,
"learning_rate": 2.7201333333333333e-05,
"loss": 3.2978,
"step": 4600
},
{
"epoch": 1.67,
"learning_rate": 2.7068e-05,
"loss": 3.3186,
"step": 4700
},
{
"epoch": 1.71,
"learning_rate": 2.6934666666666665e-05,
"loss": 3.2843,
"step": 4800
},
{
"epoch": 1.74,
"learning_rate": 2.6801333333333334e-05,
"loss": 3.2863,
"step": 4900
},
{
"epoch": 1.78,
"learning_rate": 2.6668000000000003e-05,
"loss": 3.2702,
"step": 5000
},
{
"epoch": 1.78,
"eval_gen_len": 277.1855,
"eval_loss": 3.2853293418884277,
"eval_rouge1": 44.0203,
"eval_rouge2": 16.6061,
"eval_rougeL": 23.3846,
"eval_rougeLsum": 40.3853,
"eval_runtime": 1402.4156,
"eval_samples_per_second": 4.009,
"eval_steps_per_second": 0.251,
"step": 5000
},
{
"epoch": 1.81,
"learning_rate": 2.653466666666667e-05,
"loss": 3.3022,
"step": 5100
},
{
"epoch": 1.85,
"learning_rate": 2.6401333333333334e-05,
"loss": 3.29,
"step": 5200
},
{
"epoch": 1.89,
"learning_rate": 2.6268000000000003e-05,
"loss": 3.2893,
"step": 5300
},
{
"epoch": 1.92,
"learning_rate": 2.613466666666667e-05,
"loss": 3.2849,
"step": 5400
},
{
"epoch": 1.96,
"learning_rate": 2.6001333333333335e-05,
"loss": 3.2762,
"step": 5500
},
{
"epoch": 1.96,
"eval_gen_len": 288.4173,
"eval_loss": 3.2852535247802734,
"eval_rouge1": 44.725,
"eval_rouge2": 16.9262,
"eval_rougeL": 23.475,
"eval_rougeLsum": 41.0003,
"eval_runtime": 1344.5644,
"eval_samples_per_second": 4.181,
"eval_steps_per_second": 0.262,
"step": 5500
},
{
"epoch": 1.99,
"learning_rate": 2.5868e-05,
"loss": 3.2937,
"step": 5600
},
{
"epoch": 2.03,
"learning_rate": 2.573466666666667e-05,
"loss": 3.2614,
"step": 5700
},
{
"epoch": 2.06,
"learning_rate": 2.5601333333333335e-05,
"loss": 3.1995,
"step": 5800
},
{
"epoch": 2.1,
"learning_rate": 2.5468e-05,
"loss": 3.2162,
"step": 5900
},
{
"epoch": 2.14,
"learning_rate": 2.5334666666666666e-05,
"loss": 3.2114,
"step": 6000
},
{
"epoch": 2.14,
"eval_gen_len": 257.2761,
"eval_loss": 3.2857086658477783,
"eval_rouge1": 44.6456,
"eval_rouge2": 17.0245,
"eval_rougeL": 23.7328,
"eval_rougeLsum": 40.9131,
"eval_runtime": 1239.4823,
"eval_samples_per_second": 4.536,
"eval_steps_per_second": 0.284,
"step": 6000
},
{
"epoch": 2.17,
"learning_rate": 2.5201333333333336e-05,
"loss": 3.202,
"step": 6100
},
{
"epoch": 2.21,
"learning_rate": 2.5068e-05,
"loss": 3.2089,
"step": 6200
},
{
"epoch": 2.24,
"learning_rate": 2.4934666666666667e-05,
"loss": 3.2109,
"step": 6300
},
{
"epoch": 2.28,
"learning_rate": 2.4801333333333333e-05,
"loss": 3.2073,
"step": 6400
},
{
"epoch": 2.31,
"learning_rate": 2.4668e-05,
"loss": 3.1981,
"step": 6500
},
{
"epoch": 2.31,
"eval_gen_len": 254.8618,
"eval_loss": 3.2817349433898926,
"eval_rouge1": 44.7869,
"eval_rouge2": 17.0849,
"eval_rougeL": 23.8372,
"eval_rougeLsum": 41.0669,
"eval_runtime": 1197.7293,
"eval_samples_per_second": 4.694,
"eval_steps_per_second": 0.294,
"step": 6500
},
{
"epoch": 2.35,
"learning_rate": 2.4534666666666667e-05,
"loss": 3.2258,
"step": 6600
},
{
"epoch": 2.38,
"learning_rate": 2.4401333333333333e-05,
"loss": 3.2252,
"step": 6700
},
{
"epoch": 2.42,
"learning_rate": 2.4268e-05,
"loss": 3.2252,
"step": 6800
},
{
"epoch": 2.46,
"learning_rate": 2.4134666666666668e-05,
"loss": 3.2218,
"step": 6900
},
{
"epoch": 2.49,
"learning_rate": 2.4001333333333333e-05,
"loss": 3.2298,
"step": 7000
},
{
"epoch": 2.49,
"eval_gen_len": 263.0854,
"eval_loss": 3.2801525592803955,
"eval_rouge1": 45.2657,
"eval_rouge2": 17.2618,
"eval_rougeL": 23.8204,
"eval_rougeLsum": 41.5807,
"eval_runtime": 1199.3644,
"eval_samples_per_second": 4.687,
"eval_steps_per_second": 0.293,
"step": 7000
},
{
"epoch": 2.53,
"learning_rate": 2.3868e-05,
"loss": 3.206,
"step": 7100
},
{
"epoch": 2.56,
"learning_rate": 2.3734666666666665e-05,
"loss": 3.2062,
"step": 7200
},
{
"epoch": 2.6,
"learning_rate": 2.3601333333333334e-05,
"loss": 3.2241,
"step": 7300
},
{
"epoch": 2.63,
"learning_rate": 2.3468e-05,
"loss": 3.2116,
"step": 7400
},
{
"epoch": 2.67,
"learning_rate": 2.3334666666666665e-05,
"loss": 3.2167,
"step": 7500
},
{
"epoch": 2.67,
"eval_gen_len": 244.6939,
"eval_loss": 3.2773149013519287,
"eval_rouge1": 44.9516,
"eval_rouge2": 17.0538,
"eval_rougeL": 23.7894,
"eval_rougeLsum": 41.1673,
"eval_runtime": 1137.295,
"eval_samples_per_second": 4.943,
"eval_steps_per_second": 0.31,
"step": 7500
},
{
"epoch": 2.7,
"learning_rate": 2.3201333333333334e-05,
"loss": 3.219,
"step": 7600
},
{
"epoch": 2.74,
"learning_rate": 2.3068e-05,
"loss": 3.2057,
"step": 7700
},
{
"epoch": 2.78,
"learning_rate": 2.2936e-05,
"loss": 3.2029,
"step": 7800
},
{
"epoch": 2.81,
"learning_rate": 2.2802666666666668e-05,
"loss": 3.2395,
"step": 7900
},
{
"epoch": 2.85,
"learning_rate": 2.2669333333333333e-05,
"loss": 3.2069,
"step": 8000
},
{
"epoch": 2.85,
"eval_gen_len": 245.4036,
"eval_loss": 3.2712182998657227,
"eval_rouge1": 45.2153,
"eval_rouge2": 17.2766,
"eval_rougeL": 23.9883,
"eval_rougeLsum": 41.4558,
"eval_runtime": 1117.2186,
"eval_samples_per_second": 5.032,
"eval_steps_per_second": 0.315,
"step": 8000
},
{
"epoch": 2.88,
"learning_rate": 2.2536e-05,
"loss": 3.2034,
"step": 8100
},
{
"epoch": 2.92,
"learning_rate": 2.2402666666666665e-05,
"loss": 3.2163,
"step": 8200
},
{
"epoch": 2.95,
"learning_rate": 2.2269333333333334e-05,
"loss": 3.2158,
"step": 8300
},
{
"epoch": 2.99,
"learning_rate": 2.2136e-05,
"loss": 3.2205,
"step": 8400
},
{
"epoch": 3.02,
"learning_rate": 2.2002666666666665e-05,
"loss": 3.1822,
"step": 8500
},
{
"epoch": 3.02,
"eval_gen_len": 254.6624,
"eval_loss": 3.2785804271698,
"eval_rouge1": 45.4747,
"eval_rouge2": 17.6754,
"eval_rougeL": 24.1878,
"eval_rougeLsum": 41.7304,
"eval_runtime": 1171.0921,
"eval_samples_per_second": 4.801,
"eval_steps_per_second": 0.301,
"step": 8500
},
{
"epoch": 3.06,
"learning_rate": 2.186933333333333e-05,
"loss": 3.1397,
"step": 8600
},
{
"epoch": 3.1,
"learning_rate": 2.1736e-05,
"loss": 3.1659,
"step": 8700
},
{
"epoch": 3.13,
"learning_rate": 2.1602666666666666e-05,
"loss": 3.1318,
"step": 8800
},
{
"epoch": 3.17,
"learning_rate": 2.1469333333333335e-05,
"loss": 3.162,
"step": 8900
},
{
"epoch": 3.2,
"learning_rate": 2.1336000000000004e-05,
"loss": 3.1529,
"step": 9000
},
{
"epoch": 3.2,
"eval_gen_len": 246.0157,
"eval_loss": 3.2740354537963867,
"eval_rouge1": 44.9033,
"eval_rouge2": 17.1386,
"eval_rougeL": 23.8511,
"eval_rougeLsum": 41.177,
"eval_runtime": 1185.3803,
"eval_samples_per_second": 4.743,
"eval_steps_per_second": 0.297,
"step": 9000
},
{
"epoch": 3.24,
"learning_rate": 2.120266666666667e-05,
"loss": 3.1241,
"step": 9100
},
{
"epoch": 3.27,
"learning_rate": 2.1069333333333335e-05,
"loss": 3.1537,
"step": 9200
},
{
"epoch": 3.31,
"learning_rate": 2.0936e-05,
"loss": 3.1589,
"step": 9300
},
{
"epoch": 3.35,
"learning_rate": 2.080266666666667e-05,
"loss": 3.1415,
"step": 9400
},
{
"epoch": 3.38,
"learning_rate": 2.0669333333333336e-05,
"loss": 3.1407,
"step": 9500
},
{
"epoch": 3.38,
"eval_gen_len": 243.4922,
"eval_loss": 3.270354986190796,
"eval_rouge1": 45.1045,
"eval_rouge2": 17.2335,
"eval_rougeL": 23.9124,
"eval_rougeLsum": 41.3243,
"eval_runtime": 1159.3428,
"eval_samples_per_second": 4.849,
"eval_steps_per_second": 0.304,
"step": 9500
},
{
"epoch": 3.42,
"learning_rate": 2.0536e-05,
"loss": 3.149,
"step": 9600
},
{
"epoch": 3.45,
"learning_rate": 2.0402666666666667e-05,
"loss": 3.1539,
"step": 9700
},
{
"epoch": 3.49,
"learning_rate": 2.027066666666667e-05,
"loss": 3.1539,
"step": 9800
},
{
"epoch": 3.52,
"learning_rate": 2.0137333333333335e-05,
"loss": 3.158,
"step": 9900
},
{
"epoch": 3.56,
"learning_rate": 2.0004e-05,
"loss": 3.1376,
"step": 10000
},
{
"epoch": 3.56,
"eval_gen_len": 243.8396,
"eval_loss": 3.272122621536255,
"eval_rouge1": 45.2694,
"eval_rouge2": 17.4797,
"eval_rougeL": 24.1072,
"eval_rougeLsum": 41.5441,
"eval_runtime": 1143.3015,
"eval_samples_per_second": 4.917,
"eval_steps_per_second": 0.308,
"step": 10000
},
{
"epoch": 3.59,
"learning_rate": 1.987066666666667e-05,
"loss": 3.1475,
"step": 10100
},
{
"epoch": 3.63,
"learning_rate": 1.9737333333333335e-05,
"loss": 3.1553,
"step": 10200
},
{
"epoch": 3.67,
"learning_rate": 1.9604e-05,
"loss": 3.1589,
"step": 10300
},
{
"epoch": 3.7,
"learning_rate": 1.9470666666666666e-05,
"loss": 3.1712,
"step": 10400
},
{
"epoch": 3.74,
"learning_rate": 1.9337333333333335e-05,
"loss": 3.1545,
"step": 10500
},
{
"epoch": 3.74,
"eval_gen_len": 231.1805,
"eval_loss": 3.271965503692627,
"eval_rouge1": 45.3105,
"eval_rouge2": 17.6338,
"eval_rougeL": 24.1547,
"eval_rougeLsum": 41.5731,
"eval_runtime": 1051.5864,
"eval_samples_per_second": 5.346,
"eval_steps_per_second": 0.335,
"step": 10500
},
{
"epoch": 3.77,
"learning_rate": 1.9204e-05,
"loss": 3.1598,
"step": 10600
},
{
"epoch": 3.81,
"learning_rate": 1.9070666666666667e-05,
"loss": 3.1684,
"step": 10700
},
{
"epoch": 3.84,
"learning_rate": 1.8937333333333336e-05,
"loss": 3.1703,
"step": 10800
},
{
"epoch": 3.88,
"learning_rate": 1.8804e-05,
"loss": 3.1506,
"step": 10900
},
{
"epoch": 3.91,
"learning_rate": 1.8670666666666667e-05,
"loss": 3.1307,
"step": 11000
},
{
"epoch": 3.91,
"eval_gen_len": 250.1039,
"eval_loss": 3.268434524536133,
"eval_rouge1": 45.4309,
"eval_rouge2": 17.2665,
"eval_rougeL": 23.8954,
"eval_rougeLsum": 41.6518,
"eval_runtime": 1131.5203,
"eval_samples_per_second": 4.969,
"eval_steps_per_second": 0.311,
"step": 11000
},
{
"epoch": 3.95,
"learning_rate": 1.8537333333333333e-05,
"loss": 3.1651,
"step": 11100
},
{
"epoch": 3.99,
"learning_rate": 1.8404000000000002e-05,
"loss": 3.1617,
"step": 11200
},
{
"epoch": 4.02,
"learning_rate": 1.8270666666666668e-05,
"loss": 3.1417,
"step": 11300
},
{
"epoch": 4.06,
"learning_rate": 1.8137333333333333e-05,
"loss": 3.0753,
"step": 11400
},
{
"epoch": 4.09,
"learning_rate": 1.8004e-05,
"loss": 3.1022,
"step": 11500
},
{
"epoch": 4.09,
"eval_gen_len": 242.5923,
"eval_loss": 3.271904468536377,
"eval_rouge1": 45.1959,
"eval_rouge2": 17.4017,
"eval_rougeL": 24.056,
"eval_rougeLsum": 41.5363,
"eval_runtime": 1125.9262,
"eval_samples_per_second": 4.993,
"eval_steps_per_second": 0.313,
"step": 11500
},
{
"epoch": 4.13,
"learning_rate": 1.7870666666666668e-05,
"loss": 3.0868,
"step": 11600
},
{
"epoch": 4.16,
"learning_rate": 1.7737333333333334e-05,
"loss": 3.1109,
"step": 11700
},
{
"epoch": 4.2,
"learning_rate": 1.7605333333333332e-05,
"loss": 3.0823,
"step": 11800
},
{
"epoch": 4.23,
"learning_rate": 1.7472e-05,
"loss": 3.0932,
"step": 11900
},
{
"epoch": 4.27,
"learning_rate": 1.7338666666666667e-05,
"loss": 3.1139,
"step": 12000
},
{
"epoch": 4.27,
"eval_gen_len": 240.5701,
"eval_loss": 3.27105712890625,
"eval_rouge1": 45.3864,
"eval_rouge2": 17.4653,
"eval_rougeL": 24.028,
"eval_rougeLsum": 41.6797,
"eval_runtime": 1097.9204,
"eval_samples_per_second": 5.121,
"eval_steps_per_second": 0.321,
"step": 12000
},
{
"epoch": 4.31,
"learning_rate": 1.7205333333333333e-05,
"loss": 3.1077,
"step": 12100
},
{
"epoch": 4.34,
"learning_rate": 1.7072000000000002e-05,
"loss": 3.1018,
"step": 12200
},
{
"epoch": 4.38,
"learning_rate": 1.6938666666666668e-05,
"loss": 3.1089,
"step": 12300
},
{
"epoch": 4.41,
"learning_rate": 1.6805333333333333e-05,
"loss": 3.0964,
"step": 12400
},
{
"epoch": 4.45,
"learning_rate": 1.6672e-05,
"loss": 3.0978,
"step": 12500
},
{
"epoch": 4.45,
"eval_gen_len": 232.1149,
"eval_loss": 3.2721784114837646,
"eval_rouge1": 45.5694,
"eval_rouge2": 17.501,
"eval_rougeL": 24.1452,
"eval_rougeLsum": 41.7894,
"eval_runtime": 1018.4617,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 0.346,
"step": 12500
},
{
"epoch": 4.48,
"learning_rate": 1.6538666666666668e-05,
"loss": 3.0998,
"step": 12600
},
{
"epoch": 4.52,
"learning_rate": 1.6405333333333334e-05,
"loss": 3.1115,
"step": 12700
},
{
"epoch": 4.56,
"learning_rate": 1.6272e-05,
"loss": 3.1083,
"step": 12800
},
{
"epoch": 4.59,
"learning_rate": 1.6138666666666665e-05,
"loss": 3.1112,
"step": 12900
},
{
"epoch": 4.63,
"learning_rate": 1.6005333333333334e-05,
"loss": 3.1082,
"step": 13000
},
{
"epoch": 4.63,
"eval_gen_len": 245.1845,
"eval_loss": 3.2687015533447266,
"eval_rouge1": 45.504,
"eval_rouge2": 17.5137,
"eval_rougeL": 24.1067,
"eval_rougeLsum": 41.7686,
"eval_runtime": 1129.9664,
"eval_samples_per_second": 4.975,
"eval_steps_per_second": 0.312,
"step": 13000
},
{
"epoch": 4.66,
"learning_rate": 1.5872e-05,
"loss": 3.0868,
"step": 13100
},
{
"epoch": 4.7,
"learning_rate": 1.5738666666666666e-05,
"loss": 3.1087,
"step": 13200
},
{
"epoch": 4.73,
"learning_rate": 1.5606666666666667e-05,
"loss": 3.1035,
"step": 13300
},
{
"epoch": 4.77,
"learning_rate": 1.5473333333333333e-05,
"loss": 3.1254,
"step": 13400
},
{
"epoch": 4.8,
"learning_rate": 1.534e-05,
"loss": 3.1059,
"step": 13500
},
{
"epoch": 4.8,
"eval_gen_len": 248.6327,
"eval_loss": 3.268646717071533,
"eval_rouge1": 45.3603,
"eval_rouge2": 17.1619,
"eval_rougeL": 23.8655,
"eval_rougeLsum": 41.5953,
"eval_runtime": 1136.3791,
"eval_samples_per_second": 4.947,
"eval_steps_per_second": 0.31,
"step": 13500
},
{
"epoch": 4.84,
"learning_rate": 1.5206666666666668e-05,
"loss": 3.1107,
"step": 13600
},
{
"epoch": 4.88,
"learning_rate": 1.5073333333333335e-05,
"loss": 3.1008,
"step": 13700
},
{
"epoch": 4.91,
"learning_rate": 1.4940000000000001e-05,
"loss": 3.1116,
"step": 13800
},
{
"epoch": 4.95,
"learning_rate": 1.4806666666666668e-05,
"loss": 3.1173,
"step": 13900
},
{
"epoch": 4.98,
"learning_rate": 1.4673333333333334e-05,
"loss": 3.1141,
"step": 14000
},
{
"epoch": 4.98,
"eval_gen_len": 234.0194,
"eval_loss": 3.265822410583496,
"eval_rouge1": 45.2741,
"eval_rouge2": 17.3814,
"eval_rougeL": 24.0377,
"eval_rougeLsum": 41.5263,
"eval_runtime": 1076.885,
"eval_samples_per_second": 5.221,
"eval_steps_per_second": 0.327,
"step": 14000
},
{
"epoch": 5.02,
"learning_rate": 1.4540000000000001e-05,
"loss": 3.0773,
"step": 14100
},
{
"epoch": 5.05,
"learning_rate": 1.4406666666666667e-05,
"loss": 3.0749,
"step": 14200
},
{
"epoch": 5.09,
"learning_rate": 1.4273333333333334e-05,
"loss": 3.0473,
"step": 14300
},
{
"epoch": 5.12,
"learning_rate": 1.414e-05,
"loss": 3.0415,
"step": 14400
},
{
"epoch": 5.16,
"learning_rate": 1.4006666666666668e-05,
"loss": 3.0294,
"step": 14500
},
{
"epoch": 5.16,
"eval_gen_len": 244.4207,
"eval_loss": 3.2715883255004883,
"eval_rouge1": 45.7203,
"eval_rouge2": 17.5962,
"eval_rougeL": 24.1367,
"eval_rougeLsum": 41.9119,
"eval_runtime": 1108.4185,
"eval_samples_per_second": 5.072,
"eval_steps_per_second": 0.318,
"step": 14500
},
{
"epoch": 5.2,
"learning_rate": 1.3873333333333333e-05,
"loss": 3.068,
"step": 14600
},
{
"epoch": 5.23,
"learning_rate": 1.374e-05,
"loss": 3.0561,
"step": 14700
},
{
"epoch": 5.27,
"learning_rate": 1.3606666666666666e-05,
"loss": 3.0738,
"step": 14800
},
{
"epoch": 5.3,
"learning_rate": 1.3473333333333334e-05,
"loss": 3.0854,
"step": 14900
},
{
"epoch": 5.34,
"learning_rate": 1.334e-05,
"loss": 3.0613,
"step": 15000
},
{
"epoch": 5.34,
"eval_gen_len": 242.0381,
"eval_loss": 3.26971435546875,
"eval_rouge1": 45.775,
"eval_rouge2": 17.6959,
"eval_rougeL": 24.1867,
"eval_rougeLsum": 42.0018,
"eval_runtime": 1110.4898,
"eval_samples_per_second": 5.063,
"eval_steps_per_second": 0.317,
"step": 15000
},
{
"epoch": 5.37,
"learning_rate": 1.3206666666666667e-05,
"loss": 3.0704,
"step": 15100
},
{
"epoch": 5.41,
"learning_rate": 1.3073333333333334e-05,
"loss": 3.0419,
"step": 15200
},
{
"epoch": 5.44,
"learning_rate": 1.2940000000000001e-05,
"loss": 3.0748,
"step": 15300
},
{
"epoch": 5.48,
"learning_rate": 1.2806666666666667e-05,
"loss": 3.0509,
"step": 15400
},
{
"epoch": 5.52,
"learning_rate": 1.2673333333333335e-05,
"loss": 3.0549,
"step": 15500
},
{
"epoch": 5.52,
"eval_gen_len": 242.5493,
"eval_loss": 3.2702813148498535,
"eval_rouge1": 45.8193,
"eval_rouge2": 17.686,
"eval_rougeL": 24.1997,
"eval_rougeLsum": 42.0109,
"eval_runtime": 1089.9866,
"eval_samples_per_second": 5.158,
"eval_steps_per_second": 0.323,
"step": 15500
},
{
"epoch": 5.55,
"learning_rate": 1.254e-05,
"loss": 3.0902,
"step": 15600
},
{
"epoch": 5.59,
"learning_rate": 1.2406666666666668e-05,
"loss": 3.0697,
"step": 15700
},
{
"epoch": 5.62,
"learning_rate": 1.2273333333333333e-05,
"loss": 3.0793,
"step": 15800
},
{
"epoch": 5.66,
"learning_rate": 1.214e-05,
"loss": 3.076,
"step": 15900
},
{
"epoch": 5.69,
"learning_rate": 1.2006666666666666e-05,
"loss": 3.0725,
"step": 16000
},
{
"epoch": 5.69,
"eval_gen_len": 240.2812,
"eval_loss": 3.2654964923858643,
"eval_rouge1": 45.3515,
"eval_rouge2": 17.3438,
"eval_rougeL": 24.0586,
"eval_rougeLsum": 41.6126,
"eval_runtime": 1107.0741,
"eval_samples_per_second": 5.078,
"eval_steps_per_second": 0.318,
"step": 16000
},
{
"epoch": 5.73,
"learning_rate": 1.1873333333333334e-05,
"loss": 3.0904,
"step": 16100
},
{
"epoch": 5.76,
"learning_rate": 1.174e-05,
"loss": 3.082,
"step": 16200
},
{
"epoch": 5.8,
"learning_rate": 1.1608000000000001e-05,
"loss": 3.0543,
"step": 16300
},
{
"epoch": 5.84,
"learning_rate": 1.1474666666666667e-05,
"loss": 3.0445,
"step": 16400
},
{
"epoch": 5.87,
"learning_rate": 1.1341333333333334e-05,
"loss": 3.0728,
"step": 16500
},
{
"epoch": 5.87,
"eval_gen_len": 250.455,
"eval_loss": 3.2671351432800293,
"eval_rouge1": 45.6791,
"eval_rouge2": 17.5028,
"eval_rougeL": 24.0691,
"eval_rougeLsum": 41.9219,
"eval_runtime": 1163.6259,
"eval_samples_per_second": 4.831,
"eval_steps_per_second": 0.303,
"step": 16500
},
{
"epoch": 5.91,
"learning_rate": 1.1208e-05,
"loss": 3.0764,
"step": 16600
},
{
"epoch": 5.94,
"learning_rate": 1.1074666666666667e-05,
"loss": 3.0822,
"step": 16700
},
{
"epoch": 5.98,
"learning_rate": 1.0941333333333333e-05,
"loss": 3.0452,
"step": 16800
},
{
"epoch": 6.01,
"learning_rate": 1.0808e-05,
"loss": 3.0631,
"step": 16900
},
{
"epoch": 6.05,
"learning_rate": 1.0674666666666666e-05,
"loss": 3.0142,
"step": 17000
},
{
"epoch": 6.05,
"eval_gen_len": 245.6204,
"eval_loss": 3.270817279815674,
"eval_rouge1": 46.0287,
"eval_rouge2": 17.8079,
"eval_rougeL": 24.2916,
"eval_rougeLsum": 42.2369,
"eval_runtime": 1077.587,
"eval_samples_per_second": 5.217,
"eval_steps_per_second": 0.327,
"step": 17000
},
{
"epoch": 6.09,
"learning_rate": 1.0541333333333334e-05,
"loss": 3.0106,
"step": 17100
},
{
"epoch": 6.12,
"learning_rate": 1.0408e-05,
"loss": 3.0208,
"step": 17200
},
{
"epoch": 6.16,
"learning_rate": 1.0274666666666667e-05,
"loss": 3.0455,
"step": 17300
},
{
"epoch": 6.19,
"learning_rate": 1.0141333333333332e-05,
"loss": 3.0404,
"step": 17400
},
{
"epoch": 6.23,
"learning_rate": 1.0008e-05,
"loss": 3.0312,
"step": 17500
},
{
"epoch": 6.23,
"eval_gen_len": 236.2234,
"eval_loss": 3.270146608352661,
"eval_rouge1": 45.5731,
"eval_rouge2": 17.5404,
"eval_rougeL": 24.0925,
"eval_rougeLsum": 41.7584,
"eval_runtime": 1079.8219,
"eval_samples_per_second": 5.206,
"eval_steps_per_second": 0.326,
"step": 17500
},
{
"epoch": 6.26,
"learning_rate": 9.874666666666667e-06,
"loss": 3.033,
"step": 17600
},
{
"epoch": 6.3,
"learning_rate": 9.741333333333334e-06,
"loss": 3.045,
"step": 17700
},
{
"epoch": 6.33,
"learning_rate": 9.608e-06,
"loss": 3.0339,
"step": 17800
},
{
"epoch": 6.37,
"learning_rate": 9.474666666666668e-06,
"loss": 3.034,
"step": 17900
},
{
"epoch": 6.41,
"learning_rate": 9.341333333333333e-06,
"loss": 3.0231,
"step": 18000
},
{
"epoch": 6.41,
"eval_gen_len": 260.1686,
"eval_loss": 3.271860361099243,
"eval_rouge1": 46.1094,
"eval_rouge2": 17.7117,
"eval_rougeL": 24.1117,
"eval_rougeLsum": 42.2882,
"eval_runtime": 1163.8918,
"eval_samples_per_second": 4.83,
"eval_steps_per_second": 0.302,
"step": 18000
},
{
"epoch": 6.44,
"learning_rate": 9.208e-06,
"loss": 3.0454,
"step": 18100
},
{
"epoch": 6.48,
"learning_rate": 9.074666666666666e-06,
"loss": 3.0343,
"step": 18200
},
{
"epoch": 6.51,
"learning_rate": 8.941333333333334e-06,
"loss": 3.0386,
"step": 18300
},
{
"epoch": 6.55,
"learning_rate": 8.808000000000001e-06,
"loss": 3.0429,
"step": 18400
},
{
"epoch": 6.58,
"learning_rate": 8.674666666666667e-06,
"loss": 3.0414,
"step": 18500
},
{
"epoch": 6.58,
"eval_gen_len": 245.0961,
"eval_loss": 3.2702643871307373,
"eval_rouge1": 45.9178,
"eval_rouge2": 17.6987,
"eval_rougeL": 24.1882,
"eval_rougeLsum": 42.1382,
"eval_runtime": 1111.098,
"eval_samples_per_second": 5.06,
"eval_steps_per_second": 0.317,
"step": 18500
},
{
"epoch": 6.62,
"learning_rate": 8.541333333333334e-06,
"loss": 3.0242,
"step": 18600
},
{
"epoch": 6.65,
"learning_rate": 8.408e-06,
"loss": 3.0449,
"step": 18700
},
{
"epoch": 6.69,
"learning_rate": 8.274666666666667e-06,
"loss": 3.0392,
"step": 18800
},
{
"epoch": 6.73,
"learning_rate": 8.141333333333333e-06,
"loss": 3.0336,
"step": 18900
},
{
"epoch": 6.76,
"learning_rate": 8.008e-06,
"loss": 3.0434,
"step": 19000
},
{
"epoch": 6.76,
"eval_gen_len": 247.8225,
"eval_loss": 3.2714767456054688,
"eval_rouge1": 46.0129,
"eval_rouge2": 17.7545,
"eval_rougeL": 24.2235,
"eval_rougeLsum": 42.245,
"eval_runtime": 1091.4396,
"eval_samples_per_second": 5.151,
"eval_steps_per_second": 0.323,
"step": 19000
},
{
"epoch": 6.8,
"learning_rate": 7.874666666666666e-06,
"loss": 3.0021,
"step": 19100
},
{
"epoch": 6.83,
"learning_rate": 7.741333333333335e-06,
"loss": 3.0498,
"step": 19200
},
{
"epoch": 6.87,
"learning_rate": 7.608e-06,
"loss": 3.0314,
"step": 19300
},
{
"epoch": 6.9,
"learning_rate": 7.476e-06,
"loss": 3.0437,
"step": 19400
},
{
"epoch": 6.94,
"learning_rate": 7.342666666666667e-06,
"loss": 3.0456,
"step": 19500
},
{
"epoch": 6.94,
"eval_gen_len": 256.9835,
"eval_loss": 3.2681996822357178,
"eval_rouge1": 45.8634,
"eval_rouge2": 17.6462,
"eval_rougeL": 24.1366,
"eval_rougeLsum": 42.1194,
"eval_runtime": 1174.1005,
"eval_samples_per_second": 4.788,
"eval_steps_per_second": 0.3,
"step": 19500
},
{
"epoch": 6.97,
"learning_rate": 7.209333333333334e-06,
"loss": 3.0172,
"step": 19600
},
{
"epoch": 7.01,
"learning_rate": 7.077333333333333e-06,
"loss": 3.0364,
"step": 19700
},
{
"epoch": 7.05,
"learning_rate": 6.944e-06,
"loss": 3.0109,
"step": 19800
},
{
"epoch": 7.08,
"learning_rate": 6.8106666666666665e-06,
"loss": 2.998,
"step": 19900
},
{
"epoch": 7.12,
"learning_rate": 6.677333333333334e-06,
"loss": 3.0188,
"step": 20000
},
{
"epoch": 7.12,
"eval_gen_len": 240.1866,
"eval_loss": 3.2752106189727783,
"eval_rouge1": 45.8366,
"eval_rouge2": 17.6771,
"eval_rougeL": 24.165,
"eval_rougeLsum": 42.0438,
"eval_runtime": 1085.4389,
"eval_samples_per_second": 5.179,
"eval_steps_per_second": 0.324,
"step": 20000
},
{
"epoch": 7.15,
"learning_rate": 6.544e-06,
"loss": 3.0119,
"step": 20100
},
{
"epoch": 7.19,
"learning_rate": 6.410666666666667e-06,
"loss": 3.0091,
"step": 20200
},
{
"epoch": 7.22,
"learning_rate": 6.2773333333333334e-06,
"loss": 3.0077,
"step": 20300
},
{
"epoch": 7.26,
"learning_rate": 6.144000000000001e-06,
"loss": 2.9942,
"step": 20400
},
{
"epoch": 7.3,
"learning_rate": 6.010666666666667e-06,
"loss": 3.0227,
"step": 20500
},
{
"epoch": 7.3,
"eval_gen_len": 245.8337,
"eval_loss": 3.2722229957580566,
"eval_rouge1": 46.0509,
"eval_rouge2": 17.8248,
"eval_rougeL": 24.2389,
"eval_rougeLsum": 42.2681,
"eval_runtime": 1093.3258,
"eval_samples_per_second": 5.142,
"eval_steps_per_second": 0.322,
"step": 20500
},
{
"epoch": 7.33,
"learning_rate": 5.877333333333334e-06,
"loss": 2.9996,
"step": 20600
},
{
"epoch": 7.37,
"learning_rate": 5.744e-06,
"loss": 3.0046,
"step": 20700
},
{
"epoch": 7.4,
"learning_rate": 5.610666666666667e-06,
"loss": 3.0018,
"step": 20800
},
{
"epoch": 7.44,
"learning_rate": 5.4773333333333335e-06,
"loss": 3.0096,
"step": 20900
},
{
"epoch": 7.47,
"learning_rate": 5.344e-06,
"loss": 2.9895,
"step": 21000
},
{
"epoch": 7.47,
"eval_gen_len": 243.867,
"eval_loss": 3.2725987434387207,
"eval_rouge1": 45.7896,
"eval_rouge2": 17.5833,
"eval_rougeL": 24.1226,
"eval_rougeLsum": 42.016,
"eval_runtime": 1110.8794,
"eval_samples_per_second": 5.061,
"eval_steps_per_second": 0.317,
"step": 21000
},
{
"epoch": 7.51,
"learning_rate": 5.2106666666666665e-06,
"loss": 3.0186,
"step": 21100
},
{
"epoch": 7.54,
"learning_rate": 5.077333333333334e-06,
"loss": 3.0337,
"step": 21200
},
{
"epoch": 7.58,
"learning_rate": 4.9440000000000004e-06,
"loss": 3.0136,
"step": 21300
},
{
"epoch": 7.62,
"learning_rate": 4.810666666666667e-06,
"loss": 3.0109,
"step": 21400
},
{
"epoch": 7.65,
"learning_rate": 4.6773333333333335e-06,
"loss": 3.0146,
"step": 21500
},
{
"epoch": 7.65,
"eval_gen_len": 244.0598,
"eval_loss": 3.269317865371704,
"eval_rouge1": 46.0179,
"eval_rouge2": 17.6952,
"eval_rougeL": 24.2204,
"eval_rougeLsum": 42.2436,
"eval_runtime": 1075.6479,
"eval_samples_per_second": 5.227,
"eval_steps_per_second": 0.327,
"step": 21500
},
{
"epoch": 7.69,
"learning_rate": 4.544e-06,
"loss": 3.0195,
"step": 21600
},
{
"epoch": 7.72,
"learning_rate": 4.4106666666666666e-06,
"loss": 3.0103,
"step": 21700
},
{
"epoch": 7.76,
"learning_rate": 4.277333333333333e-06,
"loss": 3.0117,
"step": 21800
},
{
"epoch": 7.79,
"learning_rate": 4.144e-06,
"loss": 3.0012,
"step": 21900
},
{
"epoch": 7.83,
"learning_rate": 4.010666666666667e-06,
"loss": 3.014,
"step": 22000
},
{
"epoch": 7.83,
"eval_gen_len": 240.4804,
"eval_loss": 3.2708346843719482,
"eval_rouge1": 46.0704,
"eval_rouge2": 17.75,
"eval_rougeL": 24.2308,
"eval_rougeLsum": 42.2591,
"eval_runtime": 1055.0456,
"eval_samples_per_second": 5.329,
"eval_steps_per_second": 0.334,
"step": 22000
},
{
"epoch": 7.86,
"learning_rate": 3.8773333333333335e-06,
"loss": 3.001,
"step": 22100
},
{
"epoch": 7.9,
"learning_rate": 3.744e-06,
"loss": 3.0133,
"step": 22200
},
{
"epoch": 7.94,
"learning_rate": 3.6106666666666666e-06,
"loss": 3.0007,
"step": 22300
},
{
"epoch": 7.97,
"learning_rate": 3.4773333333333336e-06,
"loss": 3.0189,
"step": 22400
},
{
"epoch": 8.01,
"learning_rate": 3.344e-06,
"loss": 3.0427,
"step": 22500
},
{
"epoch": 8.01,
"eval_gen_len": 242.4203,
"eval_loss": 3.27339243888855,
"eval_rouge1": 46.0662,
"eval_rouge2": 17.7231,
"eval_rougeL": 24.1915,
"eval_rougeLsum": 42.2227,
"eval_runtime": 1083.3051,
"eval_samples_per_second": 5.19,
"eval_steps_per_second": 0.325,
"step": 22500
},
{
"epoch": 8.04,
"learning_rate": 3.210666666666667e-06,
"loss": 2.995,
"step": 22600
},
{
"epoch": 8.08,
"learning_rate": 3.0773333333333336e-06,
"loss": 2.9946,
"step": 22700
},
{
"epoch": 8.11,
"learning_rate": 2.944e-06,
"loss": 3.0003,
"step": 22800
},
{
"epoch": 8.15,
"learning_rate": 2.8106666666666666e-06,
"loss": 2.9959,
"step": 22900
},
{
"epoch": 8.19,
"learning_rate": 2.6773333333333336e-06,
"loss": 2.9835,
"step": 23000
},
{
"epoch": 8.19,
"eval_gen_len": 236.6266,
"eval_loss": 3.273963212966919,
"eval_rouge1": 46.165,
"eval_rouge2": 17.8947,
"eval_rougeL": 24.366,
"eval_rougeLsum": 42.3521,
"eval_runtime": 1047.6593,
"eval_samples_per_second": 5.366,
"eval_steps_per_second": 0.336,
"step": 23000
},
{
"epoch": 8.22,
"learning_rate": 2.544e-06,
"loss": 2.9922,
"step": 23100
},
{
"epoch": 8.26,
"learning_rate": 2.4106666666666667e-06,
"loss": 2.9937,
"step": 23200
},
{
"epoch": 8.29,
"learning_rate": 2.277333333333333e-06,
"loss": 2.9933,
"step": 23300
},
{
"epoch": 8.33,
"learning_rate": 2.144e-06,
"loss": 2.9921,
"step": 23400
},
{
"epoch": 8.36,
"learning_rate": 2.0106666666666667e-06,
"loss": 2.987,
"step": 23500
},
{
"epoch": 8.36,
"eval_gen_len": 238.479,
"eval_loss": 3.2719457149505615,
"eval_rouge1": 45.9025,
"eval_rouge2": 17.7625,
"eval_rougeL": 24.2432,
"eval_rougeLsum": 42.1257,
"eval_runtime": 1104.1087,
"eval_samples_per_second": 5.092,
"eval_steps_per_second": 0.319,
"step": 23500
},
{
"epoch": 8.4,
"learning_rate": 1.8773333333333332e-06,
"loss": 2.9925,
"step": 23600
},
{
"epoch": 8.43,
"learning_rate": 1.7440000000000002e-06,
"loss": 2.9924,
"step": 23700
},
{
"epoch": 8.47,
"learning_rate": 1.6106666666666667e-06,
"loss": 3.0224,
"step": 23800
},
{
"epoch": 8.51,
"learning_rate": 1.4773333333333334e-06,
"loss": 3.0137,
"step": 23900
},
{
"epoch": 8.54,
"learning_rate": 1.344e-06,
"loss": 2.9922,
"step": 24000
},
{
"epoch": 8.54,
"eval_gen_len": 245.2081,
"eval_loss": 3.2731070518493652,
"eval_rouge1": 46.1971,
"eval_rouge2": 17.7962,
"eval_rougeL": 24.2279,
"eval_rougeLsum": 42.3853,
"eval_runtime": 1087.3894,
"eval_samples_per_second": 5.17,
"eval_steps_per_second": 0.324,
"step": 24000
},
{
"epoch": 8.58,
"learning_rate": 1.2106666666666667e-06,
"loss": 2.9974,
"step": 24100
},
{
"epoch": 8.61,
"learning_rate": 1.0773333333333332e-06,
"loss": 2.9955,
"step": 24200
},
{
"epoch": 8.65,
"learning_rate": 9.44e-07,
"loss": 2.9914,
"step": 24300
},
{
"epoch": 8.68,
"learning_rate": 8.106666666666667e-07,
"loss": 2.9803,
"step": 24400
},
{
"epoch": 8.72,
"learning_rate": 6.773333333333334e-07,
"loss": 2.9788,
"step": 24500
},
{
"epoch": 8.72,
"eval_gen_len": 240.1747,
"eval_loss": 3.2718217372894287,
"eval_rouge1": 46.0806,
"eval_rouge2": 17.8417,
"eval_rougeL": 24.3261,
"eval_rougeLsum": 42.264,
"eval_runtime": 1088.0734,
"eval_samples_per_second": 5.167,
"eval_steps_per_second": 0.324,
"step": 24500
},
{
"epoch": 8.75,
"learning_rate": 5.44e-07,
"loss": 3.001,
"step": 24600
},
{
"epoch": 8.79,
"learning_rate": 4.106666666666667e-07,
"loss": 2.9962,
"step": 24700
},
{
"epoch": 8.83,
"learning_rate": 2.7733333333333333e-07,
"loss": 2.9987,
"step": 24800
},
{
"epoch": 8.86,
"learning_rate": 1.44e-07,
"loss": 2.9703,
"step": 24900
},
{
"epoch": 8.9,
"learning_rate": 1.0666666666666668e-08,
"loss": 2.9878,
"step": 25000
},
{
"epoch": 8.9,
"eval_gen_len": 242.5598,
"eval_loss": 3.2715346813201904,
"eval_rouge1": 46.0618,
"eval_rouge2": 17.7725,
"eval_rougeL": 24.2234,
"eval_rougeLsum": 42.2574,
"eval_runtime": 1097.8453,
"eval_samples_per_second": 5.121,
"eval_steps_per_second": 0.321,
"step": 25000
},
{
"epoch": 8.9,
"step": 25000,
"total_flos": 5.0265269514797056e+17,
"train_loss": 3.1441598370170594,
"train_runtime": 61182.0827,
"train_samples_per_second": 6.538,
"train_steps_per_second": 0.409
}
],
"max_steps": 25000,
"num_train_epochs": 9,
"total_flos": 5.0265269514797056e+17,
"trial_name": null,
"trial_params": null
}