{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "global_step": 724,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 4.2523,
      "step": 3
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 3.4255,
      "step": 6
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8e-05,
      "loss": 2.5412,
      "step": 9
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.400000000000001e-05,
      "loss": 2.0169,
      "step": 12
    },
    {
      "epoch": 0.04,
      "learning_rate": 8e-05,
      "loss": 1.8547,
      "step": 15
    },
    {
      "epoch": 0.05,
      "learning_rate": 7.999646594434211e-05,
      "loss": 1.8217,
      "step": 18
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.998586440184589e-05,
      "loss": 1.7177,
      "step": 21
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.996819724583341e-05,
      "loss": 1.6448,
      "step": 24
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.99434675981403e-05,
      "loss": 1.6419,
      "step": 27
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.991167982856416e-05,
      "loss": 1.595,
      "step": 30
    },
    {
      "epoch": 0.09,
      "learning_rate": 7.987283955409229e-05,
      "loss": 1.7162,
      "step": 33
    },
    {
      "epoch": 0.1,
      "learning_rate": 7.982695363790929e-05,
      "loss": 1.6641,
      "step": 36
    },
    {
      "epoch": 0.11,
      "learning_rate": 7.977403018818425e-05,
      "loss": 1.5268,
      "step": 39
    },
    {
      "epoch": 0.12,
      "learning_rate": 7.971407855663803e-05,
      "loss": 1.5187,
      "step": 42
    },
    {
      "epoch": 0.12,
      "learning_rate": 7.964710933689073e-05,
      "loss": 1.6022,
      "step": 45
    },
    {
      "epoch": 0.13,
      "learning_rate": 7.95731343625899e-05,
      "loss": 1.5216,
      "step": 48
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.94921667053193e-05,
      "loss": 1.5416,
      "step": 51
    },
    {
      "epoch": 0.15,
      "learning_rate": 7.940422067228933e-05,
      "loss": 1.5302,
      "step": 54
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.930931180380879e-05,
      "loss": 1.4887,
      "step": 57
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.920745687053881e-05,
      "loss": 1.5794,
      "step": 60
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.909867387052959e-05,
      "loss": 1.4197,
      "step": 63
    },
    {
      "epoch": 0.18,
      "learning_rate": 7.898298202603996e-05,
      "loss": 1.5554,
      "step": 66
    },
    {
      "epoch": 0.19,
      "learning_rate": 7.886040178014079e-05,
      "loss": 1.4569,
      "step": 69
    },
    {
      "epoch": 0.2,
      "learning_rate": 7.873095479310265e-05,
      "loss": 1.4795,
      "step": 72
    },
    {
      "epoch": 0.21,
      "learning_rate": 7.859466393856842e-05,
      "loss": 1.4088,
      "step": 75
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.845155329951134e-05,
      "loss": 1.4127,
      "step": 78
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.830164816397961e-05,
      "loss": 1.3549,
      "step": 81
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.814497502062784e-05,
      "loss": 1.4085,
      "step": 84
    },
    {
      "epoch": 0.24,
      "learning_rate": 7.798156155403649e-05,
      "loss": 1.4669,
      "step": 87
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.781143663981985e-05,
      "loss": 1.3935,
      "step": 90
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.76346303395237e-05,
      "loss": 1.4601,
      "step": 93
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.745117389531335e-05,
      "loss": 1.3549,
      "step": 96
    },
    {
      "epoch": 0.27,
      "learning_rate": 7.726109972445301e-05,
      "loss": 1.4819,
      "step": 99
    },
    {
      "epoch": 0.28,
      "learning_rate": 7.706444141357764e-05,
      "loss": 1.4633,
      "step": 102
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.686123371275806e-05,
      "loss": 1.4074,
      "step": 105
    },
    {
      "epoch": 0.3,
      "learning_rate": 7.665151252936049e-05,
      "loss": 1.3739,
      "step": 108
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.643531492170168e-05,
      "loss": 1.4575,
      "step": 111
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.621267909250057e-05,
      "loss": 1.4109,
      "step": 114
    },
    {
      "epoch": 0.32,
      "learning_rate": 7.598364438212773e-05,
      "loss": 1.261,
      "step": 117
    },
    {
      "epoch": 0.33,
      "learning_rate": 7.574825126165386e-05,
      "loss": 1.251,
      "step": 120
    },
    {
      "epoch": 0.34,
      "learning_rate": 7.550654132569846e-05,
      "loss": 1.3583,
      "step": 123
    },
    {
      "epoch": 0.35,
      "learning_rate": 7.525855728507984e-05,
      "loss": 1.3513,
      "step": 126
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.500434295926807e-05,
      "loss": 1.4371,
      "step": 129
    },
    {
      "epoch": 0.36,
      "learning_rate": 7.474394326864201e-05,
      "loss": 1.5398,
      "step": 132
    },
    {
      "epoch": 0.37,
      "learning_rate": 7.447740422655164e-05,
      "loss": 1.3364,
      "step": 135
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.420477293118745e-05,
      "loss": 1.2326,
      "step": 138
    },
    {
      "epoch": 0.39,
      "learning_rate": 7.392609755725803e-05,
      "loss": 1.3098,
      "step": 141
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.36414273474775e-05,
      "loss": 1.3438,
      "step": 144
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.33508126038641e-05,
      "loss": 1.3329,
      "step": 147
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.305430467885182e-05,
      "loss": 1.4064,
      "step": 150
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.275195596621611e-05,
      "loss": 1.3769,
      "step": 153
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.244381989181594e-05,
      "loss": 1.3437,
      "step": 156
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.212995090415312e-05,
      "loss": 1.2524,
      "step": 159
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.181040446475129e-05,
      "loss": 1.4045,
      "step": 162
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.148523703835553e-05,
      "loss": 1.207,
      "step": 165
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.115450608295498e-05,
      "loss": 1.2996,
      "step": 168
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.081827003962987e-05,
      "loss": 1.2952,
      "step": 171
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.047658832222475e-05,
      "loss": 1.4254,
      "step": 174
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.012952130684995e-05,
      "loss": 1.3879,
      "step": 177
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.977713032121295e-05,
      "loss": 1.3536,
      "step": 180
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.941947763378157e-05,
      "loss": 1.2768,
      "step": 183
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.905662644278099e-05,
      "loss": 1.3828,
      "step": 186
    },
    {
      "epoch": 0.52,
      "learning_rate": 6.868864086502643e-05,
      "loss": 1.2058,
      "step": 189
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.831558592459356e-05,
      "loss": 1.2856,
      "step": 192
    },
    {
      "epoch": 0.54,
      "learning_rate": 6.793752754132852e-05,
      "loss": 1.2367,
      "step": 195
    },
    {
      "epoch": 0.55,
      "learning_rate": 6.755453251919973e-05,
      "loss": 1.3201,
      "step": 198
    },
    {
      "epoch": 0.56,
      "learning_rate": 6.716666853449342e-05,
      "loss": 1.3446,
      "step": 201
    },
    {
      "epoch": 0.56,
      "learning_rate": 6.67740041238551e-05,
      "loss": 1.2449,
      "step": 204
    },
    {
      "epoch": 0.57,
      "learning_rate": 6.637660867217884e-05,
      "loss": 1.2529,
      "step": 207
    },
    {
      "epoch": 0.58,
      "learning_rate": 6.59745524003469e-05,
      "loss": 1.3266,
      "step": 210
    },
    {
      "epoch": 0.59,
      "learning_rate": 6.556790635282136e-05,
      "loss": 1.2609,
      "step": 213
    },
    {
      "epoch": 0.6,
      "learning_rate": 6.515674238509048e-05,
      "loss": 1.231,
      "step": 216
    },
    {
      "epoch": 0.6,
      "learning_rate": 6.474113315097161e-05,
      "loss": 1.3123,
      "step": 219
    },
    {
      "epoch": 0.61,
      "learning_rate": 6.432115208977297e-05,
      "loss": 1.2846,
      "step": 222
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.389687341331688e-05,
      "loss": 1.3302,
      "step": 225
    },
    {
      "epoch": 0.63,
      "learning_rate": 6.346837209282615e-05,
      "loss": 1.386,
      "step": 228
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.303572384567662e-05,
      "loss": 1.332,
      "step": 231
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.259900512201756e-05,
      "loss": 1.3034,
      "step": 234
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.215829309126279e-05,
      "loss": 1.4256,
      "step": 237
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.17136656284546e-05,
      "loss": 1.2721,
      "step": 240
    },
    {
      "epoch": 0.67,
      "learning_rate": 6.1265201300503e-05,
      "loss": 1.3103,
      "step": 243
    },
    {
      "epoch": 0.68,
      "learning_rate": 6.081297935230281e-05,
      "loss": 1.2451,
      "step": 246
    },
    {
      "epoch": 0.69,
      "learning_rate": 6.035707969273072e-05,
      "loss": 1.1757,
      "step": 249
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.989758288052531e-05,
      "loss": 1.3589,
      "step": 252
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.9434570110052036e-05,
      "loss": 1.275,
      "step": 255
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.8968123196955955e-05,
      "loss": 1.3078,
      "step": 258
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.8498324563704676e-05,
      "loss": 1.3537,
      "step": 261
    },
    {
      "epoch": 0.73,
      "learning_rate": 5.80252572250241e-05,
      "loss": 1.1313,
      "step": 264
    },
    {
      "epoch": 0.74,
      "learning_rate": 5.7549004773229474e-05,
      "loss": 1.2557,
      "step": 267
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.706965136345439e-05,
      "loss": 1.208,
      "step": 270
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.658728169878033e-05,
      "loss": 1.0945,
      "step": 273
    },
    {
      "epoch": 0.76,
      "learning_rate": 5.6101981015269436e-05,
      "loss": 1.3753,
      "step": 276
    },
    {
      "epoch": 0.77,
      "learning_rate": 5.561383506690303e-05,
      "loss": 1.2593,
      "step": 279
    },
    {
      "epoch": 0.78,
      "learning_rate": 5.512293011042863e-05,
      "loss": 1.2717,
      "step": 282
    },
    {
      "epoch": 0.79,
      "learning_rate": 5.462935289011821e-05,
      "loss": 1.2859,
      "step": 285
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.4133190622440153e-05,
      "loss": 1.1984,
      "step": 288
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.363453098064792e-05,
      "loss": 1.3106,
      "step": 291
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.313346207928795e-05,
      "loss": 1.2159,
      "step": 294
    },
    {
      "epoch": 0.82,
      "learning_rate": 5.2630072458629526e-05,
      "loss": 1.1897,
      "step": 297
    },
    {
      "epoch": 0.83,
      "learning_rate": 5.2124451069019495e-05,
      "loss": 1.1248,
      "step": 300
    },
    {
      "epoch": 0.84,
      "learning_rate": 5.161668725516451e-05,
      "loss": 1.2375,
      "step": 303
    },
    {
      "epoch": 0.85,
      "learning_rate": 5.110687074034351e-05,
      "loss": 1.2367,
      "step": 306
    },
    {
      "epoch": 0.85,
      "learning_rate": 5.059509161055343e-05,
      "loss": 1.2432,
      "step": 309
    },
    {
      "epoch": 0.86,
      "learning_rate": 5.008144029859074e-05,
      "loss": 1.2744,
      "step": 312
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.956600756807172e-05,
      "loss": 1.2705,
      "step": 315
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.904888449739422e-05,
      "loss": 1.2448,
      "step": 318
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.8530162463643935e-05,
      "loss": 1.1841,
      "step": 321
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.800993312644778e-05,
      "loss": 1.3046,
      "step": 324
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.748828841177738e-05,
      "loss": 1.1876,
      "step": 327
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.6965320495705504e-05,
      "loss": 1.2061,
      "step": 330
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.644112178811828e-05,
      "loss": 1.2982,
      "step": 333
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.591578491638613e-05,
      "loss": 1.2507,
      "step": 336
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.538940270899625e-05,
      "loss": 1.2313,
      "step": 339
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.4862068179149546e-05,
      "loss": 1.2507,
      "step": 342
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.4333874508324964e-05,
      "loss": 1.274,
      "step": 345
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.3804915029814054e-05,
      "loss": 1.2024,
      "step": 348
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.327528321222869e-05,
      "loss": 1.2804,
      "step": 351
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.274507264298496e-05,
      "loss": 1.1524,
      "step": 354
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.2214377011765956e-05,
      "loss": 1.236,
      "step": 357
    },
    {
      "epoch": 0.99,
      "learning_rate": 4.1683290093966603e-05,
      "loss": 1.2723,
      "step": 360
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 14.044312630844383,
      "eval_loss": 1.032505989074707,
      "eval_rouge1": 61.6206,
      "eval_rouge2": 45.1199,
      "eval_rougeL": 59.6467,
      "eval_rougeLsum": 59.7534,
      "eval_runtime": 315.6416,
      "eval_samples_per_second": 9.08,
      "eval_steps_per_second": 9.08,
      "step": 362
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.115190573412321e-05,
      "loss": 1.1678,
      "step": 363
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.062031782933099e-05,
      "loss": 1.1245,
      "step": 366
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.008862031265205e-05,
      "loss": 1.0944,
      "step": 369
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.955690713651723e-05,
      "loss": 1.0338,
      "step": 372
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.902527225612447e-05,
      "loss": 1.1155,
      "step": 375
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.849380961283661e-05,
      "loss": 1.0579,
      "step": 378
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.796261311758174e-05,
      "loss": 1.0659,
      "step": 381
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.743177663425883e-05,
      "loss": 1.0031,
      "step": 384
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.690139396315174e-05,
      "loss": 1.0458,
      "step": 387
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.637155882435446e-05,
      "loss": 0.9631,
      "step": 390
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.5842364841210466e-05,
      "loss": 1.0156,
      "step": 393
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.53139055237693e-05,
      "loss": 1.0457,
      "step": 396
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.478627425226299e-05,
      "loss": 1.0843,
      "step": 399
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.4259564260605564e-05,
      "loss": 1.1081,
      "step": 402
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.373386861991832e-05,
      "loss": 1.0335,
      "step": 405
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.320928022208392e-05,
      "loss": 1.0572,
      "step": 408
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.268589176333213e-05,
      "loss": 1.0875,
      "step": 411
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.216379572786015e-05,
      "loss": 1.0419,
      "step": 414
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.1643084371490394e-05,
      "loss": 1.0162,
      "step": 417
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.112384970536862e-05,
      "loss": 1.1311,
      "step": 420
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.060618347970529e-05,
      "loss": 1.0091,
      "step": 423
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0090177167563106e-05,
      "loss": 1.0725,
      "step": 426
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.9575921948693394e-05,
      "loss": 0.9811,
      "step": 429
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.906350869342447e-05,
      "loss": 1.1393,
      "step": 432
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.8553027946604523e-05,
      "loss": 1.0846,
      "step": 435
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.8044569911602134e-05,
      "loss": 1.0058,
      "step": 438
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.7538224434367063e-05,
      "loss": 1.0463,
      "step": 441
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.703408098755424e-05,
      "loss": 1.006,
      "step": 444
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.6532228654713706e-05,
      "loss": 1.0281,
      "step": 447
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.603275611454928e-05,
      "loss": 1.1484,
      "step": 450
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.5535751625248784e-05,
      "loss": 1.1257,
      "step": 453
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.5041303008888593e-05,
      "loss": 1.1191,
      "step": 456
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.454949763591521e-05,
      "loss": 1.0099,
      "step": 459
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.406042240970668e-05,
      "loss": 1.0287,
      "step": 462
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.3574163751216513e-05,
      "loss": 0.9936,
      "step": 465
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.30908075837029e-05,
      "loss": 0.959,
      "step": 468
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.2610439317545723e-05,
      "loss": 1.0438,
      "step": 471
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.213314383515447e-05,
      "loss": 0.9786,
      "step": 474
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.1659005475969125e-05,
      "loss": 1.0546,
      "step": 477
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.1188108021557236e-05,
      "loss": 1.058,
      "step": 480
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.0720534680809452e-05,
      "loss": 1.0402,
      "step": 483
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.0256368075236296e-05,
      "loss": 1.0674,
      "step": 486
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.979569022436869e-05,
      "loss": 0.9716,
      "step": 489
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.9338582531264908e-05,
      "loss": 0.9702,
      "step": 492
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.8885125768126405e-05,
      "loss": 1.0379,
      "step": 495
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.843540006202513e-05,
      "loss": 0.9711,
      "step": 498
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.7989484880744917e-05,
      "loss": 1.0055,
      "step": 501
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.754745901873923e-05,
      "loss": 0.943,
      "step": 504
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.7109400583207977e-05,
      "loss": 1.0675,
      "step": 507
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.667538698029581e-05,
      "loss": 1.0679,
      "step": 510
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.624549490141417e-05,
      "loss": 0.9835,
      "step": 513
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.581980030968974e-05,
      "loss": 1.0194,
      "step": 516
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.5398378426541535e-05,
      "loss": 1.1163,
      "step": 519
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.4981303718389088e-05,
      "loss": 1.0588,
      "step": 522
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.4568649883494001e-05,
      "loss": 0.9728,
      "step": 525
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.416048983893727e-05,
      "loss": 1.0166,
      "step": 528
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.3756895707734637e-05,
      "loss": 1.0569,
      "step": 531
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.3357938806092245e-05,
      "loss": 1.0751,
      "step": 534
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.2963689630804854e-05,
      "loss": 0.9486,
      "step": 537
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.2574217846798921e-05,
      "loss": 1.0278,
      "step": 540
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.2189592274822526e-05,
      "loss": 1.0522,
      "step": 543
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.1809880879284608e-05,
      "loss": 1.0255,
      "step": 546
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.1435150756245439e-05,
      "loss": 1.0388,
      "step": 549
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.1065468121560627e-05,
      "loss": 0.9952,
      "step": 552
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0700898299180493e-05,
      "loss": 1.084,
      "step": 555
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.034150570960721e-05,
      "loss": 0.9855,
      "step": 558
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.987353858511506e-06,
      "loss": 1.0436,
      "step": 561
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.638505325511041e-06,
      "loss": 1.0048,
      "step": 564
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.295021753112402e-06,
      "loss": 1.0402,
      "step": 567
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.956963835818708e-06,
      "loss": 1.0207,
      "step": 570
    },
    {
      "epoch": 1.58,
      "learning_rate": 8.62439130940472e-06,
      "loss": 0.9986,
      "step": 573
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.297362940361386e-06,
      "loss": 0.9838,
      "step": 576
    },
    {
      "epoch": 1.6,
      "learning_rate": 7.975936515511598e-06,
      "loss": 1.0181,
      "step": 579
    },
    {
      "epoch": 1.61,
      "learning_rate": 7.660168831799115e-06,
      "loss": 1.0555,
      "step": 582
    },
    {
      "epoch": 1.62,
      "learning_rate": 7.350115686252399e-06,
      "loss": 0.9745,
      "step": 585
    },
    {
      "epoch": 1.62,
      "learning_rate": 7.045831866125117e-06,
      "loss": 1.0337,
      "step": 588
    },
    {
      "epoch": 1.63,
      "learning_rate": 6.747371139215069e-06,
      "loss": 1.0722,
      "step": 591
    },
    {
      "epoch": 1.64,
      "learning_rate": 6.454786244363292e-06,
      "loss": 1.1129,
      "step": 594
    },
    {
      "epoch": 1.65,
      "learning_rate": 6.168128882134934e-06,
      "loss": 1.0271,
      "step": 597
    },
    {
      "epoch": 1.66,
      "learning_rate": 5.887449705683632e-06,
      "loss": 1.074,
      "step": 600
    },
    {
      "epoch": 1.67,
      "learning_rate": 5.61279831180098e-06,
      "loss": 0.9695,
      "step": 603
    },
    {
      "epoch": 1.67,
      "learning_rate": 5.344223232152596e-06,
      "loss": 1.0043,
      "step": 606
    },
    {
      "epoch": 1.68,
      "learning_rate": 5.081771924702468e-06,
      "loss": 1.0169,
      "step": 609
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.825490765327003e-06,
      "loss": 0.9531,
      "step": 612
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.575425039620265e-06,
      "loss": 1.0418,
      "step": 615
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.3316189348918855e-06,
      "loss": 1.1068,
      "step": 618
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.094115532359064e-06,
      "loss": 0.9387,
      "step": 621
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.862956799533977e-06,
      "loss": 1.0124,
      "step": 624
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.6381835828079946e-06,
      "loss": 0.9843,
      "step": 627
    },
    {
      "epoch": 1.74,
      "learning_rate": 3.4198356002340405e-06,
      "loss": 1.0601,
      "step": 630
    },
    {
      "epoch": 1.75,
      "learning_rate": 3.2079514345082764e-06,
      "loss": 1.0091,
      "step": 633
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.0025685261524297e-06,
      "loss": 1.04,
      "step": 636
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.803723166897965e-06,
      "loss": 0.9952,
      "step": 639
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.611450493273244e-06,
      "loss": 0.9844,
      "step": 642
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.4257844803947573e-06,
      "loss": 1.0642,
      "step": 645
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.2467579359636726e-06,
      "loss": 0.9943,
      "step": 648
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.0744024944685968e-06,
      "loss": 0.9946,
      "step": 651
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9087486115956987e-06,
      "loss": 0.9849,
      "step": 654
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.7498255588470803e-06,
      "loss": 1.0182,
      "step": 657
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.5976614183684214e-06,
      "loss": 1.0031,
      "step": 660
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.452283077986807e-06,
      "loss": 1.0455,
      "step": 663
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.3137162264595493e-06,
      "loss": 0.9796,
      "step": 666
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.181985348934931e-06,
      "loss": 1.1242,
      "step": 669
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.0571137226256067e-06,
      "loss": 1.0918,
      "step": 672
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.391234126954463e-07,
      "loss": 0.9396,
      "step": 675
    },
    {
      "epoch": 1.87,
      "learning_rate": 8.280352683605764e-07,
      "loss": 1.0158,
      "step": 678
    },
    {
      "epoch": 1.88,
      "learning_rate": 7.238689192052439e-07,
      "loss": 1.0224,
      "step": 681
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.266427717132218e-07,
      "loss": 0.9853,
      "step": 684
    },
    {
      "epoch": 1.9,
      "learning_rate": 5.363740060153522e-07,
      "loss": 1.0104,
      "step": 687
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.530785728537401e-07,
      "loss": 0.9941,
      "step": 690
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.76771190763221e-07,
      "loss": 1.0154,
      "step": 693
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.074653434705699e-07,
      "loss": 1.0677,
      "step": 696
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.4517327751187423e-07,
      "loss": 1.0048,
      "step": 699
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.8990600006854488e-07,
      "loss": 1.0934,
      "step": 702
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.4167327702230283e-07,
      "loss": 1.0325,
      "step": 705
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.0048363122954208e-07,
      "loss": 1.1128,
      "step": 708
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.634434101529863e-08,
      "loss": 1.0901,
      "step": 711
    },
    {
      "epoch": 1.97,
      "learning_rate": 3.926143888715484e-08,
      "loss": 0.945,
      "step": 714
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.9239710469296512e-08,
      "loss": 0.9584,
      "step": 717
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.282693656842753e-09,
      "loss": 0.9839,
      "step": 720
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.9267799072817415e-10,
      "loss": 1.0157,
      "step": 723
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 14.180739706908584,
      "eval_loss": 1.003404140472412,
      "eval_rouge1": 62.4433,
      "eval_rouge2": 46.0114,
      "eval_rougeL": 60.5355,
      "eval_rougeLsum": 60.6392,
      "eval_runtime": 315.924,
      "eval_samples_per_second": 9.072,
      "eval_steps_per_second": 9.072,
      "step": 724
    },
    {
      "epoch": 2.0,
      "step": 724,
      "total_flos": 2.8251529060286464e+16,
      "train_loss": 1.2205451955782116,
      "train_runtime": 2405.2096,
      "train_samples_per_second": 19.264,
      "train_steps_per_second": 0.301
    }
  ],
  "max_steps": 724,
  "num_train_epochs": 2,
  "total_flos": 2.8251529060286464e+16,
  "trial_name": null,
  "trial_params": null
}