{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 724, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 4.2523, "step": 3 }, { "epoch": 0.02, "learning_rate": 3.2000000000000005e-05, "loss": 3.4255, "step": 6 }, { "epoch": 0.02, "learning_rate": 4.8e-05, "loss": 2.5412, "step": 9 }, { "epoch": 0.03, "learning_rate": 6.400000000000001e-05, "loss": 2.0169, "step": 12 }, { "epoch": 0.04, "learning_rate": 8e-05, "loss": 1.8547, "step": 15 }, { "epoch": 0.05, "learning_rate": 7.999646594434211e-05, "loss": 1.8217, "step": 18 }, { "epoch": 0.06, "learning_rate": 7.998586440184589e-05, "loss": 1.7177, "step": 21 }, { "epoch": 0.07, "learning_rate": 7.996819724583341e-05, "loss": 1.6448, "step": 24 }, { "epoch": 0.07, "learning_rate": 7.99434675981403e-05, "loss": 1.6419, "step": 27 }, { "epoch": 0.08, "learning_rate": 7.991167982856416e-05, "loss": 1.595, "step": 30 }, { "epoch": 0.09, "learning_rate": 7.987283955409229e-05, "loss": 1.7162, "step": 33 }, { "epoch": 0.1, "learning_rate": 7.982695363790929e-05, "loss": 1.6641, "step": 36 }, { "epoch": 0.11, "learning_rate": 7.977403018818425e-05, "loss": 1.5268, "step": 39 }, { "epoch": 0.12, "learning_rate": 7.971407855663803e-05, "loss": 1.5187, "step": 42 }, { "epoch": 0.12, "learning_rate": 7.964710933689073e-05, "loss": 1.6022, "step": 45 }, { "epoch": 0.13, "learning_rate": 7.95731343625899e-05, "loss": 1.5216, "step": 48 }, { "epoch": 0.14, "learning_rate": 7.94921667053193e-05, "loss": 1.5416, "step": 51 }, { "epoch": 0.15, "learning_rate": 7.940422067228933e-05, "loss": 1.5302, "step": 54 }, { "epoch": 0.16, "learning_rate": 7.930931180380879e-05, "loss": 1.4887, "step": 57 }, { "epoch": 0.17, "learning_rate": 7.920745687053881e-05, "loss": 1.5794, "step": 60 }, { "epoch": 0.17, "learning_rate": 7.909867387052959e-05, "loss": 1.4197, "step": 63 }, { "epoch": 0.18, "learning_rate": 7.898298202603996e-05, "loss": 1.5554, "step": 66 }, { "epoch": 0.19, "learning_rate": 7.886040178014079e-05, "loss": 1.4569, "step": 69 }, { "epoch": 0.2, "learning_rate": 7.873095479310265e-05, "loss": 1.4795, "step": 72 }, { "epoch": 0.21, "learning_rate": 7.859466393856842e-05, "loss": 1.4088, "step": 75 }, { "epoch": 0.22, "learning_rate": 7.845155329951134e-05, "loss": 1.4127, "step": 78 }, { "epoch": 0.22, "learning_rate": 7.830164816397961e-05, "loss": 1.3549, "step": 81 }, { "epoch": 0.23, "learning_rate": 7.814497502062784e-05, "loss": 1.4085, "step": 84 }, { "epoch": 0.24, "learning_rate": 7.798156155403649e-05, "loss": 1.4669, "step": 87 }, { "epoch": 0.25, "learning_rate": 7.781143663981985e-05, "loss": 1.3935, "step": 90 }, { "epoch": 0.26, "learning_rate": 7.76346303395237e-05, "loss": 1.4601, "step": 93 }, { "epoch": 0.27, "learning_rate": 7.745117389531335e-05, "loss": 1.3549, "step": 96 }, { "epoch": 0.27, "learning_rate": 7.726109972445301e-05, "loss": 1.4819, "step": 99 }, { "epoch": 0.28, "learning_rate": 7.706444141357764e-05, "loss": 1.4633, "step": 102 }, { "epoch": 0.29, "learning_rate": 7.686123371275806e-05, "loss": 1.4074, "step": 105 }, { "epoch": 0.3, "learning_rate": 7.665151252936049e-05, "loss": 1.3739, "step": 108 }, { "epoch": 0.31, "learning_rate": 7.643531492170168e-05, "loss": 1.4575, "step": 111 }, { "epoch": 0.31, "learning_rate": 7.621267909250057e-05, "loss": 1.4109, "step": 114 }, { "epoch": 0.32, "learning_rate": 7.598364438212773e-05, "loss": 1.261, "step": 117 }, { "epoch": 0.33, "learning_rate": 7.574825126165386e-05, "loss": 1.251, "step": 120 }, { "epoch": 0.34, "learning_rate": 7.550654132569846e-05, "loss": 1.3583, "step": 123 }, { "epoch": 0.35, "learning_rate": 7.525855728507984e-05, "loss": 1.3513, "step": 126 }, { "epoch": 0.36, "learning_rate": 7.500434295926807e-05, "loss": 1.4371, "step": 129 }, { "epoch": 0.36, "learning_rate": 7.474394326864201e-05, "loss": 1.5398, "step": 132 }, { "epoch": 0.37, "learning_rate": 7.447740422655164e-05, "loss": 1.3364, "step": 135 }, { "epoch": 0.38, "learning_rate": 7.420477293118745e-05, "loss": 1.2326, "step": 138 }, { "epoch": 0.39, "learning_rate": 7.392609755725803e-05, "loss": 1.3098, "step": 141 }, { "epoch": 0.4, "learning_rate": 7.36414273474775e-05, "loss": 1.3438, "step": 144 }, { "epoch": 0.41, "learning_rate": 7.33508126038641e-05, "loss": 1.3329, "step": 147 }, { "epoch": 0.41, "learning_rate": 7.305430467885182e-05, "loss": 1.4064, "step": 150 }, { "epoch": 0.42, "learning_rate": 7.275195596621611e-05, "loss": 1.3769, "step": 153 }, { "epoch": 0.43, "learning_rate": 7.244381989181594e-05, "loss": 1.3437, "step": 156 }, { "epoch": 0.44, "learning_rate": 7.212995090415312e-05, "loss": 1.2524, "step": 159 }, { "epoch": 0.45, "learning_rate": 7.181040446475129e-05, "loss": 1.4045, "step": 162 }, { "epoch": 0.46, "learning_rate": 7.148523703835553e-05, "loss": 1.207, "step": 165 }, { "epoch": 0.46, "learning_rate": 7.115450608295498e-05, "loss": 1.2996, "step": 168 }, { "epoch": 0.47, "learning_rate": 7.081827003962987e-05, "loss": 1.2952, "step": 171 }, { "epoch": 0.48, "learning_rate": 7.047658832222475e-05, "loss": 1.4254, "step": 174 }, { "epoch": 0.49, "learning_rate": 7.012952130684995e-05, "loss": 1.3879, "step": 177 }, { "epoch": 0.5, "learning_rate": 6.977713032121295e-05, "loss": 1.3536, "step": 180 }, { "epoch": 0.51, "learning_rate": 6.941947763378157e-05, "loss": 1.2768, "step": 183 }, { "epoch": 0.51, "learning_rate": 6.905662644278099e-05, "loss": 1.3828, "step": 186 }, { "epoch": 0.52, "learning_rate": 6.868864086502643e-05, "loss": 1.2058, "step": 189 }, { "epoch": 0.53, "learning_rate": 6.831558592459356e-05, "loss": 1.2856, "step": 192 }, { "epoch": 0.54, "learning_rate": 6.793752754132852e-05, "loss": 1.2367, "step": 195 }, { "epoch": 0.55, "learning_rate": 6.755453251919973e-05, "loss": 1.3201, "step": 198 }, { "epoch": 0.56, "learning_rate": 6.716666853449342e-05, "loss": 1.3446, "step": 201 }, { "epoch": 0.56, "learning_rate": 6.67740041238551e-05, "loss": 1.2449, "step": 204 }, { "epoch": 0.57, "learning_rate": 6.637660867217884e-05, "loss": 1.2529, "step": 207 }, { "epoch": 0.58, "learning_rate": 6.59745524003469e-05, "loss": 1.3266, "step": 210 }, { "epoch": 0.59, "learning_rate": 6.556790635282136e-05, "loss": 1.2609, "step": 213 }, { "epoch": 0.6, "learning_rate": 6.515674238509048e-05, "loss": 1.231, "step": 216 }, { "epoch": 0.6, "learning_rate": 6.474113315097161e-05, "loss": 1.3123, "step": 219 }, { "epoch": 0.61, "learning_rate": 6.432115208977297e-05, "loss": 1.2846, "step": 222 }, { "epoch": 0.62, "learning_rate": 6.389687341331688e-05, "loss": 1.3302, "step": 225 }, { "epoch": 0.63, "learning_rate": 6.346837209282615e-05, "loss": 1.386, "step": 228 }, { "epoch": 0.64, "learning_rate": 6.303572384567662e-05, "loss": 1.332, "step": 231 }, { "epoch": 0.65, "learning_rate": 6.259900512201756e-05, "loss": 1.3034, "step": 234 }, { "epoch": 0.65, "learning_rate": 6.215829309126279e-05, "loss": 1.4256, "step": 237 }, { "epoch": 0.66, "learning_rate": 6.17136656284546e-05, "loss": 1.2721, "step": 240 }, { "epoch": 0.67, "learning_rate": 6.1265201300503e-05, "loss": 1.3103, "step": 243 }, { "epoch": 0.68, "learning_rate": 6.081297935230281e-05, "loss": 1.2451, "step": 246 }, { "epoch": 0.69, "learning_rate": 6.035707969273072e-05, "loss": 1.1757, "step": 249 }, { "epoch": 0.7, "learning_rate": 5.989758288052531e-05, "loss": 1.3589, "step": 252 }, { "epoch": 0.7, "learning_rate": 5.9434570110052036e-05, "loss": 1.275, "step": 255 }, { "epoch": 0.71, "learning_rate": 5.8968123196955955e-05, "loss": 1.3078, "step": 258 }, { "epoch": 0.72, "learning_rate": 5.8498324563704676e-05, "loss": 1.3537, "step": 261 }, { "epoch": 0.73, "learning_rate": 5.80252572250241e-05, "loss": 1.1313, "step": 264 }, { "epoch": 0.74, "learning_rate": 5.7549004773229474e-05, "loss": 1.2557, "step": 267 }, { "epoch": 0.75, "learning_rate": 5.706965136345439e-05, "loss": 1.208, "step": 270 }, { "epoch": 0.75, "learning_rate": 5.658728169878033e-05, "loss": 1.0945, "step": 273 }, { "epoch": 0.76, "learning_rate": 5.6101981015269436e-05, "loss": 1.3753, "step": 276 }, { "epoch": 0.77, "learning_rate": 5.561383506690303e-05, "loss": 1.2593, "step": 279 }, { "epoch": 0.78, "learning_rate": 5.512293011042863e-05, "loss": 1.2717, "step": 282 }, { "epoch": 0.79, "learning_rate": 5.462935289011821e-05, "loss": 1.2859, "step": 285 }, { "epoch": 0.8, "learning_rate": 5.4133190622440153e-05, "loss": 1.1984, "step": 288 }, { "epoch": 0.8, "learning_rate": 5.363453098064792e-05, "loss": 1.3106, "step": 291 }, { "epoch": 0.81, "learning_rate": 5.313346207928795e-05, "loss": 1.2159, "step": 294 }, { "epoch": 0.82, "learning_rate": 5.2630072458629526e-05, "loss": 1.1897, "step": 297 }, { "epoch": 0.83, "learning_rate": 5.2124451069019495e-05, "loss": 1.1248, "step": 300 }, { "epoch": 0.84, "learning_rate": 5.161668725516451e-05, "loss": 1.2375, "step": 303 }, { "epoch": 0.85, "learning_rate": 5.110687074034351e-05, "loss": 1.2367, "step": 306 }, { "epoch": 0.85, "learning_rate": 5.059509161055343e-05, "loss": 1.2432, "step": 309 }, { "epoch": 0.86, "learning_rate": 5.008144029859074e-05, "loss": 1.2744, "step": 312 }, { "epoch": 0.87, "learning_rate": 4.956600756807172e-05, "loss": 1.2705, "step": 315 }, { "epoch": 0.88, "learning_rate": 4.904888449739422e-05, "loss": 1.2448, "step": 318 }, { "epoch": 0.89, "learning_rate": 4.8530162463643935e-05, "loss": 1.1841, "step": 321 }, { "epoch": 0.9, "learning_rate": 4.800993312644778e-05, "loss": 1.3046, "step": 324 }, { "epoch": 0.9, "learning_rate": 4.748828841177738e-05, "loss": 1.1876, "step": 327 }, { "epoch": 0.91, "learning_rate": 4.6965320495705504e-05, "loss": 1.2061, "step": 330 }, { "epoch": 0.92, "learning_rate": 4.644112178811828e-05, "loss": 1.2982, "step": 333 }, { "epoch": 0.93, "learning_rate": 4.591578491638613e-05, "loss": 1.2507, "step": 336 }, { "epoch": 0.94, "learning_rate": 4.538940270899625e-05, "loss": 1.2313, "step": 339 }, { "epoch": 0.94, "learning_rate": 4.4862068179149546e-05, "loss": 1.2507, "step": 342 }, { "epoch": 0.95, "learning_rate": 4.4333874508324964e-05, "loss": 1.274, "step": 345 }, { "epoch": 0.96, "learning_rate": 4.3804915029814054e-05, "loss": 1.2024, "step": 348 }, { "epoch": 0.97, "learning_rate": 4.327528321222869e-05, "loss": 1.2804, "step": 351 }, { "epoch": 0.98, "learning_rate": 4.274507264298496e-05, "loss": 1.1524, "step": 354 }, { "epoch": 0.99, "learning_rate": 4.2214377011765956e-05, "loss": 1.236, "step": 357 }, { "epoch": 0.99, "learning_rate": 4.1683290093966603e-05, "loss": 1.2723, "step": 360 }, { "epoch": 1.0, "eval_gen_len": 14.044312630844383, "eval_loss": 1.032505989074707, "eval_rouge1": 61.6206, "eval_rouge2": 45.1199, "eval_rougeL": 59.6467, "eval_rougeLsum": 59.7534, "eval_runtime": 315.6416, "eval_samples_per_second": 9.08, "eval_steps_per_second": 9.08, "step": 362 }, { "epoch": 1.0, "learning_rate": 4.115190573412321e-05, "loss": 1.1678, "step": 363 }, { "epoch": 1.01, "learning_rate": 4.062031782933099e-05, "loss": 1.1245, "step": 366 }, { "epoch": 1.02, "learning_rate": 4.008862031265205e-05, "loss": 1.0944, "step": 369 }, { "epoch": 1.03, "learning_rate": 3.955690713651723e-05, "loss": 1.0338, "step": 372 }, { "epoch": 1.04, "learning_rate": 3.902527225612447e-05, "loss": 1.1155, "step": 375 }, { "epoch": 1.04, "learning_rate": 3.849380961283661e-05, "loss": 1.0579, "step": 378 }, { "epoch": 1.05, "learning_rate": 3.796261311758174e-05, "loss": 1.0659, "step": 381 }, { "epoch": 1.06, "learning_rate": 3.743177663425883e-05, "loss": 1.0031, "step": 384 }, { "epoch": 1.07, "learning_rate": 3.690139396315174e-05, "loss": 1.0458, "step": 387 }, { "epoch": 1.08, "learning_rate": 3.637155882435446e-05, "loss": 0.9631, "step": 390 }, { "epoch": 1.09, "learning_rate": 3.5842364841210466e-05, "loss": 1.0156, "step": 393 }, { "epoch": 1.09, "learning_rate": 3.53139055237693e-05, "loss": 1.0457, "step": 396 }, { "epoch": 1.1, "learning_rate": 3.478627425226299e-05, "loss": 1.0843, "step": 399 }, { "epoch": 1.11, "learning_rate": 3.4259564260605564e-05, "loss": 1.1081, "step": 402 }, { "epoch": 1.12, "learning_rate": 3.373386861991832e-05, "loss": 1.0335, "step": 405 }, { "epoch": 1.13, "learning_rate": 3.320928022208392e-05, "loss": 1.0572, "step": 408 }, { "epoch": 1.14, "learning_rate": 3.268589176333213e-05, "loss": 1.0875, "step": 411 }, { "epoch": 1.14, "learning_rate": 3.216379572786015e-05, "loss": 1.0419, "step": 414 }, { "epoch": 1.15, "learning_rate": 3.1643084371490394e-05, "loss": 1.0162, "step": 417 }, { "epoch": 1.16, "learning_rate": 3.112384970536862e-05, "loss": 1.1311, "step": 420 }, { "epoch": 1.17, "learning_rate": 3.060618347970529e-05, "loss": 1.0091, "step": 423 }, { "epoch": 1.18, "learning_rate": 3.0090177167563106e-05, "loss": 1.0725, "step": 426 }, { "epoch": 1.19, "learning_rate": 2.9575921948693394e-05, "loss": 0.9811, "step": 429 }, { "epoch": 1.19, "learning_rate": 2.906350869342447e-05, "loss": 1.1393, "step": 432 }, { "epoch": 1.2, "learning_rate": 2.8553027946604523e-05, "loss": 1.0846, "step": 435 }, { "epoch": 1.21, "learning_rate": 2.8044569911602134e-05, "loss": 1.0058, "step": 438 }, { "epoch": 1.22, "learning_rate": 2.7538224434367063e-05, "loss": 1.0463, "step": 441 }, { "epoch": 1.23, "learning_rate": 2.703408098755424e-05, "loss": 1.006, "step": 444 }, { "epoch": 1.23, "learning_rate": 2.6532228654713706e-05, "loss": 1.0281, "step": 447 }, { "epoch": 1.24, "learning_rate": 2.603275611454928e-05, "loss": 1.1484, "step": 450 }, { "epoch": 1.25, "learning_rate": 2.5535751625248784e-05, "loss": 1.1257, "step": 453 }, { "epoch": 1.26, "learning_rate": 2.5041303008888593e-05, "loss": 1.1191, "step": 456 }, { "epoch": 1.27, "learning_rate": 2.454949763591521e-05, "loss": 1.0099, "step": 459 }, { "epoch": 1.28, "learning_rate": 2.406042240970668e-05, "loss": 1.0287, "step": 462 }, { "epoch": 1.28, "learning_rate": 2.3574163751216513e-05, "loss": 0.9936, "step": 465 }, { "epoch": 1.29, "learning_rate": 2.30908075837029e-05, "loss": 0.959, "step": 468 }, { "epoch": 1.3, "learning_rate": 2.2610439317545723e-05, "loss": 1.0438, "step": 471 }, { "epoch": 1.31, "learning_rate": 2.213314383515447e-05, "loss": 0.9786, "step": 474 }, { "epoch": 1.32, "learning_rate": 2.1659005475969125e-05, "loss": 1.0546, "step": 477 }, { "epoch": 1.33, "learning_rate": 2.1188108021557236e-05, "loss": 1.058, "step": 480 }, { "epoch": 1.33, "learning_rate": 2.0720534680809452e-05, "loss": 1.0402, "step": 483 }, { "epoch": 1.34, "learning_rate": 2.0256368075236296e-05, "loss": 1.0674, "step": 486 }, { "epoch": 1.35, "learning_rate": 1.979569022436869e-05, "loss": 0.9716, "step": 489 }, { "epoch": 1.36, "learning_rate": 1.9338582531264908e-05, "loss": 0.9702, "step": 492 }, { "epoch": 1.37, "learning_rate": 1.8885125768126405e-05, "loss": 1.0379, "step": 495 }, { "epoch": 1.38, "learning_rate": 1.843540006202513e-05, "loss": 0.9711, "step": 498 }, { "epoch": 1.38, "learning_rate": 1.7989484880744917e-05, "loss": 1.0055, "step": 501 }, { "epoch": 1.39, "learning_rate": 1.754745901873923e-05, "loss": 0.943, "step": 504 }, { "epoch": 1.4, "learning_rate": 1.7109400583207977e-05, "loss": 1.0675, "step": 507 }, { "epoch": 1.41, "learning_rate": 1.667538698029581e-05, "loss": 1.0679, "step": 510 }, { "epoch": 1.42, "learning_rate": 1.624549490141417e-05, "loss": 0.9835, "step": 513 }, { "epoch": 1.43, "learning_rate": 1.581980030968974e-05, "loss": 1.0194, "step": 516 }, { "epoch": 1.43, "learning_rate": 1.5398378426541535e-05, "loss": 1.1163, "step": 519 }, { "epoch": 1.44, "learning_rate": 1.4981303718389088e-05, "loss": 1.0588, "step": 522 }, { "epoch": 1.45, "learning_rate": 1.4568649883494001e-05, "loss": 0.9728, "step": 525 }, { "epoch": 1.46, "learning_rate": 1.416048983893727e-05, "loss": 1.0166, "step": 528 }, { "epoch": 1.47, "learning_rate": 1.3756895707734637e-05, "loss": 1.0569, "step": 531 }, { "epoch": 1.48, "learning_rate": 1.3357938806092245e-05, "loss": 1.0751, "step": 534 }, { "epoch": 1.48, "learning_rate": 1.2963689630804854e-05, "loss": 0.9486, "step": 537 }, { "epoch": 1.49, "learning_rate": 1.2574217846798921e-05, "loss": 1.0278, "step": 540 }, { "epoch": 1.5, "learning_rate": 1.2189592274822526e-05, "loss": 1.0522, "step": 543 }, { "epoch": 1.51, "learning_rate": 1.1809880879284608e-05, "loss": 1.0255, "step": 546 }, { "epoch": 1.52, "learning_rate": 1.1435150756245439e-05, "loss": 1.0388, "step": 549 }, { "epoch": 1.52, "learning_rate": 1.1065468121560627e-05, "loss": 0.9952, "step": 552 }, { "epoch": 1.53, "learning_rate": 1.0700898299180493e-05, "loss": 1.084, "step": 555 }, { "epoch": 1.54, "learning_rate": 1.034150570960721e-05, "loss": 0.9855, "step": 558 }, { "epoch": 1.55, "learning_rate": 9.987353858511506e-06, "loss": 1.0436, "step": 561 }, { "epoch": 1.56, "learning_rate": 9.638505325511041e-06, "loss": 1.0048, "step": 564 }, { "epoch": 1.57, "learning_rate": 9.295021753112402e-06, "loss": 1.0402, "step": 567 }, { "epoch": 1.57, "learning_rate": 8.956963835818708e-06, "loss": 1.0207, "step": 570 }, { "epoch": 1.58, "learning_rate": 8.62439130940472e-06, "loss": 0.9986, "step": 573 }, { "epoch": 1.59, "learning_rate": 8.297362940361386e-06, "loss": 0.9838, "step": 576 }, { "epoch": 1.6, "learning_rate": 7.975936515511598e-06, "loss": 1.0181, "step": 579 }, { "epoch": 1.61, "learning_rate": 7.660168831799115e-06, "loss": 1.0555, "step": 582 }, { "epoch": 1.62, "learning_rate": 7.350115686252399e-06, "loss": 0.9745, "step": 585 }, { "epoch": 1.62, "learning_rate": 7.045831866125117e-06, "loss": 1.0337, "step": 588 }, { "epoch": 1.63, "learning_rate": 6.747371139215069e-06, "loss": 1.0722, "step": 591 }, { "epoch": 1.64, "learning_rate": 6.454786244363292e-06, "loss": 1.1129, "step": 594 }, { "epoch": 1.65, "learning_rate": 6.168128882134934e-06, "loss": 1.0271, "step": 597 }, { "epoch": 1.66, "learning_rate": 5.887449705683632e-06, "loss": 1.074, "step": 600 }, { "epoch": 1.67, "learning_rate": 5.61279831180098e-06, "loss": 0.9695, "step": 603 }, { "epoch": 1.67, "learning_rate": 5.344223232152596e-06, "loss": 1.0043, "step": 606 }, { "epoch": 1.68, "learning_rate": 5.081771924702468e-06, "loss": 1.0169, "step": 609 }, { "epoch": 1.69, "learning_rate": 4.825490765327003e-06, "loss": 0.9531, "step": 612 }, { "epoch": 1.7, "learning_rate": 4.575425039620265e-06, "loss": 1.0418, "step": 615 }, { "epoch": 1.71, "learning_rate": 4.3316189348918855e-06, "loss": 1.1068, "step": 618 }, { "epoch": 1.72, "learning_rate": 4.094115532359064e-06, "loss": 0.9387, "step": 621 }, { "epoch": 1.72, "learning_rate": 3.862956799533977e-06, "loss": 1.0124, "step": 624 }, { "epoch": 1.73, "learning_rate": 3.6381835828079946e-06, "loss": 0.9843, "step": 627 }, { "epoch": 1.74, "learning_rate": 3.4198356002340405e-06, "loss": 1.0601, "step": 630 }, { "epoch": 1.75, "learning_rate": 3.2079514345082764e-06, "loss": 1.0091, "step": 633 }, { "epoch": 1.76, "learning_rate": 3.0025685261524297e-06, "loss": 1.04, "step": 636 }, { "epoch": 1.77, "learning_rate": 2.803723166897965e-06, "loss": 0.9952, "step": 639 }, { "epoch": 1.77, "learning_rate": 2.611450493273244e-06, "loss": 0.9844, "step": 642 }, { "epoch": 1.78, "learning_rate": 2.4257844803947573e-06, "loss": 1.0642, "step": 645 }, { "epoch": 1.79, "learning_rate": 2.2467579359636726e-06, "loss": 0.9943, "step": 648 }, { "epoch": 1.8, "learning_rate": 2.0744024944685968e-06, "loss": 0.9946, "step": 651 }, { "epoch": 1.81, "learning_rate": 1.9087486115956987e-06, "loss": 0.9849, "step": 654 }, { "epoch": 1.81, "learning_rate": 1.7498255588470803e-06, "loss": 1.0182, "step": 657 }, { "epoch": 1.82, "learning_rate": 1.5976614183684214e-06, "loss": 1.0031, "step": 660 }, { "epoch": 1.83, "learning_rate": 1.452283077986807e-06, "loss": 1.0455, "step": 663 }, { "epoch": 1.84, "learning_rate": 1.3137162264595493e-06, "loss": 0.9796, "step": 666 }, { "epoch": 1.85, "learning_rate": 1.181985348934931e-06, "loss": 1.1242, "step": 669 }, { "epoch": 1.86, "learning_rate": 1.0571137226256067e-06, "loss": 1.0918, "step": 672 }, { "epoch": 1.86, "learning_rate": 9.391234126954463e-07, "loss": 0.9396, "step": 675 }, { "epoch": 1.87, "learning_rate": 8.280352683605764e-07, "loss": 1.0158, "step": 678 }, { "epoch": 1.88, "learning_rate": 7.238689192052439e-07, "loss": 1.0224, "step": 681 }, { "epoch": 1.89, "learning_rate": 6.266427717132218e-07, "loss": 0.9853, "step": 684 }, { "epoch": 1.9, "learning_rate": 5.363740060153522e-07, "loss": 1.0104, "step": 687 }, { "epoch": 1.91, "learning_rate": 4.530785728537401e-07, "loss": 0.9941, "step": 690 }, { "epoch": 1.91, "learning_rate": 3.76771190763221e-07, "loss": 1.0154, "step": 693 }, { "epoch": 1.92, "learning_rate": 3.074653434705699e-07, "loss": 1.0677, "step": 696 }, { "epoch": 1.93, "learning_rate": 2.4517327751187423e-07, "loss": 1.0048, "step": 699 }, { "epoch": 1.94, "learning_rate": 1.8990600006854488e-07, "loss": 1.0934, "step": 702 }, { "epoch": 1.95, "learning_rate": 1.4167327702230283e-07, "loss": 1.0325, "step": 705 }, { "epoch": 1.96, "learning_rate": 1.0048363122954208e-07, "loss": 1.1128, "step": 708 }, { "epoch": 1.96, "learning_rate": 6.634434101529863e-08, "loss": 1.0901, "step": 711 }, { "epoch": 1.97, "learning_rate": 3.926143888715484e-08, "loss": 0.945, "step": 714 }, { "epoch": 1.98, "learning_rate": 1.9239710469296512e-08, "loss": 0.9584, "step": 717 }, { "epoch": 1.99, "learning_rate": 6.282693656842753e-09, "loss": 0.9839, "step": 720 }, { "epoch": 2.0, "learning_rate": 3.9267799072817415e-10, "loss": 1.0157, "step": 723 }, { "epoch": 2.0, "eval_gen_len": 14.180739706908584, "eval_loss": 1.003404140472412, "eval_rouge1": 62.4433, "eval_rouge2": 46.0114, "eval_rougeL": 60.5355, "eval_rougeLsum": 60.6392, "eval_runtime": 315.924, "eval_samples_per_second": 9.072, "eval_steps_per_second": 9.072, "step": 724 }, { "epoch": 2.0, "step": 724, "total_flos": 2.8251529060286464e+16, "train_loss": 1.2205451955782116, "train_runtime": 2405.2096, "train_samples_per_second": 19.264, "train_steps_per_second": 0.301 } ], "max_steps": 724, "num_train_epochs": 2, "total_flos": 2.8251529060286464e+16, "trial_name": null, "trial_params": null }