|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.975609756097561, |
|
"eval_steps": 500, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.4390243902439027e-06, |
|
"loss": 2.6767, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2195121951219513e-05, |
|
"loss": 2.7823, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4390243902439026e-05, |
|
"loss": 2.4361, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.6585365853658535e-05, |
|
"loss": 2.535, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.878048780487805e-05, |
|
"loss": 2.5652, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.097560975609756e-05, |
|
"loss": 2.6216, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.317073170731707e-05, |
|
"loss": 2.1459, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.53658536585366e-05, |
|
"loss": 2.5116, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.75609756097561e-05, |
|
"loss": 2.5704, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00010975609756097563, |
|
"loss": 2.3636, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00012195121951219512, |
|
"loss": 2.6786, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00013414634146341464, |
|
"loss": 2.4003, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00014634146341463414, |
|
"loss": 2.4267, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00015853658536585366, |
|
"loss": 2.4149, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001707317073170732, |
|
"loss": 2.1072, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001829268292682927, |
|
"loss": 2.3557, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001951219512195122, |
|
"loss": 2.3392, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019999184556954776, |
|
"loss": 2.2043, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001999420177550043, |
|
"loss": 2.2218, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019984691491033906, |
|
"loss": 2.4057, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019970658011837404, |
|
"loss": 2.3682, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019952107695258992, |
|
"loss": 2.718, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019929048944832638, |
|
"loss": 2.4517, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019901492206471325, |
|
"loss": 2.5616, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019869449963734893, |
|
"loss": 2.496, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019832936732174834, |
|
"loss": 2.1452, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019791969052758562, |
|
"loss": 2.2789, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019746565484376132, |
|
"loss": 2.0612, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019696746595432828, |
|
"loss": 2.2248, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001964253495453141, |
|
"loss": 2.6121, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019583955120248237, |
|
"loss": 2.4211, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019521033630007928, |
|
"loss": 2.5135, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019453798988061535, |
|
"loss": 2.343, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019382281652573785, |
|
"loss": 2.5214, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019306514021825118, |
|
"loss": 2.6032, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019226530419534833, |
|
"loss": 2.7405, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019142367079312021, |
|
"loss": 2.2154, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019054062128241264, |
|
"loss": 2.5134, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00018961655569610557, |
|
"loss": 2.3687, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001886518926478932, |
|
"loss": 2.4462, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00018764706914264635, |
|
"loss": 2.0328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 2.5844, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00018551877954036162, |
|
"loss": 2.5333, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00018439627758611385, |
|
"loss": 2.3423, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018323554302364272, |
|
"loss": 2.5999, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018203710168075788, |
|
"loss": 2.421, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001808014964669293, |
|
"loss": 2.5597, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017952928712734268, |
|
"loss": 2.48, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00017822104998932713, |
|
"loss": 2.6025, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00017687737770127185, |
|
"loss": 2.3616, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00017549887896414851, |
|
"loss": 2.4311, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001740861782557618, |
|
"loss": 2.5858, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001726399155478529, |
|
"loss": 2.4726, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017116074601618417, |
|
"loss": 2.4097, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001696493397437357, |
|
"loss": 2.4452, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016810638141714934, |
|
"loss": 2.341, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016653257001655652, |
|
"loss": 2.2435, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001649286184989315, |
|
"loss": 2.3826, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001632952534751122, |
|
"loss": 2.3505, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016163321488063637, |
|
"loss": 2.5566, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00015994325564054122, |
|
"loss": 2.3983, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00015822614132827837, |
|
"loss": 2.0597, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00015648264981889934, |
|
"loss": 2.3717, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00015471357093666804, |
|
"loss": 2.6116, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00015291970609726007, |
|
"loss": 2.3766, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015110186794471103, |
|
"loss": 2.2623, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00014926087998327837, |
|
"loss": 2.3868, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00014739757620438307, |
|
"loss": 2.6517, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001455128007088009, |
|
"loss": 2.355, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00014360740732427367, |
|
"loss": 2.3603, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00014168225921871433, |
|
"loss": 2.4304, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00013973822850918055, |
|
"loss": 2.3809, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001377761958667946, |
|
"loss": 2.6495, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00013579705011778766, |
|
"loss": 2.4582, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00013380168784085027, |
|
"loss": 2.4106, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00013179101296097035, |
|
"loss": 2.1831, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00012976593633994346, |
|
"loss": 2.5222, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001277273753637408, |
|
"loss": 2.4411, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00012567625352692127, |
|
"loss": 2.468, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001236135000142765, |
|
"loss": 2.5124, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00012154004927989815, |
|
"loss": 2.2386, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00011945684062385803, |
|
"loss": 2.3717, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 2.4957, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00011526492842188745, |
|
"loss": 2.5043, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001131581238665465, |
|
"loss": 2.4179, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00011104535851045539, |
|
"loss": 2.3551, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00010892758946371944, |
|
"loss": 2.3083, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00010680577610318072, |
|
"loss": 2.5975, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00010468087963780789, |
|
"loss": 2.2378, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00010255386267325602, |
|
"loss": 2.2045, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00010042568877579388, |
|
"loss": 2.0761, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.829732203579584e-05, |
|
"loss": 2.3797, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.616972663099647e-05, |
|
"loss": 2.457, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.404386638970542e-05, |
|
"loss": 2.4869, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.192070435418079e-05, |
|
"loss": 2.3134, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.980120234435849e-05, |
|
"loss": 2.6018, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.768632052213531e-05, |
|
"loss": 2.3912, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.557701695640321e-05, |
|
"loss": 2.273, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.347424718903151e-05, |
|
"loss": 2.4595, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.13789638019942e-05, |
|
"loss": 2.4639, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.929211598583794e-05, |
|
"loss": 2.3833, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.721464910968627e-05, |
|
"loss": 2.1531, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.514750429297528e-05, |
|
"loss": 1.9377, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.309161797911441e-05, |
|
"loss": 2.3244, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.104792151126515e-05, |
|
"loss": 2.452, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.901734071043071e-05, |
|
"loss": 2.5114, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.700079545604708e-05, |
|
"loss": 2.4173, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.499919926926566e-05, |
|
"loss": 2.3595, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.301345889911637e-05, |
|
"loss": 2.5965, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.104447391173858e-05, |
|
"loss": 2.4791, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.909313628286601e-05, |
|
"loss": 2.3998, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.716032999375006e-05, |
|
"loss": 2.5947, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.524693063070492e-05, |
|
"loss": 2.5223, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5.335380498845559e-05, |
|
"loss": 2.4139, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.148181067746862e-05, |
|
"loss": 2.4851, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.963179573544357e-05, |
|
"loss": 2.3187, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.7804598243140666e-05, |
|
"loss": 2.5466, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.60010459447196e-05, |
|
"loss": 2.5622, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.422195587276058e-05, |
|
"loss": 2.3284, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2468133978137945e-05, |
|
"loss": 2.1308, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.0740374764914136e-05, |
|
"loss": 2.5981, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.903946093041877e-05, |
|
"loss": 2.1314, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.736616301067694e-05, |
|
"loss": 2.2348, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.5721239031346066e-05, |
|
"loss": 2.5269, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.410543416432069e-05, |
|
"loss": 2.3657, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.2519480390159806e-05, |
|
"loss": 2.4194, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.096409616649023e-05, |
|
"loss": 2.4472, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.9439986102536043e-05, |
|
"loss": 2.0381, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.794784063992131e-05, |
|
"loss": 1.9038, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.6488335739891178e-05, |
|
"loss": 2.5467, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.50621325770927e-05, |
|
"loss": 2.3751, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.366987724005404e-05, |
|
"loss": 2.2535, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.2312200438498043e-05, |
|
"loss": 2.5885, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0989717217622652e-05, |
|
"loss": 2.4353, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9703026679477256e-05, |
|
"loss": 2.3635, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8452711711561842e-05, |
|
"loss": 2.4849, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7239338722771327e-05, |
|
"loss": 2.3917, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6063457386805004e-05, |
|
"loss": 2.7249, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4925600393157324e-05, |
|
"loss": 2.5142, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.3826283205802427e-05, |
|
"loss": 2.5111, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2766003829682505e-05, |
|
"loss": 2.1423, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1745242585104955e-05, |
|
"loss": 2.2201, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.0764461890151112e-05, |
|
"loss": 2.5609, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.824106051194859e-06, |
|
"loss": 2.339, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.924601061626048e-06, |
|
"loss": 2.5648, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.066354408870048e-06, |
|
"loss": 2.3712, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.249754889790539e-06, |
|
"loss": 2.4474, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.475172434559573e-06, |
|
"loss": 2.4237, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.742957939074412e-06, |
|
"loss": 2.401, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.0534431059970685e-06, |
|
"loss": 2.6402, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.40694029448877e-06, |
|
"loss": 2.2489, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.803742378707198e-06, |
|
"loss": 2.1774, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.2441226151306404e-06, |
|
"loss": 2.4729, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7283345187693264e-06, |
|
"loss": 2.1403, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.256611748319792e-06, |
|
"loss": 2.818, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.8291680003145073e-06, |
|
"loss": 2.8155, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4461969123145457e-06, |
|
"loss": 2.2691, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.107871975189234e-06, |
|
"loss": 2.3321, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.143464545226298e-07, |
|
"loss": 2.3751, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.657533211820942e-07, |
|
"loss": 2.5208, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.622051910808666e-07, |
|
"loss": 1.9324, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"step": 800, |
|
"total_flos": 5.73623349805056e+16, |
|
"train_loss": 0.0, |
|
"train_runtime": 12.6248, |
|
"train_samples_per_second": 60.674, |
|
"train_steps_per_second": 60.674 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 766, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 5.73623349805056e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|