|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.36951013513513514, |
|
"eval_steps": 406, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 10.780741691589355, |
|
"eval_runtime": 947.394, |
|
"eval_samples_per_second": 79.833, |
|
"eval_steps_per_second": 4.435, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.25e-05, |
|
"loss": 10.7434, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 9.9589, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.75e-05, |
|
"loss": 8.6595, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9e-05, |
|
"loss": 7.9676, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001125, |
|
"loss": 7.4358, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000135, |
|
"loss": 7.1452, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015749999999999998, |
|
"loss": 6.9396, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018, |
|
"loss": 6.8089, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002025, |
|
"loss": 6.6897, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000225, |
|
"loss": 6.4553, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00022499913644577618, |
|
"loss": 6.3166, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002249965457963621, |
|
"loss": 6.1718, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022499222809152964, |
|
"loss": 6.0497, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022498618339756446, |
|
"loss": 5.9306, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022497841180726518, |
|
"loss": 5.825, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022496891343994188, |
|
"loss": 5.7394, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022495768844141414, |
|
"loss": 5.6247, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000224944736984009, |
|
"loss": 5.5139, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00022493005926655827, |
|
"loss": 5.3914, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002249136555143953, |
|
"loss": 5.369, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022489552597935173, |
|
"loss": 5.3003, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022487567093975358, |
|
"loss": 5.2256, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022485409070041688, |
|
"loss": 5.141, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022483078559264308, |
|
"loss": 5.0825, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022480575597421393, |
|
"loss": 5.0306, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002247790022293861, |
|
"loss": 4.9651, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000224750524768885, |
|
"loss": 4.9608, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00022472032402989878, |
|
"loss": 4.9164, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022468840047607143, |
|
"loss": 4.8746, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022465475459749576, |
|
"loss": 4.7804, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022461938691070582, |
|
"loss": 4.7936, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000224582297958669, |
|
"loss": 4.7272, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022454348831077767, |
|
"loss": 4.7058, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00022450295856284047, |
|
"loss": 4.6486, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002244607093370731, |
|
"loss": 4.6536, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002244167412820889, |
|
"loss": 4.5721, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022437105507288872, |
|
"loss": 4.5511, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022432365141085068, |
|
"loss": 4.4932, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022427453102371933, |
|
"loss": 4.5251, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002242236946655946, |
|
"loss": 4.4763, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022417114311692, |
|
"loss": 4.5116, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022411687718447093, |
|
"loss": 4.4102, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022406089770134205, |
|
"loss": 4.4343, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00022400320552693452, |
|
"loss": 4.4403, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000223943801546943, |
|
"loss": 4.4068, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002238826866733418, |
|
"loss": 4.3988, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022381986184437112, |
|
"loss": 4.3703, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022375532802452238, |
|
"loss": 4.303, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022368908620452367, |
|
"loss": 4.2979, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022362113740132436, |
|
"loss": 4.2974, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022355148265807966, |
|
"loss": 4.2652, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00022348012304413426, |
|
"loss": 4.203, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022340705965500642, |
|
"loss": 4.2045, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022333229361237082, |
|
"loss": 4.2342, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022325582606404126, |
|
"loss": 4.1869, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022317765818395332, |
|
"loss": 4.0939, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022309779117214617, |
|
"loss": 4.1401, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022301622625474417, |
|
"loss": 4.0744, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00022293296468393808, |
|
"loss": 4.0818, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002228480077379657, |
|
"loss": 4.029, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022276135672109258, |
|
"loss": 4.0574, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022267301296359155, |
|
"loss": 4.0003, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022258297782172258, |
|
"loss": 4.0143, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000222491252677712, |
|
"loss": 3.9962, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002223978389397311, |
|
"loss": 3.9994, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022230273804187456, |
|
"loss": 3.9424, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022220595144413854, |
|
"loss": 3.9215, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022210748063239815, |
|
"loss": 3.9483, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00022200732711838466, |
|
"loss": 3.9037, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00022190549243966234, |
|
"loss": 3.8959, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002218019781596049, |
|
"loss": 3.8066, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00022169678586737127, |
|
"loss": 3.8306, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00022158991717788137, |
|
"loss": 3.7961, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00022148137373179146, |
|
"loss": 3.7739, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002213711571954686, |
|
"loss": 3.7727, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00022125926926096538, |
|
"loss": 3.7895, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002211457116459937, |
|
"loss": 3.7839, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022103048609389868, |
|
"loss": 3.7261, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022091359437363157, |
|
"loss": 3.7129, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022079503827972293, |
|
"loss": 3.6765, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002206748196322547, |
|
"loss": 3.6463, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 3.6351399421691895, |
|
"eval_runtime": 955.1583, |
|
"eval_samples_per_second": 79.184, |
|
"eval_steps_per_second": 4.399, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022055294027683266, |
|
"loss": 3.6138, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002204294020845578, |
|
"loss": 3.6069, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022030420695199774, |
|
"loss": 3.5781, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00022017735680115755, |
|
"loss": 3.5925, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00022004885357945026, |
|
"loss": 3.6038, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000219918699259667, |
|
"loss": 3.5368, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021978689583994666, |
|
"loss": 3.5982, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021965344534374522, |
|
"loss": 3.5501, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002195183498198047, |
|
"loss": 3.5315, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021938161134212177, |
|
"loss": 3.5229, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021924323200991577, |
|
"loss": 3.5106, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00021910321394759662, |
|
"loss": 3.4851, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00021896155930473216, |
|
"loss": 3.4405, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00021881827025601504, |
|
"loss": 3.5036, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00021867334900122954, |
|
"loss": 3.4158, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002185267977652176, |
|
"loss": 3.465, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00021837861879784484, |
|
"loss": 3.3843, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002182288143739659, |
|
"loss": 3.3678, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00021807738679338953, |
|
"loss": 3.4079, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002179243383808433, |
|
"loss": 3.3826, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021776967148593793, |
|
"loss": 3.4016, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021761338848313123, |
|
"loss": 3.3715, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002174554917716916, |
|
"loss": 3.4046, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021729598377566122, |
|
"loss": 3.3304, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021713486694381875, |
|
"loss": 3.3419, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021697214374964195, |
|
"loss": 3.3681, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002168078166912695, |
|
"loss": 3.3012, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00021664188829146277, |
|
"loss": 3.3551, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000216474361097567, |
|
"loss": 3.2824, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021630523768147218, |
|
"loss": 3.3024, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021613452063957379, |
|
"loss": 3.2661, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021596221259273266, |
|
"loss": 3.2882, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002157883161862348, |
|
"loss": 3.3181, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021561283408975097, |
|
"loss": 3.2574, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021543576899729543, |
|
"loss": 3.2339, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00021525712362718483, |
|
"loss": 3.2554, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021507690072199625, |
|
"loss": 3.1892, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021489510304852536, |
|
"loss": 3.2946, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021471173339774363, |
|
"loss": 3.2303, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021452679458475567, |
|
"loss": 3.2891, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021434028944875607, |
|
"loss": 3.224, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021415222085298573, |
|
"loss": 3.2337, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00021396259168468773, |
|
"loss": 3.2158, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002137714048550634, |
|
"loss": 3.2126, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002135786632992273, |
|
"loss": 3.1934, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021338436997616223, |
|
"loss": 3.2051, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021318852786867388, |
|
"loss": 3.2667, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021299113998334503, |
|
"loss": 3.1956, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021279220935048926, |
|
"loss": 3.1771, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002125917390241046, |
|
"loss": 3.1467, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021238973208182659, |
|
"loss": 3.1788, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00021218619162488095, |
|
"loss": 3.1967, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021198112077803607, |
|
"loss": 3.149, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021177452268955496, |
|
"loss": 3.154, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000211566400531147, |
|
"loss": 3.1652, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021135675749791924, |
|
"loss": 3.1433, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021114559680832722, |
|
"loss": 3.1893, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002109329217041257, |
|
"loss": 3.1788, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021071873545031885, |
|
"loss": 3.1549, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021050304133511018, |
|
"loss": 3.1294, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00021028584266985186, |
|
"loss": 3.1109, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00021006714278899415, |
|
"loss": 3.1713, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020984694505003402, |
|
"loss": 3.1304, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020962525283346376, |
|
"loss": 3.1285, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002094020695427188, |
|
"loss": 3.0582, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020917739860412592, |
|
"loss": 3.1063, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020895124346685017, |
|
"loss": 3.1309, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020872360760284219, |
|
"loss": 3.1125, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002084944945067849, |
|
"loss": 3.1108, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00020826390769603968, |
|
"loss": 3.0765, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00020803185071059267, |
|
"loss": 3.0634, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000207798327113, |
|
"loss": 3.0993, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002075633404883336, |
|
"loss": 3.1127, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00020732689444412573, |
|
"loss": 3.0502, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002070889926103138, |
|
"loss": 3.0436, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002068496386391846, |
|
"loss": 3.0305, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002066088362053184, |
|
"loss": 3.0996, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020636658900553213, |
|
"loss": 3.0584, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020612290075882296, |
|
"loss": 3.0508, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020587777520631126, |
|
"loss": 3.0578, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020563121611118286, |
|
"loss": 3.0308, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 3.0524611473083496, |
|
"eval_runtime": 955.714, |
|
"eval_samples_per_second": 79.138, |
|
"eval_steps_per_second": 4.397, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020538322725863146, |
|
"loss": 3.122, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020513381245580064, |
|
"loss": 2.9886, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020488297553172515, |
|
"loss": 3.0606, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00020463072033727225, |
|
"loss": 2.993, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020437705074508264, |
|
"loss": 2.9999, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020412197064951097, |
|
"loss": 3.0143, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000203865483966566, |
|
"loss": 2.9886, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020360759463385053, |
|
"loss": 3.0219, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020334830661050102, |
|
"loss": 2.9888, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020308762387712662, |
|
"loss": 3.0271, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020282555043574823, |
|
"loss": 3.0063, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00020256209030973708, |
|
"loss": 3.0198, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020229724754375266, |
|
"loss": 3.0135, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020203102620368113, |
|
"loss": 3.0008, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020176343037657242, |
|
"loss": 3.0168, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020149446417057782, |
|
"loss": 3.042, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020122413171488667, |
|
"loss": 2.9954, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00020095243715966316, |
|
"loss": 3.0191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002006793846759825, |
|
"loss": 3.0343, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002004049784557669, |
|
"loss": 2.9843, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00020012922271172128, |
|
"loss": 3.0263, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019985212167726853, |
|
"loss": 2.975, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001995736796064845, |
|
"loss": 2.9858, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001992939007740328, |
|
"loss": 2.9526, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001990127894750991, |
|
"loss": 2.971, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019873035002532512, |
|
"loss": 2.9635, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019844658676074255, |
|
"loss": 2.9807, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001981615040377063, |
|
"loss": 2.9822, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019787510623282776, |
|
"loss": 2.9552, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019758739774290753, |
|
"loss": 2.9877, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019729838298486793, |
|
"loss": 2.974, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019700806639568524, |
|
"loss": 2.9613, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019671645243232155, |
|
"loss": 2.9949, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019642354557165633, |
|
"loss": 2.9876, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019612935031041768, |
|
"loss": 2.9644, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019583387116511335, |
|
"loss": 2.9204, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019553711267196136, |
|
"loss": 2.9849, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019523907938682038, |
|
"loss": 2.9622, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019493977588511978, |
|
"loss": 2.9457, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001946392067617894, |
|
"loss": 2.9558, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019433737663118898, |
|
"loss": 2.9107, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001940342901270374, |
|
"loss": 2.963, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019372995190234136, |
|
"loss": 2.8945, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019342436662932416, |
|
"loss": 2.9756, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019311753899935389, |
|
"loss": 2.9392, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019280947372287132, |
|
"loss": 2.9293, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019250017552931774, |
|
"loss": 2.947, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019218964916706223, |
|
"loss": 2.9317, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019187789940332882, |
|
"loss": 2.8816, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001915649310241233, |
|
"loss": 2.9644, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001912507488341597, |
|
"loss": 2.9219, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019093535765678663, |
|
"loss": 2.8983, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019061876233391313, |
|
"loss": 2.8816, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019030096772593432, |
|
"loss": 2.8844, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018998197871165692, |
|
"loss": 2.9207, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018966180018822423, |
|
"loss": 2.9553, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018934043707104098, |
|
"loss": 2.893, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001890178942936979, |
|
"loss": 2.9049, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018869417680789587, |
|
"loss": 2.8872, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018836928958337009, |
|
"loss": 2.9219, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018804323760781362, |
|
"loss": 2.9005, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018771602588680083, |
|
"loss": 2.8814, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018738765944371067, |
|
"loss": 2.8915, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018705814331964945, |
|
"loss": 2.8777, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001867274825733734, |
|
"loss": 2.8821, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001863956822812112, |
|
"loss": 2.8803, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018606274753698576, |
|
"loss": 2.8787, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018572868345193632, |
|
"loss": 2.9365, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001853934951546398, |
|
"loss": 2.8642, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018505718779093206, |
|
"loss": 2.8333, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001847197665238291, |
|
"loss": 2.9019, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018438123653344746, |
|
"loss": 2.8726, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018404160301692504, |
|
"loss": 2.8655, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018370087118834102, |
|
"loss": 2.8639, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018335904627863605, |
|
"loss": 2.8742, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018301613353553182, |
|
"loss": 2.9013, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001826721382234505, |
|
"loss": 2.9131, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000182327065623434, |
|
"loss": 2.9068, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001819809210330627, |
|
"loss": 2.9222, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001816337097663744, |
|
"loss": 2.8718, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00018128543715378252, |
|
"loss": 2.8504, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.8658618927001953, |
|
"eval_runtime": 957.687, |
|
"eval_samples_per_second": 78.975, |
|
"eval_steps_per_second": 4.388, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00018093610854199438, |
|
"loss": 2.8823, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00018058572929392902, |
|
"loss": 2.8528, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000180234304788635, |
|
"loss": 2.9415, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001798818404212077, |
|
"loss": 2.8599, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017952834160270655, |
|
"loss": 2.8218, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000179173813760072, |
|
"loss": 2.8258, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017881826233604204, |
|
"loss": 2.8397, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017846169278906888, |
|
"loss": 2.8817, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017810411059323498, |
|
"loss": 2.8424, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017774552123816904, |
|
"loss": 2.845, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017738593022896177, |
|
"loss": 2.8272, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00017702534308608133, |
|
"loss": 2.8452, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017666376534528866, |
|
"loss": 2.8805, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017630120255755235, |
|
"loss": 2.8824, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017593766028896357, |
|
"loss": 2.8669, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001755731441206505, |
|
"loss": 2.8883, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001752076596486927, |
|
"loss": 2.813, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001748412124840353, |
|
"loss": 2.8275, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017447380825240264, |
|
"loss": 2.8017, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00017410545259421208, |
|
"loss": 2.8368, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017373615116448736, |
|
"loss": 2.8448, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017336590963277173, |
|
"loss": 2.8353, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017299473368304102, |
|
"loss": 2.8326, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017262262901361627, |
|
"loss": 2.7981, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017224960133707627, |
|
"loss": 2.828, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017187565638017, |
|
"loss": 2.8209, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00017150079988372842, |
|
"loss": 2.8166, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001711250376025767, |
|
"loss": 2.7783, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00017074837530544557, |
|
"loss": 2.7564, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00017037081877488284, |
|
"loss": 2.7957, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001699923738071648, |
|
"loss": 2.8139, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016961304621220696, |
|
"loss": 2.7938, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016923284181347506, |
|
"loss": 2.8097, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016885176644789557, |
|
"loss": 2.8043, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016846982596576614, |
|
"loss": 2.7577, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001680870262306657, |
|
"loss": 2.7921, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00016770337311936456, |
|
"loss": 2.7836, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016731887252173408, |
|
"loss": 2.7963, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001669335303406563, |
|
"loss": 2.8547, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016654735249193334, |
|
"loss": 2.808, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016616034490419648, |
|
"loss": 2.7782, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016577251351881532, |
|
"loss": 2.76, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00016538386428980638, |
|
"loss": 2.7957, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001649944031837418, |
|
"loss": 2.819, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001646041361796578, |
|
"loss": 2.7574, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016421306926896266, |
|
"loss": 2.7939, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016382120845534497, |
|
"loss": 2.7814, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016342855975468135, |
|
"loss": 2.7271, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001630351291949442, |
|
"loss": 2.8333, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001626409228161089, |
|
"loss": 2.8128, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001622459466700615, |
|
"loss": 2.7752, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016185020682050541, |
|
"loss": 2.8474, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001614537093428685, |
|
"loss": 2.8078, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016105646032420982, |
|
"loss": 2.7696, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016065846586312617, |
|
"loss": 2.8652, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001602597320696584, |
|
"loss": 2.7888, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00015986026506519755, |
|
"loss": 2.7349, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4059, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 1.8026854820610048e+19, |
|
"train_batch_size": 18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|