|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.98769987699877, |
|
"eval_steps": 500, |
|
"global_step": 6090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016400164001640016, |
|
"grad_norm": 0.49903953075408936, |
|
"learning_rate": 0.00019999866944080257, |
|
"loss": 1.1198, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03280032800328003, |
|
"grad_norm": 0.38635629415512085, |
|
"learning_rate": 0.00019999467779861793, |
|
"loss": 0.957, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04920049200492005, |
|
"grad_norm": 0.3574012815952301, |
|
"learning_rate": 0.0001999880251796685, |
|
"loss": 0.8963, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06560065600656007, |
|
"grad_norm": 0.36165040731430054, |
|
"learning_rate": 0.00019997871176098827, |
|
"loss": 0.8893, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08200082000820008, |
|
"grad_norm": 0.3633221983909607, |
|
"learning_rate": 0.0001999667377904184, |
|
"loss": 0.8788, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0984009840098401, |
|
"grad_norm": 0.3308263123035431, |
|
"learning_rate": 0.00019995210358660038, |
|
"loss": 0.8597, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11480114801148011, |
|
"grad_norm": 0.37404152750968933, |
|
"learning_rate": 0.0001999348095389677, |
|
"loss": 0.8394, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13120131201312013, |
|
"grad_norm": 0.36840370297431946, |
|
"learning_rate": 0.0001999148561077355, |
|
"loss": 0.8305, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14760147601476015, |
|
"grad_norm": 0.33561620116233826, |
|
"learning_rate": 0.00019989224382388813, |
|
"loss": 0.836, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16400164001640016, |
|
"grad_norm": 0.34898167848587036, |
|
"learning_rate": 0.00019986697328916526, |
|
"loss": 0.8259, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18040180401804018, |
|
"grad_norm": 0.32518813014030457, |
|
"learning_rate": 0.00019983904517604576, |
|
"loss": 0.8255, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1968019680196802, |
|
"grad_norm": 0.34965017437934875, |
|
"learning_rate": 0.00019980846022772978, |
|
"loss": 0.8192, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2132021320213202, |
|
"grad_norm": 0.3208518326282501, |
|
"learning_rate": 0.00019977521925811903, |
|
"loss": 0.8177, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22960229602296023, |
|
"grad_norm": 0.3215318024158478, |
|
"learning_rate": 0.000199739323151795, |
|
"loss": 0.8046, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24600246002460024, |
|
"grad_norm": 0.3197932541370392, |
|
"learning_rate": 0.0001997007728639956, |
|
"loss": 0.8031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.26240262402624026, |
|
"grad_norm": 0.3127509355545044, |
|
"learning_rate": 0.0001996595694205897, |
|
"loss": 0.7911, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2788027880278803, |
|
"grad_norm": 0.3898150324821472, |
|
"learning_rate": 0.00019961571391804962, |
|
"loss": 0.8013, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2952029520295203, |
|
"grad_norm": 0.32089415192604065, |
|
"learning_rate": 0.00019956920752342225, |
|
"loss": 0.7959, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3116031160311603, |
|
"grad_norm": 0.3503674864768982, |
|
"learning_rate": 0.0001995200514742978, |
|
"loss": 0.7869, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3280032800328003, |
|
"grad_norm": 0.36325424909591675, |
|
"learning_rate": 0.00019946824707877693, |
|
"loss": 0.7813, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34440344403444034, |
|
"grad_norm": 0.3318350613117218, |
|
"learning_rate": 0.00019941379571543596, |
|
"loss": 0.796, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.36080360803608036, |
|
"grad_norm": 0.3133643865585327, |
|
"learning_rate": 0.00019935669883329013, |
|
"loss": 0.7832, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3772037720377204, |
|
"grad_norm": 0.3414137363433838, |
|
"learning_rate": 0.00019929695795175507, |
|
"loss": 0.7907, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3936039360393604, |
|
"grad_norm": 0.32717064023017883, |
|
"learning_rate": 0.00019923457466060636, |
|
"loss": 0.7824, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4100041000410004, |
|
"grad_norm": 0.3150332272052765, |
|
"learning_rate": 0.00019916955061993725, |
|
"loss": 0.7992, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4264042640426404, |
|
"grad_norm": 0.3353354334831238, |
|
"learning_rate": 0.00019910188756011446, |
|
"loss": 0.7782, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.44280442804428044, |
|
"grad_norm": 0.2986792325973511, |
|
"learning_rate": 0.00019903158728173205, |
|
"loss": 0.7609, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45920459204592046, |
|
"grad_norm": 0.3223719596862793, |
|
"learning_rate": 0.00019895865165556377, |
|
"loss": 0.7785, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4756047560475605, |
|
"grad_norm": 0.3108189105987549, |
|
"learning_rate": 0.00019888308262251285, |
|
"loss": 0.7762, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4920049200492005, |
|
"grad_norm": 0.3153323829174042, |
|
"learning_rate": 0.00019880488219356087, |
|
"loss": 0.7797, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5084050840508405, |
|
"grad_norm": 0.33102768659591675, |
|
"learning_rate": 0.00019872405244971374, |
|
"loss": 0.7731, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5248052480524805, |
|
"grad_norm": 0.29496216773986816, |
|
"learning_rate": 0.00019864059554194669, |
|
"loss": 0.7648, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5412054120541205, |
|
"grad_norm": 0.3221481740474701, |
|
"learning_rate": 0.00019855451369114676, |
|
"loss": 0.7762, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5576055760557606, |
|
"grad_norm": 0.3474864065647125, |
|
"learning_rate": 0.000198465809188054, |
|
"loss": 0.7691, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5740057400574006, |
|
"grad_norm": 0.36715272068977356, |
|
"learning_rate": 0.00019837448439320027, |
|
"loss": 0.7751, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5904059040590406, |
|
"grad_norm": 0.33955511450767517, |
|
"learning_rate": 0.00019828054173684644, |
|
"loss": 0.7791, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6068060680606806, |
|
"grad_norm": 0.3461228311061859, |
|
"learning_rate": 0.00019818398371891784, |
|
"loss": 0.7728, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6232062320623206, |
|
"grad_norm": 0.32821011543273926, |
|
"learning_rate": 0.0001980848129089376, |
|
"loss": 0.7736, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6396063960639606, |
|
"grad_norm": 0.3576536476612091, |
|
"learning_rate": 0.00019798303194595846, |
|
"loss": 0.756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6560065600656007, |
|
"grad_norm": 0.3349688649177551, |
|
"learning_rate": 0.00019787864353849232, |
|
"loss": 0.765, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6724067240672407, |
|
"grad_norm": 0.2960722744464874, |
|
"learning_rate": 0.00019777165046443824, |
|
"loss": 0.7676, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6888068880688807, |
|
"grad_norm": 0.31184637546539307, |
|
"learning_rate": 0.00019766205557100868, |
|
"loss": 0.7366, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7052070520705207, |
|
"grad_norm": 0.3365538418292999, |
|
"learning_rate": 0.00019754986177465344, |
|
"loss": 0.7647, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7216072160721607, |
|
"grad_norm": 0.3039809763431549, |
|
"learning_rate": 0.00019743507206098233, |
|
"loss": 0.741, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7380073800738007, |
|
"grad_norm": 0.32564470171928406, |
|
"learning_rate": 0.00019731768948468549, |
|
"loss": 0.7576, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7544075440754408, |
|
"grad_norm": 0.33124250173568726, |
|
"learning_rate": 0.00019719771716945227, |
|
"loss": 0.7392, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7708077080770808, |
|
"grad_norm": 0.3306678235530853, |
|
"learning_rate": 0.00019707515830788802, |
|
"loss": 0.7513, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7872078720787208, |
|
"grad_norm": 0.31717410683631897, |
|
"learning_rate": 0.00019695001616142915, |
|
"loss": 0.7419, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8036080360803608, |
|
"grad_norm": 0.3117675483226776, |
|
"learning_rate": 0.00019682229406025635, |
|
"loss": 0.7543, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8200082000820008, |
|
"grad_norm": 0.3408834636211395, |
|
"learning_rate": 0.0001966919954032059, |
|
"loss": 0.7356, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8364083640836408, |
|
"grad_norm": 0.3456558883190155, |
|
"learning_rate": 0.0001965591236576794, |
|
"loss": 0.7509, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8528085280852808, |
|
"grad_norm": 0.30214881896972656, |
|
"learning_rate": 0.00019642368235955125, |
|
"loss": 0.7483, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8692086920869209, |
|
"grad_norm": 0.3334808945655823, |
|
"learning_rate": 0.0001962856751130748, |
|
"loss": 0.7532, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8856088560885609, |
|
"grad_norm": 0.3257792890071869, |
|
"learning_rate": 0.00019614510559078625, |
|
"loss": 0.7465, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9020090200902009, |
|
"grad_norm": 0.4038065969944, |
|
"learning_rate": 0.00019600197753340699, |
|
"loss": 0.7555, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9184091840918409, |
|
"grad_norm": 0.32042616605758667, |
|
"learning_rate": 0.00019585629474974415, |
|
"loss": 0.7395, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9348093480934809, |
|
"grad_norm": 0.3661757707595825, |
|
"learning_rate": 0.00019570806111658898, |
|
"loss": 0.7447, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.951209512095121, |
|
"grad_norm": 0.31112104654312134, |
|
"learning_rate": 0.0001955572805786141, |
|
"loss": 0.7333, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.967609676096761, |
|
"grad_norm": 0.31758418679237366, |
|
"learning_rate": 0.000195403957148268, |
|
"loss": 0.7243, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.984009840098401, |
|
"grad_norm": 0.3203936219215393, |
|
"learning_rate": 0.00019524809490566877, |
|
"loss": 0.744, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.000410004100041, |
|
"grad_norm": 0.3630532920360565, |
|
"learning_rate": 0.00019508969799849523, |
|
"loss": 0.7249, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.016810168101681, |
|
"grad_norm": 0.31734269857406616, |
|
"learning_rate": 0.00019492877064187654, |
|
"loss": 0.7088, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.033210332103321, |
|
"grad_norm": 0.3637007176876068, |
|
"learning_rate": 0.00019476531711828027, |
|
"loss": 0.7139, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.049610496104961, |
|
"grad_norm": 0.3162935972213745, |
|
"learning_rate": 0.00019459934177739813, |
|
"loss": 0.7064, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.066010660106601, |
|
"grad_norm": 0.32640552520751953, |
|
"learning_rate": 0.0001944308490360305, |
|
"loss": 0.7161, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.082410824108241, |
|
"grad_norm": 0.34453850984573364, |
|
"learning_rate": 0.0001942598433779687, |
|
"loss": 0.7095, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.098810988109881, |
|
"grad_norm": 0.3382568061351776, |
|
"learning_rate": 0.00019408632935387577, |
|
"loss": 0.7092, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.115211152111521, |
|
"grad_norm": 0.3298267722129822, |
|
"learning_rate": 0.0001939103115811653, |
|
"loss": 0.7154, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1316113161131611, |
|
"grad_norm": 0.33924031257629395, |
|
"learning_rate": 0.00019373179474387858, |
|
"loss": 0.6945, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1480114801148011, |
|
"grad_norm": 0.3305610418319702, |
|
"learning_rate": 0.0001935507835925601, |
|
"loss": 0.7156, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1644116441164412, |
|
"grad_norm": 0.3400373160839081, |
|
"learning_rate": 0.00019336728294413083, |
|
"loss": 0.7091, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1808118081180812, |
|
"grad_norm": 0.3354150056838989, |
|
"learning_rate": 0.00019318129768176032, |
|
"loss": 0.7019, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1972119721197212, |
|
"grad_norm": 0.3427719175815582, |
|
"learning_rate": 0.00019299283275473657, |
|
"loss": 0.7157, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2136121361213612, |
|
"grad_norm": 0.3088740408420563, |
|
"learning_rate": 0.00019280189317833445, |
|
"loss": 0.7115, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2300123001230012, |
|
"grad_norm": 0.34402570128440857, |
|
"learning_rate": 0.0001926084840336821, |
|
"loss": 0.7197, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2464124641246412, |
|
"grad_norm": 0.3542528450489044, |
|
"learning_rate": 0.0001924126104676259, |
|
"loss": 0.7103, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2628126281262813, |
|
"grad_norm": 0.3533455431461334, |
|
"learning_rate": 0.00019221427769259333, |
|
"loss": 0.7209, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2792127921279213, |
|
"grad_norm": 0.3470947742462158, |
|
"learning_rate": 0.00019201349098645434, |
|
"loss": 0.7137, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2956129561295613, |
|
"grad_norm": 0.3277159035205841, |
|
"learning_rate": 0.0001918102556923809, |
|
"loss": 0.7036, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3120131201312013, |
|
"grad_norm": 0.35741642117500305, |
|
"learning_rate": 0.0001916045772187048, |
|
"loss": 0.7013, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3284132841328413, |
|
"grad_norm": 0.33222687244415283, |
|
"learning_rate": 0.0001913964610387738, |
|
"loss": 0.7052, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3448134481344813, |
|
"grad_norm": 0.3761126399040222, |
|
"learning_rate": 0.00019118591269080574, |
|
"loss": 0.695, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3612136121361214, |
|
"grad_norm": 0.34687796235084534, |
|
"learning_rate": 0.00019097293777774153, |
|
"loss": 0.7053, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3776137761377614, |
|
"grad_norm": 0.3391115367412567, |
|
"learning_rate": 0.00019075754196709572, |
|
"loss": 0.6842, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3940139401394014, |
|
"grad_norm": 0.35560715198516846, |
|
"learning_rate": 0.00019053973099080585, |
|
"loss": 0.6925, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4104141041410414, |
|
"grad_norm": 0.35144075751304626, |
|
"learning_rate": 0.00019031951064507987, |
|
"loss": 0.7063, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4268142681426814, |
|
"grad_norm": 0.3776865005493164, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 0.7157, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4432144321443214, |
|
"grad_norm": 0.3723245859146118, |
|
"learning_rate": 0.00018987186535057634, |
|
"loss": 0.7064, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4596145961459615, |
|
"grad_norm": 0.3623642921447754, |
|
"learning_rate": 0.0001896444523141701, |
|
"loss": 0.7056, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4760147601476015, |
|
"grad_norm": 0.34806951880455017, |
|
"learning_rate": 0.0001894146537327533, |
|
"loss": 0.7086, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4924149241492415, |
|
"grad_norm": 0.36614471673965454, |
|
"learning_rate": 0.00018918247572153823, |
|
"loss": 0.7032, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5088150881508815, |
|
"grad_norm": 0.351053923368454, |
|
"learning_rate": 0.00018894792445905674, |
|
"loss": 0.6988, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5252152521525215, |
|
"grad_norm": 0.37270405888557434, |
|
"learning_rate": 0.00018871100618699554, |
|
"loss": 0.6926, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5416154161541615, |
|
"grad_norm": 0.3521835207939148, |
|
"learning_rate": 0.00018847172721003043, |
|
"loss": 0.7004, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5580155801558015, |
|
"grad_norm": 0.37083443999290466, |
|
"learning_rate": 0.00018823009389565818, |
|
"loss": 0.7141, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5744157441574416, |
|
"grad_norm": 0.36266541481018066, |
|
"learning_rate": 0.00018798611267402746, |
|
"loss": 0.7029, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5908159081590816, |
|
"grad_norm": 0.34403783082962036, |
|
"learning_rate": 0.0001877397900377674, |
|
"loss": 0.7022, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6072160721607216, |
|
"grad_norm": 0.37788769602775574, |
|
"learning_rate": 0.00018749113254181498, |
|
"loss": 0.7049, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.6236162361623616, |
|
"grad_norm": 0.35607653856277466, |
|
"learning_rate": 0.00018724014680324057, |
|
"loss": 0.6957, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6400164001640016, |
|
"grad_norm": 0.34468725323677063, |
|
"learning_rate": 0.00018698683950107184, |
|
"loss": 0.7148, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6564165641656416, |
|
"grad_norm": 0.36872032284736633, |
|
"learning_rate": 0.00018673121737611598, |
|
"loss": 0.6922, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6728167281672817, |
|
"grad_norm": 0.34786421060562134, |
|
"learning_rate": 0.00018647328723078038, |
|
"loss": 0.7021, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6892168921689217, |
|
"grad_norm": 0.370241641998291, |
|
"learning_rate": 0.00018621305592889163, |
|
"loss": 0.6789, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7056170561705617, |
|
"grad_norm": 0.3378838300704956, |
|
"learning_rate": 0.00018595053039551274, |
|
"loss": 0.6741, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7220172201722017, |
|
"grad_norm": 0.3661263883113861, |
|
"learning_rate": 0.00018568571761675893, |
|
"loss": 0.6808, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7384173841738417, |
|
"grad_norm": 0.3620483875274658, |
|
"learning_rate": 0.0001854186246396118, |
|
"loss": 0.6895, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.7548175481754817, |
|
"grad_norm": 0.3466210663318634, |
|
"learning_rate": 0.00018514925857173177, |
|
"loss": 0.6876, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7712177121771218, |
|
"grad_norm": 0.36353030800819397, |
|
"learning_rate": 0.0001848776265812687, |
|
"loss": 0.6876, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7876178761787618, |
|
"grad_norm": 0.3688850998878479, |
|
"learning_rate": 0.00018460373589667154, |
|
"loss": 0.6881, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.8040180401804018, |
|
"grad_norm": 0.35844072699546814, |
|
"learning_rate": 0.00018432759380649562, |
|
"loss": 0.7038, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8204182041820418, |
|
"grad_norm": 0.38838833570480347, |
|
"learning_rate": 0.00018404920765920896, |
|
"loss": 0.6961, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8368183681836818, |
|
"grad_norm": 0.34356170892715454, |
|
"learning_rate": 0.00018376858486299647, |
|
"loss": 0.6849, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8532185321853218, |
|
"grad_norm": 0.3590626120567322, |
|
"learning_rate": 0.00018348573288556308, |
|
"loss": 0.68, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8696186961869619, |
|
"grad_norm": 0.3639477491378784, |
|
"learning_rate": 0.00018320065925393468, |
|
"loss": 0.6996, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8860188601886019, |
|
"grad_norm": 0.39745190739631653, |
|
"learning_rate": 0.00018291337155425821, |
|
"loss": 0.6901, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.902419024190242, |
|
"grad_norm": 0.37302690744400024, |
|
"learning_rate": 0.0001826238774315995, |
|
"loss": 0.6784, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.918819188191882, |
|
"grad_norm": 0.37480470538139343, |
|
"learning_rate": 0.00018233218458973984, |
|
"loss": 0.6806, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.935219352193522, |
|
"grad_norm": 0.37153083086013794, |
|
"learning_rate": 0.00018203830079097113, |
|
"loss": 0.6853, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.951619516195162, |
|
"grad_norm": 0.3530969023704529, |
|
"learning_rate": 0.00018174223385588917, |
|
"loss": 0.6785, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.968019680196802, |
|
"grad_norm": 0.37324872612953186, |
|
"learning_rate": 0.00018144399166318572, |
|
"loss": 0.6733, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.984419844198442, |
|
"grad_norm": 0.3680344223976135, |
|
"learning_rate": 0.00018114358214943853, |
|
"loss": 0.6885, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.000820008200082, |
|
"grad_norm": 0.42146405577659607, |
|
"learning_rate": 0.00018084101330890048, |
|
"loss": 0.6759, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.0172201722017222, |
|
"grad_norm": 0.3808436989784241, |
|
"learning_rate": 0.00018053629319328662, |
|
"loss": 0.655, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.033620336203362, |
|
"grad_norm": 0.39610400795936584, |
|
"learning_rate": 0.00018022942991156, |
|
"loss": 0.6675, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.0500205002050023, |
|
"grad_norm": 0.3906943202018738, |
|
"learning_rate": 0.00017992043162971588, |
|
"loss": 0.6451, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.066420664206642, |
|
"grad_norm": 0.4022831916809082, |
|
"learning_rate": 0.00017960930657056438, |
|
"loss": 0.6699, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.0828208282082823, |
|
"grad_norm": 0.41200950741767883, |
|
"learning_rate": 0.00017929606301351168, |
|
"loss": 0.6559, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.099220992209922, |
|
"grad_norm": 0.37226754426956177, |
|
"learning_rate": 0.00017898070929433965, |
|
"loss": 0.655, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.1156211562115623, |
|
"grad_norm": 0.3640751540660858, |
|
"learning_rate": 0.00017866325380498416, |
|
"loss": 0.6471, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.132021320213202, |
|
"grad_norm": 0.37838858366012573, |
|
"learning_rate": 0.00017834370499331166, |
|
"loss": 0.651, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1484214842148424, |
|
"grad_norm": 0.3831733167171478, |
|
"learning_rate": 0.0001780220713628943, |
|
"loss": 0.6504, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.164821648216482, |
|
"grad_norm": 0.39556068181991577, |
|
"learning_rate": 0.0001776983614727838, |
|
"loss": 0.6501, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.181221812218122, |
|
"grad_norm": 0.3833105266094208, |
|
"learning_rate": 0.00017737258393728364, |
|
"loss": 0.6598, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.197621976219762, |
|
"grad_norm": 0.40265655517578125, |
|
"learning_rate": 0.00017704474742571969, |
|
"loss": 0.663, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.2140221402214024, |
|
"grad_norm": 0.4098280072212219, |
|
"learning_rate": 0.00017671486066220965, |
|
"loss": 0.6567, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.230422304223042, |
|
"grad_norm": 0.4019940197467804, |
|
"learning_rate": 0.0001763829324254309, |
|
"loss": 0.6717, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.246822468224682, |
|
"grad_norm": 0.3930509388446808, |
|
"learning_rate": 0.00017604897154838685, |
|
"loss": 0.6544, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2632226322263223, |
|
"grad_norm": 0.40629473328590393, |
|
"learning_rate": 0.00017571298691817177, |
|
"loss": 0.65, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.2796227962279625, |
|
"grad_norm": 0.41943445801734924, |
|
"learning_rate": 0.00017537498747573443, |
|
"loss": 0.6612, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.2960229602296023, |
|
"grad_norm": 0.3832157552242279, |
|
"learning_rate": 0.00017503498221564025, |
|
"loss": 0.6599, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.312423124231242, |
|
"grad_norm": 0.3786883056163788, |
|
"learning_rate": 0.0001746929801858317, |
|
"loss": 0.6636, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.3288232882328823, |
|
"grad_norm": 0.40903323888778687, |
|
"learning_rate": 0.0001743489904873877, |
|
"loss": 0.6462, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3452234522345226, |
|
"grad_norm": 0.3858684003353119, |
|
"learning_rate": 0.00017400302227428143, |
|
"loss": 0.6556, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3616236162361623, |
|
"grad_norm": 0.43149909377098083, |
|
"learning_rate": 0.0001736550847531366, |
|
"loss": 0.6591, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.378023780237802, |
|
"grad_norm": 0.43920159339904785, |
|
"learning_rate": 0.00017330518718298264, |
|
"loss": 0.6609, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3944239442394424, |
|
"grad_norm": 0.39907246828079224, |
|
"learning_rate": 0.0001729533388750081, |
|
"loss": 0.6572, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.4108241082410826, |
|
"grad_norm": 0.3920808434486389, |
|
"learning_rate": 0.0001725995491923131, |
|
"loss": 0.6497, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.4272242722427224, |
|
"grad_norm": 0.39895495772361755, |
|
"learning_rate": 0.00017224382754965989, |
|
"loss": 0.646, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.443624436244362, |
|
"grad_norm": 0.41222187876701355, |
|
"learning_rate": 0.00017188618341322254, |
|
"loss": 0.6458, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4600246002460024, |
|
"grad_norm": 0.41394343972206116, |
|
"learning_rate": 0.00017152662630033505, |
|
"loss": 0.6528, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4764247642476427, |
|
"grad_norm": 0.4161871373653412, |
|
"learning_rate": 0.00017116516577923775, |
|
"loss": 0.6414, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4928249282492825, |
|
"grad_norm": 0.43303370475769043, |
|
"learning_rate": 0.00017080181146882317, |
|
"loss": 0.6573, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.5092250922509223, |
|
"grad_norm": 0.3973419964313507, |
|
"learning_rate": 0.00017043657303837963, |
|
"loss": 0.6541, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.5256252562525625, |
|
"grad_norm": 0.3919370472431183, |
|
"learning_rate": 0.00017006946020733425, |
|
"loss": 0.6626, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5420254202542027, |
|
"grad_norm": 0.4177446663379669, |
|
"learning_rate": 0.00016970048274499408, |
|
"loss": 0.6478, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.5584255842558425, |
|
"grad_norm": 0.39967086911201477, |
|
"learning_rate": 0.0001693296504702862, |
|
"loss": 0.6679, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5748257482574823, |
|
"grad_norm": 0.4124162495136261, |
|
"learning_rate": 0.00016895697325149657, |
|
"loss": 0.6597, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5912259122591226, |
|
"grad_norm": 0.37739571928977966, |
|
"learning_rate": 0.00016858246100600715, |
|
"loss": 0.6578, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.607626076260763, |
|
"grad_norm": 0.4335273802280426, |
|
"learning_rate": 0.00016820612370003221, |
|
"loss": 0.6608, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.6240262402624026, |
|
"grad_norm": 0.3718789517879486, |
|
"learning_rate": 0.00016782797134835305, |
|
"loss": 0.6447, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6404264042640424, |
|
"grad_norm": 0.41410380601882935, |
|
"learning_rate": 0.0001674480140140514, |
|
"loss": 0.6562, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6568265682656826, |
|
"grad_norm": 0.4115227460861206, |
|
"learning_rate": 0.00016706626180824186, |
|
"loss": 0.6451, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.673226732267323, |
|
"grad_norm": 0.4145969748497009, |
|
"learning_rate": 0.00016668272488980254, |
|
"loss": 0.6326, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6896268962689627, |
|
"grad_norm": 0.42359670996665955, |
|
"learning_rate": 0.00016629741346510496, |
|
"loss": 0.6324, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.7060270602706025, |
|
"grad_norm": 0.38336479663848877, |
|
"learning_rate": 0.0001659103377877423, |
|
"loss": 0.6508, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.7224272242722427, |
|
"grad_norm": 0.4128510057926178, |
|
"learning_rate": 0.00016552150815825662, |
|
"loss": 0.6538, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.738827388273883, |
|
"grad_norm": 0.47073811292648315, |
|
"learning_rate": 0.0001651309349238647, |
|
"loss": 0.6574, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7552275522755227, |
|
"grad_norm": 0.4234805703163147, |
|
"learning_rate": 0.00016473862847818277, |
|
"loss": 0.6643, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.7716277162771625, |
|
"grad_norm": 0.4076490104198456, |
|
"learning_rate": 0.0001643445992609498, |
|
"loss": 0.6509, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7880278802788028, |
|
"grad_norm": 0.4320470988750458, |
|
"learning_rate": 0.00016394885775774975, |
|
"loss": 0.643, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.804428044280443, |
|
"grad_norm": 0.409150630235672, |
|
"learning_rate": 0.00016355141449973256, |
|
"loss": 0.6452, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.820828208282083, |
|
"grad_norm": 0.46668359637260437, |
|
"learning_rate": 0.0001631522800633339, |
|
"loss": 0.645, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.8372283722837226, |
|
"grad_norm": 0.4314485192298889, |
|
"learning_rate": 0.00016275146506999365, |
|
"loss": 0.6374, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.853628536285363, |
|
"grad_norm": 0.41864487528800964, |
|
"learning_rate": 0.00016234898018587337, |
|
"loss": 0.6343, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.870028700287003, |
|
"grad_norm": 0.4255831241607666, |
|
"learning_rate": 0.0001619448361215723, |
|
"loss": 0.6614, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.886428864288643, |
|
"grad_norm": 0.411807656288147, |
|
"learning_rate": 0.0001615390436318425, |
|
"loss": 0.6372, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.9028290282902827, |
|
"grad_norm": 0.41166943311691284, |
|
"learning_rate": 0.0001611316135153026, |
|
"loss": 0.6505, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.919229192291923, |
|
"grad_norm": 0.41985031962394714, |
|
"learning_rate": 0.0001607225566141503, |
|
"loss": 0.6515, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.935629356293563, |
|
"grad_norm": 0.40023133158683777, |
|
"learning_rate": 0.0001603118838138741, |
|
"loss": 0.6466, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.952029520295203, |
|
"grad_norm": 0.4138244688510895, |
|
"learning_rate": 0.0001598996060429634, |
|
"loss": 0.6475, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.9684296842968427, |
|
"grad_norm": 0.39823102951049805, |
|
"learning_rate": 0.0001594857342726178, |
|
"loss": 0.6419, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.984829848298483, |
|
"grad_norm": 0.3888455033302307, |
|
"learning_rate": 0.0001590702795164551, |
|
"loss": 0.6416, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.001230012300123, |
|
"grad_norm": 0.5585771799087524, |
|
"learning_rate": 0.0001586532528302183, |
|
"loss": 0.6565, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.017630176301763, |
|
"grad_norm": 0.4332481026649475, |
|
"learning_rate": 0.00015823466531148124, |
|
"loss": 0.619, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.0340303403034032, |
|
"grad_norm": 0.44493553042411804, |
|
"learning_rate": 0.0001578145280993533, |
|
"loss": 0.6057, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.050430504305043, |
|
"grad_norm": 0.43917590379714966, |
|
"learning_rate": 0.0001573928523741832, |
|
"loss": 0.6058, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.0668306683066833, |
|
"grad_norm": 0.40770649909973145, |
|
"learning_rate": 0.0001569696493572612, |
|
"loss": 0.6092, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.083230832308323, |
|
"grad_norm": 0.44579923152923584, |
|
"learning_rate": 0.0001565449303105207, |
|
"loss": 0.6272, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.0996309963099633, |
|
"grad_norm": 0.4329695403575897, |
|
"learning_rate": 0.00015611870653623825, |
|
"loss": 0.6238, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.116031160311603, |
|
"grad_norm": 0.44702407717704773, |
|
"learning_rate": 0.00015569098937673318, |
|
"loss": 0.6124, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.1324313243132433, |
|
"grad_norm": 0.43535304069519043, |
|
"learning_rate": 0.00015526179021406553, |
|
"loss": 0.6179, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.148831488314883, |
|
"grad_norm": 0.4820752739906311, |
|
"learning_rate": 0.0001548311204697331, |
|
"loss": 0.6228, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.1652316523165234, |
|
"grad_norm": 0.4629499614238739, |
|
"learning_rate": 0.00015439899160436772, |
|
"loss": 0.6189, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.181631816318163, |
|
"grad_norm": 0.4845089316368103, |
|
"learning_rate": 0.00015396541511743012, |
|
"loss": 0.6229, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.1980319803198034, |
|
"grad_norm": 0.46592435240745544, |
|
"learning_rate": 0.00015353040254690393, |
|
"loss": 0.6178, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.214432144321443, |
|
"grad_norm": 0.44851964712142944, |
|
"learning_rate": 0.0001530939654689887, |
|
"loss": 0.6246, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.2308323083230834, |
|
"grad_norm": 0.4483705461025238, |
|
"learning_rate": 0.00015265611549779176, |
|
"loss": 0.6204, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.2472324723247232, |
|
"grad_norm": 0.45693984627723694, |
|
"learning_rate": 0.00015221686428501928, |
|
"loss": 0.6061, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.2636326363263635, |
|
"grad_norm": 0.46255984902381897, |
|
"learning_rate": 0.0001517762235196661, |
|
"loss": 0.6058, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.2800328003280033, |
|
"grad_norm": 0.471983939409256, |
|
"learning_rate": 0.00015133420492770462, |
|
"loss": 0.6157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.296432964329643, |
|
"grad_norm": 0.49640896916389465, |
|
"learning_rate": 0.0001508908202717729, |
|
"loss": 0.6237, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.3128331283312833, |
|
"grad_norm": 0.4903806149959564, |
|
"learning_rate": 0.00015044608135086164, |
|
"loss": 0.6339, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.3292332923329235, |
|
"grad_norm": 0.47123804688453674, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.6319, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.3456334563345633, |
|
"grad_norm": 0.47124335169792175, |
|
"learning_rate": 0.00014955258808994096, |
|
"loss": 0.6119, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.362033620336203, |
|
"grad_norm": 0.4579004645347595, |
|
"learning_rate": 0.00014910385752684506, |
|
"loss": 0.6042, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.3784337843378434, |
|
"grad_norm": 0.4778992235660553, |
|
"learning_rate": 0.0001486538202519639, |
|
"loss": 0.6187, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.3948339483394836, |
|
"grad_norm": 0.4603955149650574, |
|
"learning_rate": 0.0001482024882413222, |
|
"loss": 0.6166, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.4112341123411234, |
|
"grad_norm": 0.4642026126384735, |
|
"learning_rate": 0.00014774987350539911, |
|
"loss": 0.6183, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.427634276342763, |
|
"grad_norm": 0.5194045305252075, |
|
"learning_rate": 0.00014729598808880861, |
|
"loss": 0.617, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.4440344403444034, |
|
"grad_norm": 0.4535214900970459, |
|
"learning_rate": 0.00014684084406997903, |
|
"loss": 0.6301, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.4604346043460437, |
|
"grad_norm": 0.47860169410705566, |
|
"learning_rate": 0.00014638445356083155, |
|
"loss": 0.6113, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.4768347683476835, |
|
"grad_norm": 0.4803871810436249, |
|
"learning_rate": 0.000145926828706458, |
|
"loss": 0.6122, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.4932349323493233, |
|
"grad_norm": 0.47383010387420654, |
|
"learning_rate": 0.00014546798168479756, |
|
"loss": 0.6137, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.5096350963509635, |
|
"grad_norm": 0.47791990637779236, |
|
"learning_rate": 0.0001450079247063127, |
|
"loss": 0.6215, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.5260352603526037, |
|
"grad_norm": 0.4606546461582184, |
|
"learning_rate": 0.0001445466700136643, |
|
"loss": 0.6022, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.5424354243542435, |
|
"grad_norm": 0.4335190951824188, |
|
"learning_rate": 0.00014408422988138584, |
|
"loss": 0.6089, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.5588355883558833, |
|
"grad_norm": 0.45156562328338623, |
|
"learning_rate": 0.00014362061661555675, |
|
"loss": 0.6251, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.5752357523575236, |
|
"grad_norm": 0.43693843483924866, |
|
"learning_rate": 0.00014315584255347494, |
|
"loss": 0.62, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.591635916359164, |
|
"grad_norm": 0.46648937463760376, |
|
"learning_rate": 0.00014268992006332846, |
|
"loss": 0.6336, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.6080360803608036, |
|
"grad_norm": 0.47864094376564026, |
|
"learning_rate": 0.00014222286154386642, |
|
"loss": 0.6079, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.6244362443624434, |
|
"grad_norm": 0.43990063667297363, |
|
"learning_rate": 0.00014175467942406894, |
|
"loss": 0.6321, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.6408364083640836, |
|
"grad_norm": 0.4700396656990051, |
|
"learning_rate": 0.0001412853861628166, |
|
"loss": 0.6195, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.657236572365724, |
|
"grad_norm": 0.45286816358566284, |
|
"learning_rate": 0.00014081499424855863, |
|
"loss": 0.6236, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.6736367363673637, |
|
"grad_norm": 0.4623110294342041, |
|
"learning_rate": 0.00014034351619898088, |
|
"loss": 0.6269, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.6900369003690034, |
|
"grad_norm": 0.5022940039634705, |
|
"learning_rate": 0.00013987096456067236, |
|
"loss": 0.6107, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.7064370643706437, |
|
"grad_norm": 0.49338841438293457, |
|
"learning_rate": 0.00013939735190879172, |
|
"loss": 0.5996, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.722837228372284, |
|
"grad_norm": 0.4769245982170105, |
|
"learning_rate": 0.0001389226908467323, |
|
"loss": 0.6074, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.7392373923739237, |
|
"grad_norm": 0.48450419306755066, |
|
"learning_rate": 0.00013844699400578696, |
|
"loss": 0.6092, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.7556375563755635, |
|
"grad_norm": 0.46382635831832886, |
|
"learning_rate": 0.00013797027404481184, |
|
"loss": 0.6154, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.7720377203772038, |
|
"grad_norm": 0.4822761118412018, |
|
"learning_rate": 0.00013749254364988956, |
|
"loss": 0.612, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.788437884378844, |
|
"grad_norm": 0.4664019048213959, |
|
"learning_rate": 0.00013701381553399145, |
|
"loss": 0.6003, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.804838048380484, |
|
"grad_norm": 0.4592795670032501, |
|
"learning_rate": 0.00013653410243663952, |
|
"loss": 0.6122, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.8212382123821236, |
|
"grad_norm": 0.4780454635620117, |
|
"learning_rate": 0.00013605341712356723, |
|
"loss": 0.6216, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.837638376383764, |
|
"grad_norm": 0.48119214177131653, |
|
"learning_rate": 0.00013557177238637986, |
|
"loss": 0.6119, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.854038540385404, |
|
"grad_norm": 0.44965630769729614, |
|
"learning_rate": 0.00013508918104221412, |
|
"loss": 0.619, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.870438704387044, |
|
"grad_norm": 0.4915274977684021, |
|
"learning_rate": 0.00013460565593339705, |
|
"loss": 0.6101, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.8868388683886836, |
|
"grad_norm": 0.4430049955844879, |
|
"learning_rate": 0.00013412120992710425, |
|
"loss": 0.6125, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.903239032390324, |
|
"grad_norm": 0.4661041796207428, |
|
"learning_rate": 0.0001336358559150175, |
|
"loss": 0.6177, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.919639196391964, |
|
"grad_norm": 0.48300737142562866, |
|
"learning_rate": 0.00013314960681298175, |
|
"loss": 0.6039, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.936039360393604, |
|
"grad_norm": 0.45341914892196655, |
|
"learning_rate": 0.00013266247556066122, |
|
"loss": 0.5989, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.9524395243952437, |
|
"grad_norm": 0.46126899123191833, |
|
"learning_rate": 0.00013217447512119533, |
|
"loss": 0.6188, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.968839688396884, |
|
"grad_norm": 0.4723737835884094, |
|
"learning_rate": 0.0001316856184808535, |
|
"loss": 0.6091, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.985239852398524, |
|
"grad_norm": 0.4723500907421112, |
|
"learning_rate": 0.0001311959186486898, |
|
"loss": 0.6178, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 4.001640016400164, |
|
"grad_norm": 0.4641455113887787, |
|
"learning_rate": 0.00013070538865619642, |
|
"loss": 0.6144, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 4.018040180401804, |
|
"grad_norm": 0.5465942621231079, |
|
"learning_rate": 0.00013021404155695725, |
|
"loss": 0.5762, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 4.0344403444034445, |
|
"grad_norm": 0.5195038318634033, |
|
"learning_rate": 0.00012972189042630044, |
|
"loss": 0.5936, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 4.050840508405084, |
|
"grad_norm": 0.5215120911598206, |
|
"learning_rate": 0.00012922894836095013, |
|
"loss": 0.5797, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.067240672406724, |
|
"grad_norm": 0.5008783340454102, |
|
"learning_rate": 0.00012873522847867835, |
|
"loss": 0.5887, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.083640836408364, |
|
"grad_norm": 0.47933313250541687, |
|
"learning_rate": 0.0001282407439179557, |
|
"loss": 0.5831, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 4.1000410004100045, |
|
"grad_norm": 0.5141870379447937, |
|
"learning_rate": 0.00012774550783760182, |
|
"loss": 0.5765, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.116441164411644, |
|
"grad_norm": 0.4889928996562958, |
|
"learning_rate": 0.0001272495334164351, |
|
"loss": 0.5834, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 4.132841328413284, |
|
"grad_norm": 0.5281891822814941, |
|
"learning_rate": 0.00012675283385292212, |
|
"loss": 0.5778, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 4.149241492414924, |
|
"grad_norm": 0.5182332396507263, |
|
"learning_rate": 0.00012625542236482628, |
|
"loss": 0.5882, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.165641656416565, |
|
"grad_norm": 0.554658055305481, |
|
"learning_rate": 0.00012575731218885625, |
|
"loss": 0.5871, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 4.182041820418204, |
|
"grad_norm": 0.5192410349845886, |
|
"learning_rate": 0.00012525851658031352, |
|
"loss": 0.5768, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.198441984419844, |
|
"grad_norm": 0.5177326202392578, |
|
"learning_rate": 0.0001247590488127398, |
|
"loss": 0.5849, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 4.214842148421484, |
|
"grad_norm": 0.5142077803611755, |
|
"learning_rate": 0.0001242589221775637, |
|
"loss": 0.5744, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 4.231242312423125, |
|
"grad_norm": 0.48389384150505066, |
|
"learning_rate": 0.00012375814998374712, |
|
"loss": 0.5977, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 4.2476424764247644, |
|
"grad_norm": 0.5631837248802185, |
|
"learning_rate": 0.00012325674555743106, |
|
"loss": 0.5827, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 4.264042640426404, |
|
"grad_norm": 0.5213803052902222, |
|
"learning_rate": 0.0001227547222415809, |
|
"loss": 0.5815, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.280442804428044, |
|
"grad_norm": 0.5063258409500122, |
|
"learning_rate": 0.00012225209339563145, |
|
"loss": 0.591, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 4.296842968429685, |
|
"grad_norm": 0.503495454788208, |
|
"learning_rate": 0.0001217488723951314, |
|
"loss": 0.5872, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 4.3132431324313245, |
|
"grad_norm": 0.5165495276451111, |
|
"learning_rate": 0.00012124507263138736, |
|
"loss": 0.5739, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 4.329643296432964, |
|
"grad_norm": 0.4980062246322632, |
|
"learning_rate": 0.00012074070751110751, |
|
"loss": 0.5795, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.346043460434604, |
|
"grad_norm": 0.5419421792030334, |
|
"learning_rate": 0.00012023579045604485, |
|
"loss": 0.5844, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.362443624436244, |
|
"grad_norm": 0.5244302749633789, |
|
"learning_rate": 0.00011973033490264001, |
|
"loss": 0.5777, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.378843788437885, |
|
"grad_norm": 0.5156043171882629, |
|
"learning_rate": 0.0001192243543016637, |
|
"loss": 0.5832, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.395243952439524, |
|
"grad_norm": 0.5119035840034485, |
|
"learning_rate": 0.00011871786211785876, |
|
"loss": 0.59, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.411644116441164, |
|
"grad_norm": 0.5079758167266846, |
|
"learning_rate": 0.00011821087182958186, |
|
"loss": 0.5652, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.428044280442805, |
|
"grad_norm": 0.5077067613601685, |
|
"learning_rate": 0.00011770339692844483, |
|
"loss": 0.6014, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 0.5312401056289673, |
|
"learning_rate": 0.00011719545091895564, |
|
"loss": 0.594, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.460844608446084, |
|
"grad_norm": 0.5242477655410767, |
|
"learning_rate": 0.00011668704731815892, |
|
"loss": 0.5914, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.477244772447724, |
|
"grad_norm": 0.48659422993659973, |
|
"learning_rate": 0.0001161781996552765, |
|
"loss": 0.5927, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.493644936449364, |
|
"grad_norm": 0.5395858287811279, |
|
"learning_rate": 0.00011566892147134705, |
|
"loss": 0.5961, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.510045100451005, |
|
"grad_norm": 0.503237783908844, |
|
"learning_rate": 0.00011515922631886605, |
|
"loss": 0.5763, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.5264452644526445, |
|
"grad_norm": 0.5137144923210144, |
|
"learning_rate": 0.00011464912776142494, |
|
"loss": 0.5912, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.542845428454284, |
|
"grad_norm": 0.5336122512817383, |
|
"learning_rate": 0.00011413863937335028, |
|
"loss": 0.5857, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.559245592455925, |
|
"grad_norm": 0.5271728038787842, |
|
"learning_rate": 0.00011362777473934248, |
|
"loss": 0.5915, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.575645756457565, |
|
"grad_norm": 0.5467897057533264, |
|
"learning_rate": 0.00011311654745411425, |
|
"loss": 0.5871, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.592045920459205, |
|
"grad_norm": 0.5065737962722778, |
|
"learning_rate": 0.00011260497112202895, |
|
"loss": 0.5867, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.608446084460844, |
|
"grad_norm": 0.5274237990379333, |
|
"learning_rate": 0.00011209305935673844, |
|
"loss": 0.5885, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.624846248462484, |
|
"grad_norm": 0.5007864832878113, |
|
"learning_rate": 0.00011158082578082089, |
|
"loss": 0.5965, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.641246412464125, |
|
"grad_norm": 0.48857831954956055, |
|
"learning_rate": 0.0001110682840254182, |
|
"loss": 0.5898, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.657646576465765, |
|
"grad_norm": 0.5346599221229553, |
|
"learning_rate": 0.00011055544772987335, |
|
"loss": 0.5941, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.674046740467404, |
|
"grad_norm": 0.4968061149120331, |
|
"learning_rate": 0.00011004233054136725, |
|
"loss": 0.5852, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.690446904469045, |
|
"grad_norm": 0.5415840744972229, |
|
"learning_rate": 0.00010952894611455584, |
|
"loss": 0.5891, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.706847068470685, |
|
"grad_norm": 0.5450642704963684, |
|
"learning_rate": 0.00010901530811120655, |
|
"loss": 0.5758, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.723247232472325, |
|
"grad_norm": 0.5052106976509094, |
|
"learning_rate": 0.00010850143019983474, |
|
"loss": 0.5736, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.7396473964739645, |
|
"grad_norm": 0.5136608481407166, |
|
"learning_rate": 0.00010798732605534006, |
|
"loss": 0.5769, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.756047560475604, |
|
"grad_norm": 0.5369486808776855, |
|
"learning_rate": 0.00010747300935864243, |
|
"loss": 0.5762, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.772447724477245, |
|
"grad_norm": 0.5404406189918518, |
|
"learning_rate": 0.00010695849379631813, |
|
"loss": 0.5862, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 4.788847888478885, |
|
"grad_norm": 0.5491173267364502, |
|
"learning_rate": 0.0001064437930602354, |
|
"loss": 0.5783, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 4.805248052480525, |
|
"grad_norm": 0.5794305801391602, |
|
"learning_rate": 0.00010592892084719024, |
|
"loss": 0.596, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.821648216482165, |
|
"grad_norm": 0.5437578558921814, |
|
"learning_rate": 0.00010541389085854176, |
|
"loss": 0.5892, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.838048380483805, |
|
"grad_norm": 0.5468413233757019, |
|
"learning_rate": 0.00010489871679984777, |
|
"loss": 0.57, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.854448544485445, |
|
"grad_norm": 0.5444806814193726, |
|
"learning_rate": 0.00010438341238049991, |
|
"loss": 0.5754, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.870848708487085, |
|
"grad_norm": 0.5279709696769714, |
|
"learning_rate": 0.00010386799131335889, |
|
"loss": 0.5757, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.887248872488724, |
|
"grad_norm": 0.5229681134223938, |
|
"learning_rate": 0.00010335246731438948, |
|
"loss": 0.5851, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.903649036490365, |
|
"grad_norm": 0.5356857180595398, |
|
"learning_rate": 0.00010283685410229571, |
|
"loss": 0.5846, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.920049200492005, |
|
"grad_norm": 0.5306787490844727, |
|
"learning_rate": 0.00010232116539815558, |
|
"loss": 0.5595, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.936449364493645, |
|
"grad_norm": 0.5103681683540344, |
|
"learning_rate": 0.00010180541492505604, |
|
"loss": 0.5878, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.952849528495285, |
|
"grad_norm": 0.528273344039917, |
|
"learning_rate": 0.00010128961640772785, |
|
"loss": 0.5684, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.969249692496925, |
|
"grad_norm": 0.5309355854988098, |
|
"learning_rate": 0.00010077378357218021, |
|
"loss": 0.5949, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.985649856498565, |
|
"grad_norm": 0.5428647994995117, |
|
"learning_rate": 0.00010025793014533558, |
|
"loss": 0.5893, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 5.002050020500205, |
|
"grad_norm": 0.519814133644104, |
|
"learning_rate": 9.974206985466442e-05, |
|
"loss": 0.5898, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 5.018450184501845, |
|
"grad_norm": 0.5455579161643982, |
|
"learning_rate": 9.92262164278198e-05, |
|
"loss": 0.5495, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 5.034850348503485, |
|
"grad_norm": 0.5572001338005066, |
|
"learning_rate": 9.871038359227214e-05, |
|
"loss": 0.543, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 5.051250512505125, |
|
"grad_norm": 0.5723180174827576, |
|
"learning_rate": 9.819458507494394e-05, |
|
"loss": 0.5557, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 5.067650676506765, |
|
"grad_norm": 0.5615038275718689, |
|
"learning_rate": 9.767883460184443e-05, |
|
"loss": 0.5505, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 5.0840508405084055, |
|
"grad_norm": 0.5597648024559021, |
|
"learning_rate": 9.71631458977043e-05, |
|
"loss": 0.5599, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.100451004510045, |
|
"grad_norm": 0.5660073161125183, |
|
"learning_rate": 9.66475326856105e-05, |
|
"loss": 0.5609, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 5.116851168511685, |
|
"grad_norm": 0.5740461945533752, |
|
"learning_rate": 9.613200868664112e-05, |
|
"loss": 0.5505, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 5.133251332513325, |
|
"grad_norm": 0.5641244649887085, |
|
"learning_rate": 9.561658761950007e-05, |
|
"loss": 0.5725, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 5.149651496514966, |
|
"grad_norm": 0.5780383944511414, |
|
"learning_rate": 9.510128320015224e-05, |
|
"loss": 0.5584, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 5.166051660516605, |
|
"grad_norm": 0.5865011215209961, |
|
"learning_rate": 9.458610914145826e-05, |
|
"loss": 0.555, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 5.182451824518245, |
|
"grad_norm": 0.5646878480911255, |
|
"learning_rate": 9.40710791528098e-05, |
|
"loss": 0.5524, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 5.198851988519885, |
|
"grad_norm": 0.6092641353607178, |
|
"learning_rate": 9.355620693976461e-05, |
|
"loss": 0.5704, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 5.215252152521526, |
|
"grad_norm": 0.5585269927978516, |
|
"learning_rate": 9.304150620368188e-05, |
|
"loss": 0.5652, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 5.231652316523165, |
|
"grad_norm": 0.5374350547790527, |
|
"learning_rate": 9.252699064135758e-05, |
|
"loss": 0.5591, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 5.248052480524805, |
|
"grad_norm": 0.5892452001571655, |
|
"learning_rate": 9.201267394465998e-05, |
|
"loss": 0.568, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.264452644526445, |
|
"grad_norm": 0.5627169609069824, |
|
"learning_rate": 9.149856980016529e-05, |
|
"loss": 0.5573, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 5.280852808528086, |
|
"grad_norm": 0.6352980732917786, |
|
"learning_rate": 9.098469188879349e-05, |
|
"loss": 0.5548, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 5.2972529725297255, |
|
"grad_norm": 0.5747233629226685, |
|
"learning_rate": 9.047105388544417e-05, |
|
"loss": 0.5802, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 5.313653136531365, |
|
"grad_norm": 0.66510009765625, |
|
"learning_rate": 8.995766945863277e-05, |
|
"loss": 0.5583, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 5.330053300533005, |
|
"grad_norm": 0.6103816628456116, |
|
"learning_rate": 8.944455227012666e-05, |
|
"loss": 0.5622, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 5.346453464534646, |
|
"grad_norm": 0.53193598985672, |
|
"learning_rate": 8.89317159745818e-05, |
|
"loss": 0.5622, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.3628536285362856, |
|
"grad_norm": 0.5971400737762451, |
|
"learning_rate": 8.841917421917912e-05, |
|
"loss": 0.5407, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 5.379253792537925, |
|
"grad_norm": 0.5562443733215332, |
|
"learning_rate": 8.790694064326157e-05, |
|
"loss": 0.5544, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 5.395653956539565, |
|
"grad_norm": 0.6092881560325623, |
|
"learning_rate": 8.739502887797107e-05, |
|
"loss": 0.5543, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 5.412054120541206, |
|
"grad_norm": 0.5391745567321777, |
|
"learning_rate": 8.688345254588578e-05, |
|
"loss": 0.5553, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.428454284542846, |
|
"grad_norm": 0.558480978012085, |
|
"learning_rate": 8.637222526065756e-05, |
|
"loss": 0.5614, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 5.444854448544485, |
|
"grad_norm": 0.5935754776000977, |
|
"learning_rate": 8.586136062664974e-05, |
|
"loss": 0.5569, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 5.461254612546125, |
|
"grad_norm": 0.5984821319580078, |
|
"learning_rate": 8.535087223857508e-05, |
|
"loss": 0.5625, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 5.477654776547766, |
|
"grad_norm": 0.5659565925598145, |
|
"learning_rate": 8.484077368113399e-05, |
|
"loss": 0.5483, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.494054940549406, |
|
"grad_norm": 0.5952328443527222, |
|
"learning_rate": 8.433107852865298e-05, |
|
"loss": 0.5546, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.5104551045510455, |
|
"grad_norm": 0.5910452604293823, |
|
"learning_rate": 8.382180034472353e-05, |
|
"loss": 0.5371, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 5.526855268552685, |
|
"grad_norm": 0.5623068809509277, |
|
"learning_rate": 8.33129526818411e-05, |
|
"loss": 0.5463, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 5.543255432554325, |
|
"grad_norm": 0.557976484298706, |
|
"learning_rate": 8.280454908104439e-05, |
|
"loss": 0.555, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 5.559655596555966, |
|
"grad_norm": 0.5730746984481812, |
|
"learning_rate": 8.229660307155518e-05, |
|
"loss": 0.5612, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.5760557605576055, |
|
"grad_norm": 0.5673303604125977, |
|
"learning_rate": 8.178912817041817e-05, |
|
"loss": 0.5583, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.592455924559245, |
|
"grad_norm": 0.5584287047386169, |
|
"learning_rate": 8.128213788214126e-05, |
|
"loss": 0.5471, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 5.608856088560886, |
|
"grad_norm": 0.5875297784805298, |
|
"learning_rate": 8.077564569833632e-05, |
|
"loss": 0.5576, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 5.625256252562526, |
|
"grad_norm": 0.5899044275283813, |
|
"learning_rate": 8.026966509736001e-05, |
|
"loss": 0.5437, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 5.641656416564166, |
|
"grad_norm": 0.6080020666122437, |
|
"learning_rate": 7.976420954395518e-05, |
|
"loss": 0.5552, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 5.658056580565805, |
|
"grad_norm": 0.594315230846405, |
|
"learning_rate": 7.92592924888925e-05, |
|
"loss": 0.5587, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.674456744567445, |
|
"grad_norm": 0.5834687948226929, |
|
"learning_rate": 7.875492736861266e-05, |
|
"loss": 0.57, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 5.690856908569086, |
|
"grad_norm": 0.5709730982780457, |
|
"learning_rate": 7.825112760486861e-05, |
|
"loss": 0.5737, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 5.707257072570726, |
|
"grad_norm": 0.5856568217277527, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 0.5484, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 5.7236572365723655, |
|
"grad_norm": 0.5535131692886353, |
|
"learning_rate": 7.724527775841914e-05, |
|
"loss": 0.5483, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 5.740057400574006, |
|
"grad_norm": 0.5895147323608398, |
|
"learning_rate": 7.674325444256899e-05, |
|
"loss": 0.5463, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.756457564575646, |
|
"grad_norm": 0.57289719581604, |
|
"learning_rate": 7.624185001625292e-05, |
|
"loss": 0.5569, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 5.772857728577286, |
|
"grad_norm": 0.5502966046333313, |
|
"learning_rate": 7.574107782243634e-05, |
|
"loss": 0.5696, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 5.7892578925789255, |
|
"grad_norm": 0.5804471969604492, |
|
"learning_rate": 7.524095118726025e-05, |
|
"loss": 0.5506, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 5.805658056580565, |
|
"grad_norm": 0.5519583225250244, |
|
"learning_rate": 7.474148341968652e-05, |
|
"loss": 0.5571, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 5.822058220582206, |
|
"grad_norm": 0.5806939005851746, |
|
"learning_rate": 7.42426878111438e-05, |
|
"loss": 0.5556, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 5.838458384583846, |
|
"grad_norm": 0.5681191682815552, |
|
"learning_rate": 7.374457763517376e-05, |
|
"loss": 0.5557, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 5.854858548585486, |
|
"grad_norm": 0.6004670262336731, |
|
"learning_rate": 7.324716614707793e-05, |
|
"loss": 0.5686, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 5.871258712587126, |
|
"grad_norm": 0.5577644109725952, |
|
"learning_rate": 7.275046658356494e-05, |
|
"loss": 0.5619, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 5.887658876588766, |
|
"grad_norm": 0.5934208631515503, |
|
"learning_rate": 7.225449216239821e-05, |
|
"loss": 0.5593, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 5.904059040590406, |
|
"grad_norm": 0.5490043759346008, |
|
"learning_rate": 7.175925608204428e-05, |
|
"loss": 0.5404, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.920459204592046, |
|
"grad_norm": 0.662482500076294, |
|
"learning_rate": 7.126477152132164e-05, |
|
"loss": 0.5525, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 5.9368593685936855, |
|
"grad_norm": 0.5766283869743347, |
|
"learning_rate": 7.077105163904987e-05, |
|
"loss": 0.5706, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 5.953259532595326, |
|
"grad_norm": 0.5751728415489197, |
|
"learning_rate": 7.027810957369957e-05, |
|
"loss": 0.5626, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 5.969659696596966, |
|
"grad_norm": 0.5690221190452576, |
|
"learning_rate": 6.978595844304271e-05, |
|
"loss": 0.558, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 5.986059860598606, |
|
"grad_norm": 0.5637174248695374, |
|
"learning_rate": 6.92946113438036e-05, |
|
"loss": 0.5665, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 6.002460024600246, |
|
"grad_norm": 0.5749226808547974, |
|
"learning_rate": 6.880408135131022e-05, |
|
"loss": 0.5687, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 6.018860188601886, |
|
"grad_norm": 0.581462562084198, |
|
"learning_rate": 6.831438151914649e-05, |
|
"loss": 0.5272, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 6.035260352603526, |
|
"grad_norm": 0.585290253162384, |
|
"learning_rate": 6.782552487880468e-05, |
|
"loss": 0.5347, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 6.051660516605166, |
|
"grad_norm": 0.6168552041053772, |
|
"learning_rate": 6.733752443933878e-05, |
|
"loss": 0.5224, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 6.0680606806068065, |
|
"grad_norm": 0.5952621698379517, |
|
"learning_rate": 6.685039318701826e-05, |
|
"loss": 0.5267, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.084460844608446, |
|
"grad_norm": 0.708752453327179, |
|
"learning_rate": 6.636414408498249e-05, |
|
"loss": 0.5323, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 6.100861008610086, |
|
"grad_norm": 0.6072937250137329, |
|
"learning_rate": 6.587879007289576e-05, |
|
"loss": 0.5361, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 6.117261172611726, |
|
"grad_norm": 0.6426242589950562, |
|
"learning_rate": 6.539434406660296e-05, |
|
"loss": 0.5439, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 6.1336613366133665, |
|
"grad_norm": 0.6066299080848694, |
|
"learning_rate": 6.491081895778588e-05, |
|
"loss": 0.5256, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 6.150061500615006, |
|
"grad_norm": 0.578899621963501, |
|
"learning_rate": 6.442822761362015e-05, |
|
"loss": 0.5373, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 6.166461664616646, |
|
"grad_norm": 0.6230922341346741, |
|
"learning_rate": 6.394658287643278e-05, |
|
"loss": 0.5249, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 6.182861828618286, |
|
"grad_norm": 0.610723614692688, |
|
"learning_rate": 6.34658975633605e-05, |
|
"loss": 0.5417, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 6.199261992619927, |
|
"grad_norm": 0.6284223198890686, |
|
"learning_rate": 6.298618446600856e-05, |
|
"loss": 0.5361, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 6.215662156621566, |
|
"grad_norm": 0.6320038437843323, |
|
"learning_rate": 6.250745635011048e-05, |
|
"loss": 0.5344, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 6.232062320623206, |
|
"grad_norm": 0.5748194456100464, |
|
"learning_rate": 6.202972595518817e-05, |
|
"loss": 0.5285, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.248462484624846, |
|
"grad_norm": 0.6720646023750305, |
|
"learning_rate": 6.155300599421306e-05, |
|
"loss": 0.5399, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 6.264862648626487, |
|
"grad_norm": 0.6088182330131531, |
|
"learning_rate": 6.107730915326772e-05, |
|
"loss": 0.5387, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 6.2812628126281265, |
|
"grad_norm": 0.6509405970573425, |
|
"learning_rate": 6.0602648091208324e-05, |
|
"loss": 0.5356, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 6.297662976629766, |
|
"grad_norm": 0.6377142667770386, |
|
"learning_rate": 6.012903543932766e-05, |
|
"loss": 0.5328, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 6.314063140631406, |
|
"grad_norm": 0.638660728931427, |
|
"learning_rate": 5.965648380101916e-05, |
|
"loss": 0.5403, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 6.330463304633047, |
|
"grad_norm": 0.6343023180961609, |
|
"learning_rate": 5.918500575144138e-05, |
|
"loss": 0.5316, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 6.3468634686346865, |
|
"grad_norm": 0.6120122075080872, |
|
"learning_rate": 5.871461383718344e-05, |
|
"loss": 0.5475, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 6.363263632636326, |
|
"grad_norm": 0.664526104927063, |
|
"learning_rate": 5.8245320575931085e-05, |
|
"loss": 0.5539, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 6.379663796637966, |
|
"grad_norm": 0.6195924878120422, |
|
"learning_rate": 5.777713845613364e-05, |
|
"loss": 0.5314, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 6.396063960639607, |
|
"grad_norm": 0.6664077639579773, |
|
"learning_rate": 5.7310079936671545e-05, |
|
"loss": 0.5309, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.412464124641247, |
|
"grad_norm": 0.6408997178077698, |
|
"learning_rate": 5.684415744652509e-05, |
|
"loss": 0.5346, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 6.428864288642886, |
|
"grad_norm": 0.596552312374115, |
|
"learning_rate": 5.6379383384443255e-05, |
|
"loss": 0.5424, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 6.445264452644526, |
|
"grad_norm": 0.6852882504463196, |
|
"learning_rate": 5.59157701186142e-05, |
|
"loss": 0.5367, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 6.461664616646167, |
|
"grad_norm": 0.6832631826400757, |
|
"learning_rate": 5.545332998633572e-05, |
|
"loss": 0.5311, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 6.478064780647807, |
|
"grad_norm": 0.6201120018959045, |
|
"learning_rate": 5.499207529368734e-05, |
|
"loss": 0.5398, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 6.4944649446494465, |
|
"grad_norm": 0.6140998601913452, |
|
"learning_rate": 5.453201831520245e-05, |
|
"loss": 0.5388, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 6.510865108651086, |
|
"grad_norm": 0.6650799512863159, |
|
"learning_rate": 5.4073171293542016e-05, |
|
"loss": 0.5419, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 6.527265272652727, |
|
"grad_norm": 0.6716285347938538, |
|
"learning_rate": 5.3615546439168485e-05, |
|
"loss": 0.5234, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 6.543665436654367, |
|
"grad_norm": 0.6595677733421326, |
|
"learning_rate": 5.3159155930021e-05, |
|
"loss": 0.5194, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 6.5600656006560065, |
|
"grad_norm": 0.5784692168235779, |
|
"learning_rate": 5.270401191119143e-05, |
|
"loss": 0.5222, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.576465764657646, |
|
"grad_norm": 0.6419820189476013, |
|
"learning_rate": 5.2250126494600916e-05, |
|
"loss": 0.5352, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.592865928659286, |
|
"grad_norm": 0.6142451763153076, |
|
"learning_rate": 5.179751175867784e-05, |
|
"loss": 0.5357, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 6.609266092660927, |
|
"grad_norm": 0.6860802173614502, |
|
"learning_rate": 5.1346179748036116e-05, |
|
"loss": 0.531, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 6.625666256662567, |
|
"grad_norm": 0.6106059551239014, |
|
"learning_rate": 5.0896142473154987e-05, |
|
"loss": 0.5333, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 6.642066420664206, |
|
"grad_norm": 0.6644913554191589, |
|
"learning_rate": 5.044741191005908e-05, |
|
"loss": 0.5403, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.658466584665847, |
|
"grad_norm": 0.5942371487617493, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.5467, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 6.674866748667487, |
|
"grad_norm": 0.617970883846283, |
|
"learning_rate": 4.9553918649138386e-05, |
|
"loss": 0.5325, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 6.691266912669127, |
|
"grad_norm": 0.6382017135620117, |
|
"learning_rate": 4.910917972822713e-05, |
|
"loss": 0.525, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 6.7076670766707664, |
|
"grad_norm": 0.6390166282653809, |
|
"learning_rate": 4.866579507229545e-05, |
|
"loss": 0.5356, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 6.724067240672406, |
|
"grad_norm": 0.6630433201789856, |
|
"learning_rate": 4.822377648033394e-05, |
|
"loss": 0.541, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.740467404674047, |
|
"grad_norm": 0.6468001008033752, |
|
"learning_rate": 4.7783135714980744e-05, |
|
"loss": 0.5399, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 6.756867568675687, |
|
"grad_norm": 0.620833694934845, |
|
"learning_rate": 4.734388450220825e-05, |
|
"loss": 0.5265, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 6.7732677326773265, |
|
"grad_norm": 0.61692875623703, |
|
"learning_rate": 4.6906034531011346e-05, |
|
"loss": 0.5303, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 6.789667896678967, |
|
"grad_norm": 0.6430515646934509, |
|
"learning_rate": 4.646959745309609e-05, |
|
"loss": 0.543, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.806068060680607, |
|
"grad_norm": 0.653596043586731, |
|
"learning_rate": 4.603458488256992e-05, |
|
"loss": 0.5384, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 6.822468224682247, |
|
"grad_norm": 0.6125525832176208, |
|
"learning_rate": 4.560100839563229e-05, |
|
"loss": 0.5417, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 6.838868388683887, |
|
"grad_norm": 0.5968248844146729, |
|
"learning_rate": 4.516887953026691e-05, |
|
"loss": 0.5326, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 6.855268552685526, |
|
"grad_norm": 0.5980390906333923, |
|
"learning_rate": 4.4738209785934505e-05, |
|
"loss": 0.5313, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.871668716687167, |
|
"grad_norm": 0.614548921585083, |
|
"learning_rate": 4.430901062326681e-05, |
|
"loss": 0.5321, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 6.888068880688807, |
|
"grad_norm": 0.5848116278648376, |
|
"learning_rate": 4.388129346376178e-05, |
|
"loss": 0.542, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.904469044690447, |
|
"grad_norm": 0.6172247529029846, |
|
"learning_rate": 4.345506968947931e-05, |
|
"loss": 0.5439, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 6.920869208692087, |
|
"grad_norm": 0.6525529026985168, |
|
"learning_rate": 4.303035064273878e-05, |
|
"loss": 0.5319, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 6.937269372693727, |
|
"grad_norm": 0.614452064037323, |
|
"learning_rate": 4.260714762581677e-05, |
|
"loss": 0.5287, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 6.953669536695367, |
|
"grad_norm": 0.6268100142478943, |
|
"learning_rate": 4.21854719006467e-05, |
|
"loss": 0.5337, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 6.970069700697007, |
|
"grad_norm": 0.6285504698753357, |
|
"learning_rate": 4.1765334688518766e-05, |
|
"loss": 0.5402, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.9864698646986465, |
|
"grad_norm": 0.6280767917633057, |
|
"learning_rate": 4.13467471697817e-05, |
|
"loss": 0.533, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 7.002870028700287, |
|
"grad_norm": 0.5814547538757324, |
|
"learning_rate": 4.092972048354491e-05, |
|
"loss": 0.5346, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 7.019270192701927, |
|
"grad_norm": 0.6583351492881775, |
|
"learning_rate": 4.0514265727382215e-05, |
|
"loss": 0.5261, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 7.035670356703567, |
|
"grad_norm": 0.6630005240440369, |
|
"learning_rate": 4.010039395703664e-05, |
|
"loss": 0.5196, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 7.0520705207052075, |
|
"grad_norm": 0.6557034254074097, |
|
"learning_rate": 3.968811618612592e-05, |
|
"loss": 0.5161, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.068470684706847, |
|
"grad_norm": 0.6600791215896606, |
|
"learning_rate": 3.927744338584972e-05, |
|
"loss": 0.5066, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 7.084870848708487, |
|
"grad_norm": 0.6123402118682861, |
|
"learning_rate": 3.8868386484697417e-05, |
|
"loss": 0.5195, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 7.101271012710127, |
|
"grad_norm": 0.6454523801803589, |
|
"learning_rate": 3.84609563681575e-05, |
|
"loss": 0.5113, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 7.1176711767117675, |
|
"grad_norm": 0.6421491503715515, |
|
"learning_rate": 3.80551638784277e-05, |
|
"loss": 0.5134, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 7.134071340713407, |
|
"grad_norm": 0.6418097615242004, |
|
"learning_rate": 3.7651019814126654e-05, |
|
"loss": 0.5185, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 7.150471504715047, |
|
"grad_norm": 0.6425509452819824, |
|
"learning_rate": 3.724853493000635e-05, |
|
"loss": 0.5251, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 7.166871668716687, |
|
"grad_norm": 0.6572188138961792, |
|
"learning_rate": 3.6847719936666124e-05, |
|
"loss": 0.5124, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 7.183271832718328, |
|
"grad_norm": 0.6694904565811157, |
|
"learning_rate": 3.6448585500267485e-05, |
|
"loss": 0.5174, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 7.199671996719967, |
|
"grad_norm": 0.6733431816101074, |
|
"learning_rate": 3.605114224225028e-05, |
|
"loss": 0.5227, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 7.216072160721607, |
|
"grad_norm": 0.6532800793647766, |
|
"learning_rate": 3.565540073905025e-05, |
|
"loss": 0.5128, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.232472324723247, |
|
"grad_norm": 0.6475611329078674, |
|
"learning_rate": 3.5261371521817244e-05, |
|
"loss": 0.5282, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 7.248872488724888, |
|
"grad_norm": 0.6134998202323914, |
|
"learning_rate": 3.486906507613531e-05, |
|
"loss": 0.5118, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 7.2652726527265274, |
|
"grad_norm": 0.648802638053894, |
|
"learning_rate": 3.4478491841743397e-05, |
|
"loss": 0.5253, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 7.281672816728167, |
|
"grad_norm": 0.6617629528045654, |
|
"learning_rate": 3.408966221225773e-05, |
|
"loss": 0.5177, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 7.298072980729807, |
|
"grad_norm": 0.6465151906013489, |
|
"learning_rate": 3.370258653489505e-05, |
|
"loss": 0.5133, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 7.314473144731448, |
|
"grad_norm": 0.6327465176582336, |
|
"learning_rate": 3.331727511019749e-05, |
|
"loss": 0.5155, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 7.3308733087330875, |
|
"grad_norm": 0.6604763865470886, |
|
"learning_rate": 3.293373819175816e-05, |
|
"loss": 0.519, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 7.347273472734727, |
|
"grad_norm": 0.681736171245575, |
|
"learning_rate": 3.2551985985948616e-05, |
|
"loss": 0.5243, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 7.363673636736367, |
|
"grad_norm": 0.6531623601913452, |
|
"learning_rate": 3.217202865164697e-05, |
|
"loss": 0.5201, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 7.380073800738008, |
|
"grad_norm": 0.674201250076294, |
|
"learning_rate": 3.1793876299967816e-05, |
|
"loss": 0.5252, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.396473964739648, |
|
"grad_norm": 0.6714997291564941, |
|
"learning_rate": 3.141753899399289e-05, |
|
"loss": 0.5253, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 7.412874128741287, |
|
"grad_norm": 0.6872825622558594, |
|
"learning_rate": 3.104302674850346e-05, |
|
"loss": 0.5311, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 7.429274292742927, |
|
"grad_norm": 0.7338685989379883, |
|
"learning_rate": 3.0670349529713816e-05, |
|
"loss": 0.515, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 7.445674456744568, |
|
"grad_norm": 0.6552906036376953, |
|
"learning_rate": 3.0299517255005937e-05, |
|
"loss": 0.5077, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 7.462074620746208, |
|
"grad_norm": 0.6388695240020752, |
|
"learning_rate": 2.993053979266577e-05, |
|
"loss": 0.5296, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 7.478474784747847, |
|
"grad_norm": 0.6324784159660339, |
|
"learning_rate": 2.9563426961620367e-05, |
|
"loss": 0.5249, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 7.494874948749487, |
|
"grad_norm": 0.6526088714599609, |
|
"learning_rate": 2.9198188531176863e-05, |
|
"loss": 0.5164, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 7.511275112751127, |
|
"grad_norm": 0.6632060408592224, |
|
"learning_rate": 2.883483422076225e-05, |
|
"loss": 0.5185, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 7.527675276752768, |
|
"grad_norm": 0.643341600894928, |
|
"learning_rate": 2.8473373699664997e-05, |
|
"loss": 0.5162, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 7.5440754407544075, |
|
"grad_norm": 0.6466885805130005, |
|
"learning_rate": 2.811381658677744e-05, |
|
"loss": 0.5103, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.560475604756047, |
|
"grad_norm": 0.6679097414016724, |
|
"learning_rate": 2.7756172450340134e-05, |
|
"loss": 0.5238, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 7.576875768757688, |
|
"grad_norm": 0.6607965230941772, |
|
"learning_rate": 2.7400450807686938e-05, |
|
"loss": 0.5104, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 7.593275932759328, |
|
"grad_norm": 0.64109867811203, |
|
"learning_rate": 2.70466611249919e-05, |
|
"loss": 0.5199, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 7.609676096760968, |
|
"grad_norm": 0.6699521541595459, |
|
"learning_rate": 2.669481281701739e-05, |
|
"loss": 0.5339, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 7.626076260762607, |
|
"grad_norm": 0.6698071360588074, |
|
"learning_rate": 2.6344915246863412e-05, |
|
"loss": 0.5128, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 7.642476424764247, |
|
"grad_norm": 0.6424985527992249, |
|
"learning_rate": 2.5996977725718607e-05, |
|
"loss": 0.5053, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 7.658876588765888, |
|
"grad_norm": 0.7101566195487976, |
|
"learning_rate": 2.5651009512612312e-05, |
|
"loss": 0.5311, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 7.675276752767528, |
|
"grad_norm": 0.6432877779006958, |
|
"learning_rate": 2.5307019814168342e-05, |
|
"loss": 0.5216, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 7.691676916769167, |
|
"grad_norm": 0.6850785613059998, |
|
"learning_rate": 2.496501778435977e-05, |
|
"loss": 0.5108, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 7.708077080770808, |
|
"grad_norm": 0.6734909415245056, |
|
"learning_rate": 2.462501252426559e-05, |
|
"loss": 0.5186, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.724477244772448, |
|
"grad_norm": 0.7187743782997131, |
|
"learning_rate": 2.4287013081828257e-05, |
|
"loss": 0.5182, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 7.740877408774088, |
|
"grad_norm": 0.7147987484931946, |
|
"learning_rate": 2.3951028451613144e-05, |
|
"loss": 0.5193, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 7.7572775727757275, |
|
"grad_norm": 0.7066251635551453, |
|
"learning_rate": 2.3617067574569087e-05, |
|
"loss": 0.5175, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 7.773677736777367, |
|
"grad_norm": 0.7206938862800598, |
|
"learning_rate": 2.328513933779034e-05, |
|
"loss": 0.5255, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 7.790077900779008, |
|
"grad_norm": 0.6903772354125977, |
|
"learning_rate": 2.2955252574280328e-05, |
|
"loss": 0.512, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.806478064780648, |
|
"grad_norm": 0.6938795447349548, |
|
"learning_rate": 2.2627416062716366e-05, |
|
"loss": 0.513, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 7.822878228782288, |
|
"grad_norm": 0.6563030481338501, |
|
"learning_rate": 2.2301638527216194e-05, |
|
"loss": 0.5255, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 7.839278392783928, |
|
"grad_norm": 0.676189124584198, |
|
"learning_rate": 2.1977928637105692e-05, |
|
"loss": 0.5331, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 7.855678556785568, |
|
"grad_norm": 0.6740292906761169, |
|
"learning_rate": 2.1656295006688353e-05, |
|
"loss": 0.5161, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 7.872078720787208, |
|
"grad_norm": 0.6368678212165833, |
|
"learning_rate": 2.1336746195015846e-05, |
|
"loss": 0.5181, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.888478884788848, |
|
"grad_norm": 0.6666802763938904, |
|
"learning_rate": 2.1019290705660356e-05, |
|
"loss": 0.5248, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 7.904879048790487, |
|
"grad_norm": 0.7021058797836304, |
|
"learning_rate": 2.070393698648836e-05, |
|
"loss": 0.5076, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 7.921279212792128, |
|
"grad_norm": 0.6414440870285034, |
|
"learning_rate": 2.0390693429435627e-05, |
|
"loss": 0.5091, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.937679376793768, |
|
"grad_norm": 0.6494581699371338, |
|
"learning_rate": 2.0079568370284128e-05, |
|
"loss": 0.5205, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 7.954079540795408, |
|
"grad_norm": 0.6568921208381653, |
|
"learning_rate": 1.977057008844e-05, |
|
"loss": 0.5161, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 7.970479704797048, |
|
"grad_norm": 0.6623067855834961, |
|
"learning_rate": 1.946370680671341e-05, |
|
"loss": 0.5134, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 7.986879868798688, |
|
"grad_norm": 0.6465177536010742, |
|
"learning_rate": 1.9158986691099544e-05, |
|
"loss": 0.5097, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 8.003280032800328, |
|
"grad_norm": 0.6603899002075195, |
|
"learning_rate": 1.885641785056149e-05, |
|
"loss": 0.5152, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 8.019680196801968, |
|
"grad_norm": 0.6793326735496521, |
|
"learning_rate": 1.85560083368143e-05, |
|
"loss": 0.5201, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 8.036080360803608, |
|
"grad_norm": 0.6705737709999084, |
|
"learning_rate": 1.825776614411082e-05, |
|
"loss": 0.4945, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 8.052480524805247, |
|
"grad_norm": 0.6642212271690369, |
|
"learning_rate": 1.7961699209028905e-05, |
|
"loss": 0.5019, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 8.068880688806889, |
|
"grad_norm": 0.6646486520767212, |
|
"learning_rate": 1.766781541026018e-05, |
|
"loss": 0.5245, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 8.085280852808529, |
|
"grad_norm": 0.6514326930046082, |
|
"learning_rate": 1.7376122568400532e-05, |
|
"loss": 0.5083, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 8.101681016810169, |
|
"grad_norm": 0.6762531995773315, |
|
"learning_rate": 1.708662844574178e-05, |
|
"loss": 0.5144, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 8.118081180811808, |
|
"grad_norm": 0.6511978507041931, |
|
"learning_rate": 1.679934074606533e-05, |
|
"loss": 0.5045, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 8.134481344813448, |
|
"grad_norm": 0.6896832585334778, |
|
"learning_rate": 1.6514267114436945e-05, |
|
"loss": 0.5089, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 8.150881508815088, |
|
"grad_norm": 0.6686916947364807, |
|
"learning_rate": 1.6231415137003537e-05, |
|
"loss": 0.5048, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 8.167281672816728, |
|
"grad_norm": 0.6836830377578735, |
|
"learning_rate": 1.5950792340791043e-05, |
|
"loss": 0.5072, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 8.183681836818367, |
|
"grad_norm": 0.6605989336967468, |
|
"learning_rate": 1.5672406193504384e-05, |
|
"loss": 0.5115, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 8.200082000820009, |
|
"grad_norm": 0.6973133087158203, |
|
"learning_rate": 1.5396264103328474e-05, |
|
"loss": 0.5132, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.216482164821649, |
|
"grad_norm": 0.7265208959579468, |
|
"learning_rate": 1.5122373418731306e-05, |
|
"loss": 0.5036, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 8.232882328823289, |
|
"grad_norm": 0.6954894661903381, |
|
"learning_rate": 1.4850741428268244e-05, |
|
"loss": 0.5037, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 8.249282492824928, |
|
"grad_norm": 0.655786395072937, |
|
"learning_rate": 1.4581375360388183e-05, |
|
"loss": 0.515, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 8.265682656826568, |
|
"grad_norm": 0.674171507358551, |
|
"learning_rate": 1.4314282383241096e-05, |
|
"loss": 0.4967, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 8.282082820828208, |
|
"grad_norm": 0.7670585513114929, |
|
"learning_rate": 1.4049469604487297e-05, |
|
"loss": 0.4991, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 8.298482984829848, |
|
"grad_norm": 0.6614052057266235, |
|
"learning_rate": 1.3786944071108398e-05, |
|
"loss": 0.5169, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.314883148831488, |
|
"grad_norm": 0.7165561318397522, |
|
"learning_rate": 1.3526712769219618e-05, |
|
"loss": 0.502, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 8.33128331283313, |
|
"grad_norm": 0.7127593159675598, |
|
"learning_rate": 1.3268782623884047e-05, |
|
"loss": 0.5091, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 8.347683476834769, |
|
"grad_norm": 0.6743724346160889, |
|
"learning_rate": 1.301316049892818e-05, |
|
"loss": 0.5019, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 8.364083640836409, |
|
"grad_norm": 0.6609333157539368, |
|
"learning_rate": 1.2759853196759453e-05, |
|
"loss": 0.5052, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.380483804838049, |
|
"grad_norm": 0.7156481146812439, |
|
"learning_rate": 1.2508867458185037e-05, |
|
"loss": 0.51, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 8.396883968839688, |
|
"grad_norm": 0.7323744893074036, |
|
"learning_rate": 1.2260209962232628e-05, |
|
"loss": 0.5092, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 8.413284132841328, |
|
"grad_norm": 0.6540841460227966, |
|
"learning_rate": 1.201388732597255e-05, |
|
"loss": 0.507, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 8.429684296842968, |
|
"grad_norm": 0.6970362663269043, |
|
"learning_rate": 1.1769906104341832e-05, |
|
"loss": 0.5216, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 8.446084460844608, |
|
"grad_norm": 0.6555099487304688, |
|
"learning_rate": 1.1528272789969618e-05, |
|
"loss": 0.5001, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 8.46248462484625, |
|
"grad_norm": 0.6940327286720276, |
|
"learning_rate": 1.1288993813004467e-05, |
|
"loss": 0.5025, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 8.478884788847889, |
|
"grad_norm": 0.6825557351112366, |
|
"learning_rate": 1.1052075540943296e-05, |
|
"loss": 0.5089, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 8.495284952849529, |
|
"grad_norm": 0.6553847789764404, |
|
"learning_rate": 1.0817524278461776e-05, |
|
"loss": 0.5052, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 8.511685116851169, |
|
"grad_norm": 0.6892216205596924, |
|
"learning_rate": 1.0585346267246743e-05, |
|
"loss": 0.5158, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 8.528085280852808, |
|
"grad_norm": 0.6756864786148071, |
|
"learning_rate": 1.0355547685829926e-05, |
|
"loss": 0.5133, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.544485444854448, |
|
"grad_norm": 0.7741680145263672, |
|
"learning_rate": 1.0128134649423671e-05, |
|
"loss": 0.5167, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 8.560885608856088, |
|
"grad_norm": 0.6552737355232239, |
|
"learning_rate": 9.903113209758096e-06, |
|
"loss": 0.5094, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 8.577285772857728, |
|
"grad_norm": 0.6564160585403442, |
|
"learning_rate": 9.680489354920152e-06, |
|
"loss": 0.5048, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 8.59368593685937, |
|
"grad_norm": 0.7316927313804626, |
|
"learning_rate": 9.460269009194167e-06, |
|
"loss": 0.499, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 8.61008610086101, |
|
"grad_norm": 0.7389256358146667, |
|
"learning_rate": 9.242458032904311e-06, |
|
"loss": 0.5022, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 8.626486264862649, |
|
"grad_norm": 0.6633957624435425, |
|
"learning_rate": 9.027062222258487e-06, |
|
"loss": 0.5042, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 8.642886428864289, |
|
"grad_norm": 0.640006422996521, |
|
"learning_rate": 8.814087309194251e-06, |
|
"loss": 0.5068, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 8.659286592865929, |
|
"grad_norm": 0.6830674409866333, |
|
"learning_rate": 8.603538961226232e-06, |
|
"loss": 0.502, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 8.675686756867568, |
|
"grad_norm": 0.6639739871025085, |
|
"learning_rate": 8.395422781295192e-06, |
|
"loss": 0.5177, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 8.692086920869208, |
|
"grad_norm": 0.6661989092826843, |
|
"learning_rate": 8.189744307619118e-06, |
|
"loss": 0.5014, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.708487084870848, |
|
"grad_norm": 0.6645560264587402, |
|
"learning_rate": 7.986509013545673e-06, |
|
"loss": 0.5017, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 8.724887248872488, |
|
"grad_norm": 0.7042533159255981, |
|
"learning_rate": 7.785722307406684e-06, |
|
"loss": 0.5163, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 8.74128741287413, |
|
"grad_norm": 0.698277473449707, |
|
"learning_rate": 7.587389532374123e-06, |
|
"loss": 0.5054, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 8.75768757687577, |
|
"grad_norm": 0.6191078424453735, |
|
"learning_rate": 7.3915159663179075e-06, |
|
"loss": 0.4992, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 8.774087740877409, |
|
"grad_norm": 0.6769982576370239, |
|
"learning_rate": 7.198106821665585e-06, |
|
"loss": 0.5147, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 8.790487904879049, |
|
"grad_norm": 0.6643761396408081, |
|
"learning_rate": 7.007167245263435e-06, |
|
"loss": 0.5105, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 8.806888068880689, |
|
"grad_norm": 0.6664229035377502, |
|
"learning_rate": 6.818702318239689e-06, |
|
"loss": 0.5021, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 8.823288232882328, |
|
"grad_norm": 0.6411841511726379, |
|
"learning_rate": 6.632717055869164e-06, |
|
"loss": 0.5076, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 8.839688396883968, |
|
"grad_norm": 0.6643224954605103, |
|
"learning_rate": 6.4492164074399065e-06, |
|
"loss": 0.5044, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 8.85608856088561, |
|
"grad_norm": 0.6722341775894165, |
|
"learning_rate": 6.268205256121396e-06, |
|
"loss": 0.5092, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.87248872488725, |
|
"grad_norm": 0.6829484105110168, |
|
"learning_rate": 6.089688418834727e-06, |
|
"loss": 0.5164, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 0.7037432789802551, |
|
"learning_rate": 5.913670646124236e-06, |
|
"loss": 0.5142, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 8.905289052890529, |
|
"grad_norm": 0.6822744607925415, |
|
"learning_rate": 5.7401566220313005e-06, |
|
"loss": 0.5018, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 8.921689216892169, |
|
"grad_norm": 0.6738746166229248, |
|
"learning_rate": 5.569150963969494e-06, |
|
"loss": 0.5013, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 8.938089380893809, |
|
"grad_norm": 0.6965447664260864, |
|
"learning_rate": 5.400658222601873e-06, |
|
"loss": 0.5014, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 8.954489544895448, |
|
"grad_norm": 0.7140269875526428, |
|
"learning_rate": 5.2346828817197655e-06, |
|
"loss": 0.5119, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 8.970889708897088, |
|
"grad_norm": 0.6803216934204102, |
|
"learning_rate": 5.071229358123464e-06, |
|
"loss": 0.5021, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 8.987289872898728, |
|
"grad_norm": 0.6827109456062317, |
|
"learning_rate": 4.910302001504807e-06, |
|
"loss": 0.5157, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 9.00369003690037, |
|
"grad_norm": 0.6659403443336487, |
|
"learning_rate": 4.7519050943312325e-06, |
|
"loss": 0.4975, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 9.02009020090201, |
|
"grad_norm": 0.7039642333984375, |
|
"learning_rate": 4.596042851732008e-06, |
|
"loss": 0.4949, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.03649036490365, |
|
"grad_norm": 0.6756150722503662, |
|
"learning_rate": 4.442719421385922e-06, |
|
"loss": 0.4925, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 9.052890528905289, |
|
"grad_norm": 0.684483528137207, |
|
"learning_rate": 4.291938883411007e-06, |
|
"loss": 0.4997, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 9.069290692906929, |
|
"grad_norm": 0.6780720353126526, |
|
"learning_rate": 4.143705250255869e-06, |
|
"loss": 0.5104, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 9.085690856908569, |
|
"grad_norm": 0.6675730347633362, |
|
"learning_rate": 3.99802246659301e-06, |
|
"loss": 0.5181, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 9.102091020910208, |
|
"grad_norm": 0.6699404120445251, |
|
"learning_rate": 3.85489440921376e-06, |
|
"loss": 0.506, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 9.11849118491185, |
|
"grad_norm": 0.6987513899803162, |
|
"learning_rate": 3.7143248869252022e-06, |
|
"loss": 0.5058, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 9.13489134891349, |
|
"grad_norm": 0.6789569854736328, |
|
"learning_rate": 3.5763176404487564e-06, |
|
"loss": 0.4977, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 9.15129151291513, |
|
"grad_norm": 0.6613333821296692, |
|
"learning_rate": 3.440876342320609e-06, |
|
"loss": 0.5096, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 9.16769167691677, |
|
"grad_norm": 0.689217746257782, |
|
"learning_rate": 3.308004596794101e-06, |
|
"loss": 0.4949, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 9.18409184091841, |
|
"grad_norm": 0.7100328803062439, |
|
"learning_rate": 3.1777059397436692e-06, |
|
"loss": 0.5044, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.200492004920049, |
|
"grad_norm": 0.6680495738983154, |
|
"learning_rate": 3.049983838570858e-06, |
|
"loss": 0.5037, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 9.216892168921689, |
|
"grad_norm": 0.6641035676002502, |
|
"learning_rate": 2.9248416921119794e-06, |
|
"loss": 0.4964, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 9.233292332923329, |
|
"grad_norm": 0.7248251438140869, |
|
"learning_rate": 2.8022828305477423e-06, |
|
"loss": 0.5017, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 9.24969249692497, |
|
"grad_norm": 0.7348821759223938, |
|
"learning_rate": 2.682310515314512e-06, |
|
"loss": 0.5077, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 9.26609266092661, |
|
"grad_norm": 0.6882705688476562, |
|
"learning_rate": 2.5649279390176806e-06, |
|
"loss": 0.4926, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 9.28249282492825, |
|
"grad_norm": 0.6873205900192261, |
|
"learning_rate": 2.4501382253465543e-06, |
|
"loss": 0.5009, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 9.29889298892989, |
|
"grad_norm": 0.703940212726593, |
|
"learning_rate": 2.3379444289913342e-06, |
|
"loss": 0.5011, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 9.31529315293153, |
|
"grad_norm": 0.6873738169670105, |
|
"learning_rate": 2.228349535561769e-06, |
|
"loss": 0.4991, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 9.331693316933169, |
|
"grad_norm": 0.6344056725502014, |
|
"learning_rate": 2.1213564615077065e-06, |
|
"loss": 0.5099, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 9.348093480934809, |
|
"grad_norm": 0.6923158168792725, |
|
"learning_rate": 2.016968054041546e-06, |
|
"loss": 0.5068, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.364493644936449, |
|
"grad_norm": 0.6982558965682983, |
|
"learning_rate": 1.915187091062387e-06, |
|
"loss": 0.5052, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 9.38089380893809, |
|
"grad_norm": 0.6965045928955078, |
|
"learning_rate": 1.816016281082178e-06, |
|
"loss": 0.4963, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 9.39729397293973, |
|
"grad_norm": 0.704952597618103, |
|
"learning_rate": 1.7194582631535617e-06, |
|
"loss": 0.4922, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 9.41369413694137, |
|
"grad_norm": 0.673637866973877, |
|
"learning_rate": 1.6255156067997323e-06, |
|
"loss": 0.4943, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 9.43009430094301, |
|
"grad_norm": 0.6612069010734558, |
|
"learning_rate": 1.5341908119459792e-06, |
|
"loss": 0.4973, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 9.44649446494465, |
|
"grad_norm": 0.7053012847900391, |
|
"learning_rate": 1.4454863088532388e-06, |
|
"loss": 0.5049, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 9.46289462894629, |
|
"grad_norm": 0.6800163388252258, |
|
"learning_rate": 1.3594044580533482e-06, |
|
"loss": 0.5009, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 9.479294792947929, |
|
"grad_norm": 0.7071276307106018, |
|
"learning_rate": 1.2759475502862828e-06, |
|
"loss": 0.5016, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 9.495694956949569, |
|
"grad_norm": 0.693181037902832, |
|
"learning_rate": 1.19511780643915e-06, |
|
"loss": 0.5006, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 9.512095120951209, |
|
"grad_norm": 0.7130532264709473, |
|
"learning_rate": 1.1169173774871478e-06, |
|
"loss": 0.5072, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.52849528495285, |
|
"grad_norm": 0.6810155510902405, |
|
"learning_rate": 1.0413483444362771e-06, |
|
"loss": 0.5014, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 9.54489544895449, |
|
"grad_norm": 0.6402135491371155, |
|
"learning_rate": 9.684127182679526e-07, |
|
"loss": 0.4956, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 9.56129561295613, |
|
"grad_norm": 0.6540482640266418, |
|
"learning_rate": 8.981124398855678e-07, |
|
"loss": 0.5035, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 9.57769577695777, |
|
"grad_norm": 0.6759128570556641, |
|
"learning_rate": 8.304493800627589e-07, |
|
"loss": 0.4896, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 9.59409594095941, |
|
"grad_norm": 0.67359459400177, |
|
"learning_rate": 7.654253393936439e-07, |
|
"loss": 0.5071, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 9.61049610496105, |
|
"grad_norm": 0.740897536277771, |
|
"learning_rate": 7.030420482449395e-07, |
|
"loss": 0.5013, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 9.626896268962689, |
|
"grad_norm": 0.687887966632843, |
|
"learning_rate": 6.433011667098754e-07, |
|
"loss": 0.5017, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 9.64329643296433, |
|
"grad_norm": 0.6390769481658936, |
|
"learning_rate": 5.862042845640403e-07, |
|
"loss": 0.5032, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 9.65969659696597, |
|
"grad_norm": 0.6661211848258972, |
|
"learning_rate": 5.317529212230721e-07, |
|
"loss": 0.5026, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 9.67609676096761, |
|
"grad_norm": 0.7113086581230164, |
|
"learning_rate": 4.799485257022118e-07, |
|
"loss": 0.4996, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 9.69249692496925, |
|
"grad_norm": 0.6506823301315308, |
|
"learning_rate": 4.307924765777682e-07, |
|
"loss": 0.5032, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 9.70889708897089, |
|
"grad_norm": 0.6711443066596985, |
|
"learning_rate": 3.842860819504024e-07, |
|
"loss": 0.502, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 9.72529725297253, |
|
"grad_norm": 0.7337656021118164, |
|
"learning_rate": 3.404305794103224e-07, |
|
"loss": 0.5138, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 9.74169741697417, |
|
"grad_norm": 0.6719791889190674, |
|
"learning_rate": 2.9922713600439854e-07, |
|
"loss": 0.5015, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 9.758097580975809, |
|
"grad_norm": 0.6952204704284668, |
|
"learning_rate": 2.606768482050215e-07, |
|
"loss": 0.5161, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 9.774497744977449, |
|
"grad_norm": 0.7182480096817017, |
|
"learning_rate": 2.2478074188099219e-07, |
|
"loss": 0.5056, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 9.79089790897909, |
|
"grad_norm": 0.7192881107330322, |
|
"learning_rate": 1.915397722702217e-07, |
|
"loss": 0.4972, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 9.80729807298073, |
|
"grad_norm": 0.6714368462562561, |
|
"learning_rate": 1.609548239542402e-07, |
|
"loss": 0.4958, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 9.82369823698237, |
|
"grad_norm": 0.6657147407531738, |
|
"learning_rate": 1.3302671083474938e-07, |
|
"loss": 0.4943, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 9.84009840098401, |
|
"grad_norm": 0.6431368589401245, |
|
"learning_rate": 1.0775617611189503e-07, |
|
"loss": 0.5079, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.85649856498565, |
|
"grad_norm": 0.7010581493377686, |
|
"learning_rate": 8.514389226452757e-08, |
|
"loss": 0.52, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 9.87289872898729, |
|
"grad_norm": 0.6842231750488281, |
|
"learning_rate": 6.519046103230508e-08, |
|
"loss": 0.5011, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 9.88929889298893, |
|
"grad_norm": 0.6943760514259338, |
|
"learning_rate": 4.789641339963957e-08, |
|
"loss": 0.5041, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 9.90569905699057, |
|
"grad_norm": 0.6967430710792542, |
|
"learning_rate": 3.3262209581619297e-08, |
|
"loss": 0.5096, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 9.92209922099221, |
|
"grad_norm": 0.6971690058708191, |
|
"learning_rate": 2.1288239011729717e-08, |
|
"loss": 0.5076, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 9.93849938499385, |
|
"grad_norm": 0.668303906917572, |
|
"learning_rate": 1.1974820331517312e-08, |
|
"loss": 0.497, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 9.95489954899549, |
|
"grad_norm": 0.6784859895706177, |
|
"learning_rate": 5.3222013820741765e-09, |
|
"loss": 0.5058, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 9.97129971299713, |
|
"grad_norm": 0.6811095476150513, |
|
"learning_rate": 1.3305591974543953e-09, |
|
"loss": 0.501, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 9.98769987699877, |
|
"grad_norm": 0.6882724165916443, |
|
"learning_rate": 0.0, |
|
"loss": 0.5079, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 9.98769987699877, |
|
"step": 6090, |
|
"total_flos": 4.6640782508241715e+17, |
|
"train_loss": 0.5962841084046513, |
|
"train_runtime": 32440.4443, |
|
"train_samples_per_second": 2.255, |
|
"train_steps_per_second": 0.188 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 4.6640782508241715e+17, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|