|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.703940362087327, |
|
"eval_steps": 500, |
|
"global_step": 3800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002242026792220167, |
|
"grad_norm": 1.0706044435501099, |
|
"learning_rate": 9.999986217521373e-05, |
|
"loss": 2.3592, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004484053584440334, |
|
"grad_norm": 1.1121090650558472, |
|
"learning_rate": 9.999944870161475e-05, |
|
"loss": 2.2216, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006726080376660501, |
|
"grad_norm": 1.4128837585449219, |
|
"learning_rate": 9.999875958148252e-05, |
|
"loss": 2.0607, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008968107168880668, |
|
"grad_norm": 1.3161660432815552, |
|
"learning_rate": 9.99977948186162e-05, |
|
"loss": 1.7687, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011210133961100835, |
|
"grad_norm": 1.2509621381759644, |
|
"learning_rate": 9.999655441833445e-05, |
|
"loss": 1.8135, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013452160753321002, |
|
"grad_norm": 1.2357916831970215, |
|
"learning_rate": 9.999503838747563e-05, |
|
"loss": 1.6791, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01569418754554117, |
|
"grad_norm": 1.3020069599151611, |
|
"learning_rate": 9.999324673439762e-05, |
|
"loss": 1.5995, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.017936214337761335, |
|
"grad_norm": 1.2886899709701538, |
|
"learning_rate": 9.999117946897775e-05, |
|
"loss": 1.5247, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.020178241129981504, |
|
"grad_norm": 1.5385006666183472, |
|
"learning_rate": 9.998883660261285e-05, |
|
"loss": 1.4753, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02242026792220167, |
|
"grad_norm": 1.3205918073654175, |
|
"learning_rate": 9.998621814821914e-05, |
|
"loss": 1.5195, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02466229471442184, |
|
"grad_norm": 1.1984736919403076, |
|
"learning_rate": 9.99833241202321e-05, |
|
"loss": 1.5227, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026904321506642005, |
|
"grad_norm": 1.4019296169281006, |
|
"learning_rate": 9.998015453460651e-05, |
|
"loss": 1.4167, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02914634829886217, |
|
"grad_norm": 1.3315256834030151, |
|
"learning_rate": 9.997670940881627e-05, |
|
"loss": 1.4096, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03138837509108234, |
|
"grad_norm": 1.1818488836288452, |
|
"learning_rate": 9.99729887618543e-05, |
|
"loss": 1.4869, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.033630401883302505, |
|
"grad_norm": 1.41718327999115, |
|
"learning_rate": 9.996899261423254e-05, |
|
"loss": 1.3926, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03587242867552267, |
|
"grad_norm": 1.1350888013839722, |
|
"learning_rate": 9.996472098798169e-05, |
|
"loss": 1.5258, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03811445546774284, |
|
"grad_norm": 1.2058793306350708, |
|
"learning_rate": 9.996017390665118e-05, |
|
"loss": 1.4866, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04035648225996301, |
|
"grad_norm": 1.3261053562164307, |
|
"learning_rate": 9.995535139530904e-05, |
|
"loss": 1.4103, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.042598509052183174, |
|
"grad_norm": 1.4682806730270386, |
|
"learning_rate": 9.995025348054175e-05, |
|
"loss": 1.3903, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.04484053584440334, |
|
"grad_norm": 1.4472965002059937, |
|
"learning_rate": 9.994488019045405e-05, |
|
"loss": 1.4353, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.047082562636623505, |
|
"grad_norm": 1.1997061967849731, |
|
"learning_rate": 9.993923155466884e-05, |
|
"loss": 1.4072, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04932458942884368, |
|
"grad_norm": 1.140612006187439, |
|
"learning_rate": 9.993330760432703e-05, |
|
"loss": 1.4188, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05156661622106384, |
|
"grad_norm": 1.353933572769165, |
|
"learning_rate": 9.992710837208726e-05, |
|
"loss": 1.3812, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.05380864301328401, |
|
"grad_norm": 1.6113260984420776, |
|
"learning_rate": 9.992063389212589e-05, |
|
"loss": 1.4144, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.056050669805504175, |
|
"grad_norm": 1.1334792375564575, |
|
"learning_rate": 9.991388420013666e-05, |
|
"loss": 1.4366, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05829269659772434, |
|
"grad_norm": 1.274436593055725, |
|
"learning_rate": 9.990685933333054e-05, |
|
"loss": 1.3906, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06053472338994451, |
|
"grad_norm": 1.1813853979110718, |
|
"learning_rate": 9.98995593304356e-05, |
|
"loss": 1.4297, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06277675018216468, |
|
"grad_norm": 1.367594599723816, |
|
"learning_rate": 9.989198423169666e-05, |
|
"loss": 1.3987, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06501877697438484, |
|
"grad_norm": 1.1809221506118774, |
|
"learning_rate": 9.98841340788752e-05, |
|
"loss": 1.4149, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.06726080376660501, |
|
"grad_norm": 1.1612411737442017, |
|
"learning_rate": 9.987600891524902e-05, |
|
"loss": 1.4063, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06950283055882518, |
|
"grad_norm": 1.2665232419967651, |
|
"learning_rate": 9.986760878561209e-05, |
|
"loss": 1.3765, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.07174485735104534, |
|
"grad_norm": 1.1557029485702515, |
|
"learning_rate": 9.985893373627426e-05, |
|
"loss": 1.3657, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07398688414326551, |
|
"grad_norm": 1.2104707956314087, |
|
"learning_rate": 9.984998381506099e-05, |
|
"loss": 1.4443, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.07622891093548569, |
|
"grad_norm": 1.7405874729156494, |
|
"learning_rate": 9.984075907131314e-05, |
|
"loss": 1.4025, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07847093772770584, |
|
"grad_norm": 1.366018295288086, |
|
"learning_rate": 9.983125955588662e-05, |
|
"loss": 1.3552, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.08071296451992602, |
|
"grad_norm": 1.171950101852417, |
|
"learning_rate": 9.982148532115218e-05, |
|
"loss": 1.3902, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08295499131214618, |
|
"grad_norm": 1.1674119234085083, |
|
"learning_rate": 9.98114364209951e-05, |
|
"loss": 1.3777, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08519701810436635, |
|
"grad_norm": 1.069298505783081, |
|
"learning_rate": 9.980111291081488e-05, |
|
"loss": 1.4023, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08743904489658652, |
|
"grad_norm": 1.3680400848388672, |
|
"learning_rate": 9.979051484752496e-05, |
|
"loss": 1.3485, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.08968107168880668, |
|
"grad_norm": 1.2846928834915161, |
|
"learning_rate": 9.977964228955232e-05, |
|
"loss": 1.4068, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09192309848102685, |
|
"grad_norm": 1.1173886060714722, |
|
"learning_rate": 9.976849529683734e-05, |
|
"loss": 1.4086, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09416512527324701, |
|
"grad_norm": 1.1485246419906616, |
|
"learning_rate": 9.975707393083327e-05, |
|
"loss": 1.3981, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09640715206546718, |
|
"grad_norm": 1.503753900527954, |
|
"learning_rate": 9.974537825450598e-05, |
|
"loss": 1.3903, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.09864917885768736, |
|
"grad_norm": 1.1287413835525513, |
|
"learning_rate": 9.973340833233369e-05, |
|
"loss": 1.4314, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.10089120564990751, |
|
"grad_norm": 1.2056329250335693, |
|
"learning_rate": 9.972116423030641e-05, |
|
"loss": 1.402, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10313323244212769, |
|
"grad_norm": 1.5625340938568115, |
|
"learning_rate": 9.970864601592583e-05, |
|
"loss": 1.3223, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10537525923434785, |
|
"grad_norm": 1.3235552310943604, |
|
"learning_rate": 9.969585375820474e-05, |
|
"loss": 1.3783, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.10761728602656802, |
|
"grad_norm": 1.2155221700668335, |
|
"learning_rate": 9.968278752766672e-05, |
|
"loss": 1.422, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.10985931281878819, |
|
"grad_norm": 1.470157504081726, |
|
"learning_rate": 9.966944739634581e-05, |
|
"loss": 1.4048, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.11210133961100835, |
|
"grad_norm": 1.295973300933838, |
|
"learning_rate": 9.965583343778605e-05, |
|
"loss": 1.4016, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11434336640322852, |
|
"grad_norm": 1.306946873664856, |
|
"learning_rate": 9.964194572704106e-05, |
|
"loss": 1.3679, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11658539319544868, |
|
"grad_norm": 1.2880349159240723, |
|
"learning_rate": 9.962778434067368e-05, |
|
"loss": 1.4271, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11882741998766885, |
|
"grad_norm": 1.165236473083496, |
|
"learning_rate": 9.96133493567555e-05, |
|
"loss": 1.4263, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.12106944677988903, |
|
"grad_norm": 1.293133020401001, |
|
"learning_rate": 9.959864085486648e-05, |
|
"loss": 1.4395, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12331147357210918, |
|
"grad_norm": 1.165431022644043, |
|
"learning_rate": 9.958365891609444e-05, |
|
"loss": 1.3845, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12555350036432936, |
|
"grad_norm": 1.262797474861145, |
|
"learning_rate": 9.956840362303473e-05, |
|
"loss": 1.4234, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 1.3967214822769165, |
|
"learning_rate": 9.955287505978959e-05, |
|
"loss": 1.3922, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.13003755394876967, |
|
"grad_norm": 1.1680519580841064, |
|
"learning_rate": 9.953707331196787e-05, |
|
"loss": 1.3595, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13227958074098986, |
|
"grad_norm": 1.2890777587890625, |
|
"learning_rate": 9.95209984666845e-05, |
|
"loss": 1.3109, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.13452160753321002, |
|
"grad_norm": 1.3940659761428833, |
|
"learning_rate": 9.950465061255996e-05, |
|
"loss": 1.4059, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13676363432543018, |
|
"grad_norm": 1.1973896026611328, |
|
"learning_rate": 9.948802983971981e-05, |
|
"loss": 1.377, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.13900566111765036, |
|
"grad_norm": 1.222718358039856, |
|
"learning_rate": 9.947113623979422e-05, |
|
"loss": 1.3703, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.14124768790987052, |
|
"grad_norm": 1.3250412940979004, |
|
"learning_rate": 9.945396990591751e-05, |
|
"loss": 1.2982, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.14348971470209068, |
|
"grad_norm": 1.1987338066101074, |
|
"learning_rate": 9.943653093272749e-05, |
|
"loss": 1.3408, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14573174149431087, |
|
"grad_norm": 1.0673400163650513, |
|
"learning_rate": 9.941881941636506e-05, |
|
"loss": 1.3754, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14797376828653103, |
|
"grad_norm": 1.1687766313552856, |
|
"learning_rate": 9.94008354544737e-05, |
|
"loss": 1.352, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.15021579507875119, |
|
"grad_norm": 1.1456810235977173, |
|
"learning_rate": 9.938257914619882e-05, |
|
"loss": 1.3725, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.15245782187097137, |
|
"grad_norm": 1.0828138589859009, |
|
"learning_rate": 9.936405059218728e-05, |
|
"loss": 1.3499, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15469984866319153, |
|
"grad_norm": 1.4693915843963623, |
|
"learning_rate": 9.934524989458684e-05, |
|
"loss": 1.3956, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.1569418754554117, |
|
"grad_norm": 1.2608237266540527, |
|
"learning_rate": 9.932617715704562e-05, |
|
"loss": 1.3734, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15918390224763185, |
|
"grad_norm": 1.2090539932250977, |
|
"learning_rate": 9.930683248471142e-05, |
|
"loss": 1.4775, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.16142592903985203, |
|
"grad_norm": 1.1096559762954712, |
|
"learning_rate": 9.928721598423125e-05, |
|
"loss": 1.3189, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1636679558320722, |
|
"grad_norm": 1.3460302352905273, |
|
"learning_rate": 9.926732776375073e-05, |
|
"loss": 1.3477, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.16590998262429235, |
|
"grad_norm": 1.372318983078003, |
|
"learning_rate": 9.924716793291346e-05, |
|
"loss": 1.3753, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16815200941651254, |
|
"grad_norm": 1.4617116451263428, |
|
"learning_rate": 9.922673660286039e-05, |
|
"loss": 1.3927, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.1703940362087327, |
|
"grad_norm": 1.1783477067947388, |
|
"learning_rate": 9.920603388622928e-05, |
|
"loss": 1.3698, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.17263606300095286, |
|
"grad_norm": 1.087998628616333, |
|
"learning_rate": 9.918505989715403e-05, |
|
"loss": 1.3449, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17487808979317304, |
|
"grad_norm": 1.1929972171783447, |
|
"learning_rate": 9.916381475126407e-05, |
|
"loss": 1.4253, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1771201165853932, |
|
"grad_norm": 1.3316450119018555, |
|
"learning_rate": 9.914229856568369e-05, |
|
"loss": 1.4232, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.17936214337761336, |
|
"grad_norm": 1.0855283737182617, |
|
"learning_rate": 9.912051145903144e-05, |
|
"loss": 1.3813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18160417016983352, |
|
"grad_norm": 1.1286154985427856, |
|
"learning_rate": 9.909845355141946e-05, |
|
"loss": 1.3733, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1838461969620537, |
|
"grad_norm": 1.1122159957885742, |
|
"learning_rate": 9.90761249644528e-05, |
|
"loss": 1.3546, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18608822375427386, |
|
"grad_norm": 1.0875800848007202, |
|
"learning_rate": 9.905352582122878e-05, |
|
"loss": 1.4501, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.18833025054649402, |
|
"grad_norm": 1.130376935005188, |
|
"learning_rate": 9.903065624633628e-05, |
|
"loss": 1.404, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1905722773387142, |
|
"grad_norm": 1.267067790031433, |
|
"learning_rate": 9.900751636585506e-05, |
|
"loss": 1.3379, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.19281430413093437, |
|
"grad_norm": 1.1137257814407349, |
|
"learning_rate": 9.898410630735509e-05, |
|
"loss": 1.3062, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19505633092315453, |
|
"grad_norm": 1.3096929788589478, |
|
"learning_rate": 9.896042619989581e-05, |
|
"loss": 1.4038, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1972983577153747, |
|
"grad_norm": 1.3187003135681152, |
|
"learning_rate": 9.893647617402548e-05, |
|
"loss": 1.3936, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19954038450759487, |
|
"grad_norm": 1.1851136684417725, |
|
"learning_rate": 9.891225636178037e-05, |
|
"loss": 1.3456, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.20178241129981503, |
|
"grad_norm": 1.2681955099105835, |
|
"learning_rate": 9.88877668966841e-05, |
|
"loss": 1.3961, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2040244380920352, |
|
"grad_norm": 1.2412629127502441, |
|
"learning_rate": 9.886300791374688e-05, |
|
"loss": 1.3843, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.20626646488425537, |
|
"grad_norm": 1.3303419351577759, |
|
"learning_rate": 9.883797954946476e-05, |
|
"loss": 1.3459, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20850849167647553, |
|
"grad_norm": 1.100720763206482, |
|
"learning_rate": 9.881268194181892e-05, |
|
"loss": 1.4156, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2107505184686957, |
|
"grad_norm": 1.0826263427734375, |
|
"learning_rate": 9.878711523027484e-05, |
|
"loss": 1.3297, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.21299254526091588, |
|
"grad_norm": 1.4636311531066895, |
|
"learning_rate": 9.876127955578158e-05, |
|
"loss": 1.3662, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.21523457205313604, |
|
"grad_norm": 1.1484990119934082, |
|
"learning_rate": 9.873517506077101e-05, |
|
"loss": 1.36, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2174765988453562, |
|
"grad_norm": 1.2333481311798096, |
|
"learning_rate": 9.870880188915698e-05, |
|
"loss": 1.3587, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.21971862563757638, |
|
"grad_norm": 1.1720088720321655, |
|
"learning_rate": 9.868216018633456e-05, |
|
"loss": 1.2882, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.22196065242979654, |
|
"grad_norm": 1.2749361991882324, |
|
"learning_rate": 9.865525009917921e-05, |
|
"loss": 1.3564, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2242026792220167, |
|
"grad_norm": 1.1952840089797974, |
|
"learning_rate": 9.862807177604602e-05, |
|
"loss": 1.3956, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22644470601423686, |
|
"grad_norm": 1.215401530265808, |
|
"learning_rate": 9.860062536676888e-05, |
|
"loss": 1.3836, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.22868673280645704, |
|
"grad_norm": 1.2550543546676636, |
|
"learning_rate": 9.857291102265959e-05, |
|
"loss": 1.3626, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2309287595986772, |
|
"grad_norm": 1.1673667430877686, |
|
"learning_rate": 9.854492889650709e-05, |
|
"loss": 1.3601, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.23317078639089736, |
|
"grad_norm": 1.2657443284988403, |
|
"learning_rate": 9.851667914257661e-05, |
|
"loss": 1.3216, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.23541281318311755, |
|
"grad_norm": 1.3874006271362305, |
|
"learning_rate": 9.848816191660878e-05, |
|
"loss": 1.3565, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2376548399753377, |
|
"grad_norm": 1.3701063394546509, |
|
"learning_rate": 9.845937737581885e-05, |
|
"loss": 1.3676, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.23989686676755786, |
|
"grad_norm": 1.1547927856445312, |
|
"learning_rate": 9.843032567889572e-05, |
|
"loss": 1.3882, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.24213889355977805, |
|
"grad_norm": 1.2529016733169556, |
|
"learning_rate": 9.840100698600118e-05, |
|
"loss": 1.4058, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2443809203519982, |
|
"grad_norm": 1.250368595123291, |
|
"learning_rate": 9.837142145876892e-05, |
|
"loss": 1.3544, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24662294714421837, |
|
"grad_norm": 1.0099977254867554, |
|
"learning_rate": 9.834156926030368e-05, |
|
"loss": 1.3435, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24886497393643853, |
|
"grad_norm": 1.169044017791748, |
|
"learning_rate": 9.831145055518039e-05, |
|
"loss": 1.3226, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.2511070007286587, |
|
"grad_norm": 1.1605632305145264, |
|
"learning_rate": 9.828106550944322e-05, |
|
"loss": 1.3941, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2533490275208789, |
|
"grad_norm": 1.1079938411712646, |
|
"learning_rate": 9.825041429060466e-05, |
|
"loss": 1.3151, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 1.39505934715271, |
|
"learning_rate": 9.821949706764463e-05, |
|
"loss": 1.34, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2578330811053192, |
|
"grad_norm": 1.3362523317337036, |
|
"learning_rate": 9.81883140110095e-05, |
|
"loss": 1.3317, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.26007510789753935, |
|
"grad_norm": 1.131722092628479, |
|
"learning_rate": 9.815686529261119e-05, |
|
"loss": 1.3044, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.26231713468975953, |
|
"grad_norm": 1.227959394454956, |
|
"learning_rate": 9.812515108582622e-05, |
|
"loss": 1.3882, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.2645591614819797, |
|
"grad_norm": 1.2343266010284424, |
|
"learning_rate": 9.809317156549476e-05, |
|
"loss": 1.3132, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.26680118827419985, |
|
"grad_norm": 1.3055241107940674, |
|
"learning_rate": 9.806092690791962e-05, |
|
"loss": 1.3804, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.26904321506642004, |
|
"grad_norm": 1.1219717264175415, |
|
"learning_rate": 9.80284172908653e-05, |
|
"loss": 1.4393, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2712852418586402, |
|
"grad_norm": 1.1093581914901733, |
|
"learning_rate": 9.799564289355707e-05, |
|
"loss": 1.4185, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.27352726865086036, |
|
"grad_norm": 1.181302785873413, |
|
"learning_rate": 9.79626038966799e-05, |
|
"loss": 1.3762, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.27576929544308054, |
|
"grad_norm": 1.238273024559021, |
|
"learning_rate": 9.79293004823775e-05, |
|
"loss": 1.4161, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.27801132223530073, |
|
"grad_norm": 1.2279235124588013, |
|
"learning_rate": 9.789573283425126e-05, |
|
"loss": 1.3645, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.28025334902752086, |
|
"grad_norm": 1.0937743186950684, |
|
"learning_rate": 9.78619011373594e-05, |
|
"loss": 1.374, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.28249537581974105, |
|
"grad_norm": 1.0662868022918701, |
|
"learning_rate": 9.782780557821576e-05, |
|
"loss": 1.2941, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.28473740261196123, |
|
"grad_norm": 1.2285066843032837, |
|
"learning_rate": 9.779344634478884e-05, |
|
"loss": 1.3532, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.28697942940418136, |
|
"grad_norm": 1.336958646774292, |
|
"learning_rate": 9.775882362650083e-05, |
|
"loss": 1.3376, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.28922145619640155, |
|
"grad_norm": 1.1303439140319824, |
|
"learning_rate": 9.772393761422645e-05, |
|
"loss": 1.3311, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29146348298862174, |
|
"grad_norm": 1.155773401260376, |
|
"learning_rate": 9.768878850029201e-05, |
|
"loss": 1.3207, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.29370550978084187, |
|
"grad_norm": 1.1738945245742798, |
|
"learning_rate": 9.765337647847429e-05, |
|
"loss": 1.3448, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.29594753657306205, |
|
"grad_norm": 1.1708767414093018, |
|
"learning_rate": 9.761770174399943e-05, |
|
"loss": 1.4237, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.29818956336528224, |
|
"grad_norm": 1.1160731315612793, |
|
"learning_rate": 9.758176449354194e-05, |
|
"loss": 1.3669, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.30043159015750237, |
|
"grad_norm": 1.2477370500564575, |
|
"learning_rate": 9.754556492522359e-05, |
|
"loss": 1.3638, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.30267361694972256, |
|
"grad_norm": 1.1834142208099365, |
|
"learning_rate": 9.750910323861228e-05, |
|
"loss": 1.3193, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.30491564374194274, |
|
"grad_norm": 1.1948530673980713, |
|
"learning_rate": 9.747237963472098e-05, |
|
"loss": 1.3663, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3071576705341629, |
|
"grad_norm": 1.172042965888977, |
|
"learning_rate": 9.743539431600661e-05, |
|
"loss": 1.3777, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.30939969732638306, |
|
"grad_norm": 1.276157259941101, |
|
"learning_rate": 9.739814748636891e-05, |
|
"loss": 1.3025, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3116417241186032, |
|
"grad_norm": 1.0595532655715942, |
|
"learning_rate": 9.736063935114934e-05, |
|
"loss": 1.3276, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.3138837509108234, |
|
"grad_norm": 1.1211802959442139, |
|
"learning_rate": 9.732287011712992e-05, |
|
"loss": 1.3408, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31612577770304356, |
|
"grad_norm": 1.0752381086349487, |
|
"learning_rate": 9.72848399925321e-05, |
|
"loss": 1.3546, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3183678044952637, |
|
"grad_norm": 1.1245768070220947, |
|
"learning_rate": 9.724654918701568e-05, |
|
"loss": 1.3702, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3206098312874839, |
|
"grad_norm": 1.1155140399932861, |
|
"learning_rate": 9.720799791167749e-05, |
|
"loss": 1.4169, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.32285185807970407, |
|
"grad_norm": 1.0726211071014404, |
|
"learning_rate": 9.716918637905041e-05, |
|
"loss": 1.3408, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3250938848719242, |
|
"grad_norm": 1.0829260349273682, |
|
"learning_rate": 9.713011480310208e-05, |
|
"loss": 1.3408, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3273359116641444, |
|
"grad_norm": 1.2374547719955444, |
|
"learning_rate": 9.709078339923377e-05, |
|
"loss": 1.4005, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.32957793845636457, |
|
"grad_norm": 1.0573582649230957, |
|
"learning_rate": 9.705119238427915e-05, |
|
"loss": 1.3704, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3318199652485847, |
|
"grad_norm": 1.245229959487915, |
|
"learning_rate": 9.701134197650318e-05, |
|
"loss": 1.359, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3340619920408049, |
|
"grad_norm": 1.2916717529296875, |
|
"learning_rate": 9.697123239560081e-05, |
|
"loss": 1.3983, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.3363040188330251, |
|
"grad_norm": 1.0935052633285522, |
|
"learning_rate": 9.693086386269581e-05, |
|
"loss": 1.2974, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3385460456252452, |
|
"grad_norm": 1.2657032012939453, |
|
"learning_rate": 9.689023660033956e-05, |
|
"loss": 1.4309, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.3407880724174654, |
|
"grad_norm": 1.198128581047058, |
|
"learning_rate": 9.684935083250979e-05, |
|
"loss": 1.361, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3430300992096856, |
|
"grad_norm": 1.0504889488220215, |
|
"learning_rate": 9.680820678460941e-05, |
|
"loss": 1.3671, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3452721260019057, |
|
"grad_norm": 0.9644368290901184, |
|
"learning_rate": 9.676680468346521e-05, |
|
"loss": 1.3223, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3475141527941259, |
|
"grad_norm": 1.2863438129425049, |
|
"learning_rate": 9.672514475732659e-05, |
|
"loss": 1.2832, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3497561795863461, |
|
"grad_norm": 1.2852818965911865, |
|
"learning_rate": 9.66832272358644e-05, |
|
"loss": 1.3586, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3519982063785662, |
|
"grad_norm": 1.088563323020935, |
|
"learning_rate": 9.664105235016955e-05, |
|
"loss": 1.3518, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.3542402331707864, |
|
"grad_norm": 1.138024926185608, |
|
"learning_rate": 9.659862033275187e-05, |
|
"loss": 1.4139, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.35648225996300653, |
|
"grad_norm": 1.1250396966934204, |
|
"learning_rate": 9.655593141753865e-05, |
|
"loss": 1.3991, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3587242867552267, |
|
"grad_norm": 1.3767707347869873, |
|
"learning_rate": 9.651298583987353e-05, |
|
"loss": 1.3365, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3609663135474469, |
|
"grad_norm": 1.0378060340881348, |
|
"learning_rate": 9.646978383651515e-05, |
|
"loss": 1.3198, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.36320834033966704, |
|
"grad_norm": 1.138748049736023, |
|
"learning_rate": 9.642632564563576e-05, |
|
"loss": 1.3889, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3654503671318872, |
|
"grad_norm": 1.2029573917388916, |
|
"learning_rate": 9.638261150681998e-05, |
|
"loss": 1.3673, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3676923939241074, |
|
"grad_norm": 1.0248106718063354, |
|
"learning_rate": 9.63386416610635e-05, |
|
"loss": 1.3291, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.36993442071632754, |
|
"grad_norm": 1.1774693727493286, |
|
"learning_rate": 9.62944163507717e-05, |
|
"loss": 1.3082, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3721764475085477, |
|
"grad_norm": 1.075829029083252, |
|
"learning_rate": 9.624993581975833e-05, |
|
"loss": 1.3391, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3744184743007679, |
|
"grad_norm": 1.2112139463424683, |
|
"learning_rate": 9.62052003132442e-05, |
|
"loss": 1.3169, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.37666050109298804, |
|
"grad_norm": 1.2244364023208618, |
|
"learning_rate": 9.616021007785576e-05, |
|
"loss": 1.3267, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.37890252788520823, |
|
"grad_norm": 1.0250012874603271, |
|
"learning_rate": 9.611496536162379e-05, |
|
"loss": 1.3162, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.3811445546774284, |
|
"grad_norm": 1.1248130798339844, |
|
"learning_rate": 9.606946641398203e-05, |
|
"loss": 1.3244, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 1.1805521249771118, |
|
"learning_rate": 9.602371348576577e-05, |
|
"loss": 1.3474, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.38562860826186873, |
|
"grad_norm": 1.129887342453003, |
|
"learning_rate": 9.597770682921055e-05, |
|
"loss": 1.317, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3878706350540889, |
|
"grad_norm": 1.2311817407608032, |
|
"learning_rate": 9.593144669795066e-05, |
|
"loss": 1.3399, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.39011266184630905, |
|
"grad_norm": 1.1044973134994507, |
|
"learning_rate": 9.588493334701777e-05, |
|
"loss": 1.3829, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.39235468863852924, |
|
"grad_norm": 1.142473816871643, |
|
"learning_rate": 9.583816703283965e-05, |
|
"loss": 1.3408, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3945967154307494, |
|
"grad_norm": 1.054578423500061, |
|
"learning_rate": 9.579114801323854e-05, |
|
"loss": 1.2964, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.39683874222296955, |
|
"grad_norm": 1.1482200622558594, |
|
"learning_rate": 9.574387654742992e-05, |
|
"loss": 1.3412, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.39908076901518974, |
|
"grad_norm": 1.0244128704071045, |
|
"learning_rate": 9.569635289602097e-05, |
|
"loss": 1.3605, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.40132279580740987, |
|
"grad_norm": 0.9474136233329773, |
|
"learning_rate": 9.564857732100916e-05, |
|
"loss": 1.3675, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.40356482259963006, |
|
"grad_norm": 1.0347918272018433, |
|
"learning_rate": 9.560055008578085e-05, |
|
"loss": 1.3696, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.40580684939185024, |
|
"grad_norm": 1.2021688222885132, |
|
"learning_rate": 9.555227145510977e-05, |
|
"loss": 1.32, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4080488761840704, |
|
"grad_norm": 1.1408722400665283, |
|
"learning_rate": 9.550374169515557e-05, |
|
"loss": 1.3757, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.41029090297629056, |
|
"grad_norm": 1.015257716178894, |
|
"learning_rate": 9.545496107346244e-05, |
|
"loss": 1.3332, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.41253292976851075, |
|
"grad_norm": 1.5246819257736206, |
|
"learning_rate": 9.540592985895752e-05, |
|
"loss": 1.2848, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4147749565607309, |
|
"grad_norm": 1.1320191621780396, |
|
"learning_rate": 9.535664832194946e-05, |
|
"loss": 1.3324, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.41701698335295107, |
|
"grad_norm": 1.169104814529419, |
|
"learning_rate": 9.530711673412698e-05, |
|
"loss": 1.3697, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.41925901014517125, |
|
"grad_norm": 1.03293776512146, |
|
"learning_rate": 9.525733536855728e-05, |
|
"loss": 1.3582, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.4215010369373914, |
|
"grad_norm": 1.3983210325241089, |
|
"learning_rate": 9.520730449968461e-05, |
|
"loss": 1.3631, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.42374306372961157, |
|
"grad_norm": 1.2297945022583008, |
|
"learning_rate": 9.515702440332869e-05, |
|
"loss": 1.4169, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.42598509052183176, |
|
"grad_norm": 1.3570704460144043, |
|
"learning_rate": 9.510649535668332e-05, |
|
"loss": 1.3588, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4282271173140519, |
|
"grad_norm": 1.1815954446792603, |
|
"learning_rate": 9.505571763831468e-05, |
|
"loss": 1.364, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.4304691441062721, |
|
"grad_norm": 1.199096441268921, |
|
"learning_rate": 9.500469152815988e-05, |
|
"loss": 1.3813, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.43271117089849226, |
|
"grad_norm": 1.0751597881317139, |
|
"learning_rate": 9.495341730752543e-05, |
|
"loss": 1.3479, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.4349531976907124, |
|
"grad_norm": 1.121031641960144, |
|
"learning_rate": 9.490189525908569e-05, |
|
"loss": 1.2976, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4371952244829326, |
|
"grad_norm": 1.0710008144378662, |
|
"learning_rate": 9.485012566688127e-05, |
|
"loss": 1.33, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.43943725127515276, |
|
"grad_norm": 1.1103382110595703, |
|
"learning_rate": 9.479810881631747e-05, |
|
"loss": 1.3194, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4416792780673729, |
|
"grad_norm": 1.1765540838241577, |
|
"learning_rate": 9.474584499416275e-05, |
|
"loss": 1.4135, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4439213048595931, |
|
"grad_norm": 1.1305935382843018, |
|
"learning_rate": 9.469333448854713e-05, |
|
"loss": 1.2884, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.44616333165181327, |
|
"grad_norm": 1.0487785339355469, |
|
"learning_rate": 9.464057758896055e-05, |
|
"loss": 1.3547, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.4484053584440334, |
|
"grad_norm": 1.071997046470642, |
|
"learning_rate": 9.458757458625138e-05, |
|
"loss": 1.3376, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4506473852362536, |
|
"grad_norm": 1.2403199672698975, |
|
"learning_rate": 9.453432577262471e-05, |
|
"loss": 1.3056, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.4528894120284737, |
|
"grad_norm": 1.2904599905014038, |
|
"learning_rate": 9.448083144164077e-05, |
|
"loss": 1.3357, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4551314388206939, |
|
"grad_norm": 0.9735843539237976, |
|
"learning_rate": 9.442709188821337e-05, |
|
"loss": 1.3731, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.4573734656129141, |
|
"grad_norm": 1.131100058555603, |
|
"learning_rate": 9.437310740860822e-05, |
|
"loss": 1.3478, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4596154924051342, |
|
"grad_norm": 1.149906873703003, |
|
"learning_rate": 9.431887830044129e-05, |
|
"loss": 1.3439, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.4618575191973544, |
|
"grad_norm": 1.177563190460205, |
|
"learning_rate": 9.426440486267716e-05, |
|
"loss": 1.4656, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4640995459895746, |
|
"grad_norm": 1.1288046836853027, |
|
"learning_rate": 9.420968739562744e-05, |
|
"loss": 1.4185, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.4663415727817947, |
|
"grad_norm": 1.2524133920669556, |
|
"learning_rate": 9.415472620094909e-05, |
|
"loss": 1.3401, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4685835995740149, |
|
"grad_norm": 1.212417721748352, |
|
"learning_rate": 9.409952158164266e-05, |
|
"loss": 1.3573, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.4708256263662351, |
|
"grad_norm": 1.0959070920944214, |
|
"learning_rate": 9.404407384205078e-05, |
|
"loss": 1.2674, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4730676531584552, |
|
"grad_norm": 1.1945031881332397, |
|
"learning_rate": 9.398838328785635e-05, |
|
"loss": 1.3511, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.4753096799506754, |
|
"grad_norm": 1.1044509410858154, |
|
"learning_rate": 9.393245022608091e-05, |
|
"loss": 1.3917, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.4775517067428956, |
|
"grad_norm": 1.4578787088394165, |
|
"learning_rate": 9.387627496508298e-05, |
|
"loss": 1.3883, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.47979373353511573, |
|
"grad_norm": 1.1177469491958618, |
|
"learning_rate": 9.381985781455625e-05, |
|
"loss": 1.3079, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.4820357603273359, |
|
"grad_norm": 1.0329993963241577, |
|
"learning_rate": 9.376319908552803e-05, |
|
"loss": 1.3693, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4842777871195561, |
|
"grad_norm": 1.0311007499694824, |
|
"learning_rate": 9.37062990903574e-05, |
|
"loss": 1.2942, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.48651981391177623, |
|
"grad_norm": 1.036125659942627, |
|
"learning_rate": 9.364915814273351e-05, |
|
"loss": 1.3083, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.4887618407039964, |
|
"grad_norm": 1.1864365339279175, |
|
"learning_rate": 9.359177655767396e-05, |
|
"loss": 1.3344, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.4910038674962166, |
|
"grad_norm": 1.2126179933547974, |
|
"learning_rate": 9.353415465152293e-05, |
|
"loss": 1.3113, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.49324589428843674, |
|
"grad_norm": 1.0252037048339844, |
|
"learning_rate": 9.34762927419495e-05, |
|
"loss": 1.3527, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4954879210806569, |
|
"grad_norm": 1.058380126953125, |
|
"learning_rate": 9.341819114794584e-05, |
|
"loss": 1.327, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.49772994787287705, |
|
"grad_norm": 1.0073350667953491, |
|
"learning_rate": 9.335985018982559e-05, |
|
"loss": 1.3563, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.49997197466509724, |
|
"grad_norm": 1.0354520082473755, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 1.311, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.5022140014573174, |
|
"grad_norm": 1.4187575578689575, |
|
"learning_rate": 9.324245146908592e-05, |
|
"loss": 1.386, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5044560282495376, |
|
"grad_norm": 1.1989063024520874, |
|
"learning_rate": 9.318339435368464e-05, |
|
"loss": 1.3826, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.5066980550417578, |
|
"grad_norm": 1.1496927738189697, |
|
"learning_rate": 9.312409916859948e-05, |
|
"loss": 1.3464, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5089400818339779, |
|
"grad_norm": 1.0821688175201416, |
|
"learning_rate": 9.306456624072426e-05, |
|
"loss": 1.319, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 0.9903674721717834, |
|
"learning_rate": 9.300479589826355e-05, |
|
"loss": 1.331, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5134241354184182, |
|
"grad_norm": 1.0320252180099487, |
|
"learning_rate": 9.294478847073069e-05, |
|
"loss": 1.3697, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.5156661622106384, |
|
"grad_norm": 1.0023198127746582, |
|
"learning_rate": 9.288454428894615e-05, |
|
"loss": 1.2954, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5179081890028586, |
|
"grad_norm": 0.9635931253433228, |
|
"learning_rate": 9.282406368503556e-05, |
|
"loss": 1.3488, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.5201502157950787, |
|
"grad_norm": 1.0810673236846924, |
|
"learning_rate": 9.276334699242799e-05, |
|
"loss": 1.3507, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5223922425872989, |
|
"grad_norm": 1.1213270425796509, |
|
"learning_rate": 9.270239454585404e-05, |
|
"loss": 1.3535, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.5246342693795191, |
|
"grad_norm": 1.1882630586624146, |
|
"learning_rate": 9.264120668134405e-05, |
|
"loss": 1.3221, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5268762961717393, |
|
"grad_norm": 1.1680420637130737, |
|
"learning_rate": 9.257978373622615e-05, |
|
"loss": 1.3585, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5291183229639594, |
|
"grad_norm": 1.063761591911316, |
|
"learning_rate": 9.251812604912453e-05, |
|
"loss": 1.3171, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5313603497561796, |
|
"grad_norm": 1.2708847522735596, |
|
"learning_rate": 9.245623395995751e-05, |
|
"loss": 1.3829, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.5336023765483997, |
|
"grad_norm": 1.1421536207199097, |
|
"learning_rate": 9.239410780993564e-05, |
|
"loss": 1.3211, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5358444033406199, |
|
"grad_norm": 1.2646090984344482, |
|
"learning_rate": 9.233174794155985e-05, |
|
"loss": 1.3228, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.5380864301328401, |
|
"grad_norm": 1.1613190174102783, |
|
"learning_rate": 9.226915469861956e-05, |
|
"loss": 1.3229, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5403284569250603, |
|
"grad_norm": 1.0214089155197144, |
|
"learning_rate": 9.220632842619079e-05, |
|
"loss": 1.3952, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.5425704837172804, |
|
"grad_norm": 1.172778844833374, |
|
"learning_rate": 9.214326947063423e-05, |
|
"loss": 1.3208, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5448125105095006, |
|
"grad_norm": 1.251479983329773, |
|
"learning_rate": 9.207997817959338e-05, |
|
"loss": 1.3232, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.5470545373017207, |
|
"grad_norm": 1.2302333116531372, |
|
"learning_rate": 9.201645490199256e-05, |
|
"loss": 1.2792, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5492965640939409, |
|
"grad_norm": 1.0342446565628052, |
|
"learning_rate": 9.195269998803507e-05, |
|
"loss": 1.3181, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5515385908861611, |
|
"grad_norm": 0.9909287095069885, |
|
"learning_rate": 9.188871378920122e-05, |
|
"loss": 1.339, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5537806176783813, |
|
"grad_norm": 1.1493330001831055, |
|
"learning_rate": 9.182449665824636e-05, |
|
"loss": 1.3659, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.5560226444706015, |
|
"grad_norm": 1.094141960144043, |
|
"learning_rate": 9.1760048949199e-05, |
|
"loss": 1.3464, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5582646712628216, |
|
"grad_norm": 1.0574826002120972, |
|
"learning_rate": 9.169537101735879e-05, |
|
"loss": 1.2936, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.5605066980550417, |
|
"grad_norm": 0.9537421464920044, |
|
"learning_rate": 9.163046321929462e-05, |
|
"loss": 1.2573, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5627487248472619, |
|
"grad_norm": 1.3234580755233765, |
|
"learning_rate": 9.156532591284263e-05, |
|
"loss": 1.3271, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.5649907516394821, |
|
"grad_norm": 1.078527808189392, |
|
"learning_rate": 9.149995945710423e-05, |
|
"loss": 1.3126, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5672327784317023, |
|
"grad_norm": 1.2018640041351318, |
|
"learning_rate": 9.143436421244416e-05, |
|
"loss": 1.3642, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.5694748052239225, |
|
"grad_norm": 1.0188864469528198, |
|
"learning_rate": 9.136854054048838e-05, |
|
"loss": 1.2751, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5717168320161425, |
|
"grad_norm": 1.2628931999206543, |
|
"learning_rate": 9.130248880412229e-05, |
|
"loss": 1.364, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5739588588083627, |
|
"grad_norm": 1.1243770122528076, |
|
"learning_rate": 9.123620936748853e-05, |
|
"loss": 1.3668, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5762008856005829, |
|
"grad_norm": 1.1701164245605469, |
|
"learning_rate": 9.116970259598505e-05, |
|
"loss": 1.3434, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.5784429123928031, |
|
"grad_norm": 1.0601651668548584, |
|
"learning_rate": 9.110296885626314e-05, |
|
"loss": 1.2645, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5806849391850233, |
|
"grad_norm": 1.2184094190597534, |
|
"learning_rate": 9.103600851622531e-05, |
|
"loss": 1.3468, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.5829269659772435, |
|
"grad_norm": 1.5354876518249512, |
|
"learning_rate": 9.096882194502337e-05, |
|
"loss": 1.4595, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5851689927694635, |
|
"grad_norm": 1.0867820978164673, |
|
"learning_rate": 9.09014095130563e-05, |
|
"loss": 1.385, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.5874110195616837, |
|
"grad_norm": 1.2308603525161743, |
|
"learning_rate": 9.083377159196825e-05, |
|
"loss": 1.3503, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5896530463539039, |
|
"grad_norm": 1.2136027812957764, |
|
"learning_rate": 9.07659085546465e-05, |
|
"loss": 1.3986, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.5918950731461241, |
|
"grad_norm": 0.9775259494781494, |
|
"learning_rate": 9.069782077521943e-05, |
|
"loss": 1.4075, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5941370999383443, |
|
"grad_norm": 1.0732626914978027, |
|
"learning_rate": 9.062950862905432e-05, |
|
"loss": 1.3594, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5963791267305645, |
|
"grad_norm": 0.9587686061859131, |
|
"learning_rate": 9.056097249275553e-05, |
|
"loss": 1.3741, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5986211535227846, |
|
"grad_norm": 0.9384256601333618, |
|
"learning_rate": 9.049221274416213e-05, |
|
"loss": 1.3553, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.6008631803150047, |
|
"grad_norm": 1.0991201400756836, |
|
"learning_rate": 9.042322976234606e-05, |
|
"loss": 1.3533, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6031052071072249, |
|
"grad_norm": 1.215849757194519, |
|
"learning_rate": 9.035402392760988e-05, |
|
"loss": 1.3747, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.6053472338994451, |
|
"grad_norm": 1.416882872581482, |
|
"learning_rate": 9.02845956214848e-05, |
|
"loss": 1.3554, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6075892606916653, |
|
"grad_norm": 1.1282700300216675, |
|
"learning_rate": 9.021494522672845e-05, |
|
"loss": 1.3741, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.6098312874838855, |
|
"grad_norm": 0.9101713299751282, |
|
"learning_rate": 9.014507312732285e-05, |
|
"loss": 1.3012, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6120733142761056, |
|
"grad_norm": 1.0538674592971802, |
|
"learning_rate": 9.007497970847234e-05, |
|
"loss": 1.3273, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.6143153410683257, |
|
"grad_norm": 1.3435333967208862, |
|
"learning_rate": 9.000466535660129e-05, |
|
"loss": 1.4025, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6165573678605459, |
|
"grad_norm": 1.0372601747512817, |
|
"learning_rate": 8.993413045935215e-05, |
|
"loss": 1.3212, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.6187993946527661, |
|
"grad_norm": 1.0866960287094116, |
|
"learning_rate": 8.986337540558318e-05, |
|
"loss": 1.3799, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6210414214449863, |
|
"grad_norm": 1.0121322870254517, |
|
"learning_rate": 8.97924005853664e-05, |
|
"loss": 1.3493, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.6232834482372064, |
|
"grad_norm": 1.071612000465393, |
|
"learning_rate": 8.972120638998539e-05, |
|
"loss": 1.3564, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6255254750294266, |
|
"grad_norm": 1.103440523147583, |
|
"learning_rate": 8.964979321193314e-05, |
|
"loss": 1.2915, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.6277675018216468, |
|
"grad_norm": 0.9943517446517944, |
|
"learning_rate": 8.957816144490989e-05, |
|
"loss": 1.3424, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6300095286138669, |
|
"grad_norm": 1.0576980113983154, |
|
"learning_rate": 8.950631148382095e-05, |
|
"loss": 1.3101, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.6322515554060871, |
|
"grad_norm": 1.0375151634216309, |
|
"learning_rate": 8.943424372477455e-05, |
|
"loss": 1.3308, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6344935821983073, |
|
"grad_norm": 1.1026891469955444, |
|
"learning_rate": 8.936195856507962e-05, |
|
"loss": 1.3229, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.6367356089905274, |
|
"grad_norm": 1.1137725114822388, |
|
"learning_rate": 8.928945640324364e-05, |
|
"loss": 1.2864, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 1.0674328804016113, |
|
"learning_rate": 8.921673763897041e-05, |
|
"loss": 1.339, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.6412196625749678, |
|
"grad_norm": 1.1279280185699463, |
|
"learning_rate": 8.914380267315782e-05, |
|
"loss": 1.3516, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.643461689367188, |
|
"grad_norm": 0.9942423105239868, |
|
"learning_rate": 8.907065190789577e-05, |
|
"loss": 1.3102, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.6457037161594081, |
|
"grad_norm": 1.1335337162017822, |
|
"learning_rate": 8.899728574646376e-05, |
|
"loss": 1.304, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6479457429516283, |
|
"grad_norm": 1.0654945373535156, |
|
"learning_rate": 8.892370459332883e-05, |
|
"loss": 1.273, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.6501877697438484, |
|
"grad_norm": 1.0929509401321411, |
|
"learning_rate": 8.884990885414326e-05, |
|
"loss": 1.3298, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6524297965360686, |
|
"grad_norm": 1.157837986946106, |
|
"learning_rate": 8.87758989357423e-05, |
|
"loss": 1.3395, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.6546718233282888, |
|
"grad_norm": 1.1370052099227905, |
|
"learning_rate": 8.8701675246142e-05, |
|
"loss": 1.3823, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.656913850120509, |
|
"grad_norm": 1.096897840499878, |
|
"learning_rate": 8.862723819453696e-05, |
|
"loss": 1.2579, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.6591558769127291, |
|
"grad_norm": 1.028351902961731, |
|
"learning_rate": 8.855258819129796e-05, |
|
"loss": 1.323, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6613979037049492, |
|
"grad_norm": 1.2492655515670776, |
|
"learning_rate": 8.847772564796987e-05, |
|
"loss": 1.3316, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.6636399304971694, |
|
"grad_norm": 1.1350480318069458, |
|
"learning_rate": 8.840265097726923e-05, |
|
"loss": 1.3331, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6658819572893896, |
|
"grad_norm": 1.057501196861267, |
|
"learning_rate": 8.832736459308207e-05, |
|
"loss": 1.3092, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.6681239840816098, |
|
"grad_norm": 0.9846299290657043, |
|
"learning_rate": 8.825186691046157e-05, |
|
"loss": 1.3709, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.67036601087383, |
|
"grad_norm": 1.2653725147247314, |
|
"learning_rate": 8.817615834562583e-05, |
|
"loss": 1.368, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.6726080376660502, |
|
"grad_norm": 0.9622915387153625, |
|
"learning_rate": 8.81002393159555e-05, |
|
"loss": 1.3952, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6748500644582702, |
|
"grad_norm": 1.1680620908737183, |
|
"learning_rate": 8.802411023999153e-05, |
|
"loss": 1.378, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.6770920912504904, |
|
"grad_norm": 1.2336018085479736, |
|
"learning_rate": 8.79477715374329e-05, |
|
"loss": 1.3017, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6793341180427106, |
|
"grad_norm": 1.0431910753250122, |
|
"learning_rate": 8.78712236291342e-05, |
|
"loss": 1.2801, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.6815761448349308, |
|
"grad_norm": 0.9812450408935547, |
|
"learning_rate": 8.779446693710341e-05, |
|
"loss": 1.4084, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.683818171627151, |
|
"grad_norm": 0.9624593257904053, |
|
"learning_rate": 8.771750188449951e-05, |
|
"loss": 1.324, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6860601984193712, |
|
"grad_norm": 0.9403428435325623, |
|
"learning_rate": 8.764032889563017e-05, |
|
"loss": 1.3739, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6883022252115912, |
|
"grad_norm": 1.0417587757110596, |
|
"learning_rate": 8.756294839594943e-05, |
|
"loss": 1.2942, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.6905442520038114, |
|
"grad_norm": 1.1011159420013428, |
|
"learning_rate": 8.74853608120553e-05, |
|
"loss": 1.391, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6927862787960316, |
|
"grad_norm": 1.0298092365264893, |
|
"learning_rate": 8.74075665716875e-05, |
|
"loss": 1.2973, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.6950283055882518, |
|
"grad_norm": 1.156357765197754, |
|
"learning_rate": 8.732956610372499e-05, |
|
"loss": 1.2932, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.697270332380472, |
|
"grad_norm": 0.9823068976402283, |
|
"learning_rate": 8.725135983818369e-05, |
|
"loss": 1.3696, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.6995123591726922, |
|
"grad_norm": 1.0234986543655396, |
|
"learning_rate": 8.717294820621407e-05, |
|
"loss": 1.3504, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 1.3021448850631714, |
|
"learning_rate": 8.70943316400988e-05, |
|
"loss": 1.3624, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.7039964127571324, |
|
"grad_norm": 1.166528582572937, |
|
"learning_rate": 8.70155105732503e-05, |
|
"loss": 1.3469, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7062384395493526, |
|
"grad_norm": 1.2379478216171265, |
|
"learning_rate": 8.693648544020847e-05, |
|
"loss": 1.3586, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.7084804663415728, |
|
"grad_norm": 0.9946653842926025, |
|
"learning_rate": 8.68572566766382e-05, |
|
"loss": 1.3349, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.710722493133793, |
|
"grad_norm": 1.184866189956665, |
|
"learning_rate": 8.677782471932696e-05, |
|
"loss": 1.2896, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.7129645199260131, |
|
"grad_norm": 1.2160494327545166, |
|
"learning_rate": 8.669819000618246e-05, |
|
"loss": 1.3714, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7152065467182332, |
|
"grad_norm": 1.096117615699768, |
|
"learning_rate": 8.66183529762302e-05, |
|
"loss": 1.3556, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.7174485735104534, |
|
"grad_norm": 0.9968474507331848, |
|
"learning_rate": 8.653831406961105e-05, |
|
"loss": 1.3476, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7196906003026736, |
|
"grad_norm": 1.093274474143982, |
|
"learning_rate": 8.64580737275788e-05, |
|
"loss": 1.3187, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.7219326270948938, |
|
"grad_norm": 1.1728419065475464, |
|
"learning_rate": 8.637763239249777e-05, |
|
"loss": 1.3481, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.724174653887114, |
|
"grad_norm": 1.1466108560562134, |
|
"learning_rate": 8.629699050784038e-05, |
|
"loss": 1.3226, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.7264166806793341, |
|
"grad_norm": 1.0177853107452393, |
|
"learning_rate": 8.621614851818461e-05, |
|
"loss": 1.3065, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7286587074715543, |
|
"grad_norm": 0.9964995384216309, |
|
"learning_rate": 8.61351068692117e-05, |
|
"loss": 1.3096, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.7309007342637744, |
|
"grad_norm": 0.9439290165901184, |
|
"learning_rate": 8.605386600770353e-05, |
|
"loss": 1.2842, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7331427610559946, |
|
"grad_norm": 1.1577221155166626, |
|
"learning_rate": 8.59724263815403e-05, |
|
"loss": 1.3666, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.7353847878482148, |
|
"grad_norm": 1.0668253898620605, |
|
"learning_rate": 8.589078843969796e-05, |
|
"loss": 1.3084, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.737626814640435, |
|
"grad_norm": 1.0648199319839478, |
|
"learning_rate": 8.580895263224578e-05, |
|
"loss": 1.3817, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.7398688414326551, |
|
"grad_norm": 1.081084132194519, |
|
"learning_rate": 8.572691941034389e-05, |
|
"loss": 1.2961, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7421108682248753, |
|
"grad_norm": 0.9493741393089294, |
|
"learning_rate": 8.564468922624073e-05, |
|
"loss": 1.3692, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.7443528950170955, |
|
"grad_norm": 1.156214952468872, |
|
"learning_rate": 8.556226253327059e-05, |
|
"loss": 1.3368, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7465949218093156, |
|
"grad_norm": 1.098140001296997, |
|
"learning_rate": 8.547963978585114e-05, |
|
"loss": 1.3045, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.7488369486015358, |
|
"grad_norm": 1.1900348663330078, |
|
"learning_rate": 8.539682143948087e-05, |
|
"loss": 1.3388, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.751078975393756, |
|
"grad_norm": 1.0908799171447754, |
|
"learning_rate": 8.531380795073662e-05, |
|
"loss": 1.2893, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.7533210021859761, |
|
"grad_norm": 1.1332789659500122, |
|
"learning_rate": 8.523059977727103e-05, |
|
"loss": 1.278, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7555630289781963, |
|
"grad_norm": 1.1658406257629395, |
|
"learning_rate": 8.514719737781008e-05, |
|
"loss": 1.38, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.7578050557704165, |
|
"grad_norm": 1.1062614917755127, |
|
"learning_rate": 8.506360121215045e-05, |
|
"loss": 1.2967, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7600470825626366, |
|
"grad_norm": 1.1336619853973389, |
|
"learning_rate": 8.497981174115712e-05, |
|
"loss": 1.3339, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.7622891093548568, |
|
"grad_norm": 0.9592335820198059, |
|
"learning_rate": 8.48958294267607e-05, |
|
"loss": 1.3373, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7645311361470769, |
|
"grad_norm": 1.2497416734695435, |
|
"learning_rate": 8.4811654731955e-05, |
|
"loss": 1.3679, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 1.078972578048706, |
|
"learning_rate": 8.472728812079436e-05, |
|
"loss": 1.3833, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.7690151897315173, |
|
"grad_norm": 1.0341068506240845, |
|
"learning_rate": 8.464273005839119e-05, |
|
"loss": 1.304, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.7712572165237375, |
|
"grad_norm": 0.9276494383811951, |
|
"learning_rate": 8.455798101091338e-05, |
|
"loss": 1.3569, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7734992433159577, |
|
"grad_norm": 1.232210397720337, |
|
"learning_rate": 8.447304144558171e-05, |
|
"loss": 1.3199, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7757412701081778, |
|
"grad_norm": 1.031119704246521, |
|
"learning_rate": 8.438791183066728e-05, |
|
"loss": 1.3693, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7779832969003979, |
|
"grad_norm": 0.9429606795310974, |
|
"learning_rate": 8.43025926354889e-05, |
|
"loss": 1.3712, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.7802253236926181, |
|
"grad_norm": 1.0232348442077637, |
|
"learning_rate": 8.421708433041058e-05, |
|
"loss": 1.2815, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7824673504848383, |
|
"grad_norm": 1.1679573059082031, |
|
"learning_rate": 8.413138738683887e-05, |
|
"loss": 1.2576, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.7847093772770585, |
|
"grad_norm": 1.3956390619277954, |
|
"learning_rate": 8.40455022772203e-05, |
|
"loss": 1.3678, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7869514040692787, |
|
"grad_norm": 1.1722822189331055, |
|
"learning_rate": 8.395942947503874e-05, |
|
"loss": 1.2261, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.7891934308614988, |
|
"grad_norm": 1.1038949489593506, |
|
"learning_rate": 8.38731694548128e-05, |
|
"loss": 1.3066, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7914354576537189, |
|
"grad_norm": 1.0882680416107178, |
|
"learning_rate": 8.378672269209326e-05, |
|
"loss": 1.388, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.7936774844459391, |
|
"grad_norm": 0.9000134468078613, |
|
"learning_rate": 8.370008966346037e-05, |
|
"loss": 1.3099, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7959195112381593, |
|
"grad_norm": 0.993665874004364, |
|
"learning_rate": 8.361327084652126e-05, |
|
"loss": 1.2892, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7981615380303795, |
|
"grad_norm": 1.091774344444275, |
|
"learning_rate": 8.352626671990735e-05, |
|
"loss": 1.3601, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8004035648225997, |
|
"grad_norm": 1.1141952276229858, |
|
"learning_rate": 8.343907776327164e-05, |
|
"loss": 1.3546, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.8026455916148197, |
|
"grad_norm": 0.9900937676429749, |
|
"learning_rate": 8.335170445728608e-05, |
|
"loss": 1.3254, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8048876184070399, |
|
"grad_norm": 0.959354817867279, |
|
"learning_rate": 8.326414728363899e-05, |
|
"loss": 1.3446, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.8071296451992601, |
|
"grad_norm": 1.1290162801742554, |
|
"learning_rate": 8.317640672503231e-05, |
|
"loss": 1.3338, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8093716719914803, |
|
"grad_norm": 0.9364314675331116, |
|
"learning_rate": 8.308848326517897e-05, |
|
"loss": 1.2879, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.8116136987837005, |
|
"grad_norm": 1.0674771070480347, |
|
"learning_rate": 8.300037738880029e-05, |
|
"loss": 1.3129, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8138557255759207, |
|
"grad_norm": 1.0436745882034302, |
|
"learning_rate": 8.291208958162317e-05, |
|
"loss": 1.3547, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.8160977523681407, |
|
"grad_norm": 1.097528100013733, |
|
"learning_rate": 8.282362033037758e-05, |
|
"loss": 1.3301, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8183397791603609, |
|
"grad_norm": 1.0497652292251587, |
|
"learning_rate": 8.273497012279371e-05, |
|
"loss": 1.2466, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.8205818059525811, |
|
"grad_norm": 1.011123776435852, |
|
"learning_rate": 8.264613944759943e-05, |
|
"loss": 1.3085, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.8228238327448013, |
|
"grad_norm": 1.0443741083145142, |
|
"learning_rate": 8.255712879451747e-05, |
|
"loss": 1.281, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.8250658595370215, |
|
"grad_norm": 1.2140185832977295, |
|
"learning_rate": 8.246793865426279e-05, |
|
"loss": 1.3612, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8273078863292417, |
|
"grad_norm": 1.128836989402771, |
|
"learning_rate": 8.237856951853989e-05, |
|
"loss": 1.322, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8295499131214618, |
|
"grad_norm": 1.0461573600769043, |
|
"learning_rate": 8.228902188004004e-05, |
|
"loss": 1.2147, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8317919399136819, |
|
"grad_norm": 1.025303602218628, |
|
"learning_rate": 8.219929623243862e-05, |
|
"loss": 1.3644, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8340339667059021, |
|
"grad_norm": 1.1245356798171997, |
|
"learning_rate": 8.210939307039234e-05, |
|
"loss": 1.2791, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8362759934981223, |
|
"grad_norm": 1.0641727447509766, |
|
"learning_rate": 8.201931288953657e-05, |
|
"loss": 1.3585, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8385180202903425, |
|
"grad_norm": 1.0719192028045654, |
|
"learning_rate": 8.19290561864826e-05, |
|
"loss": 1.3353, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8407600470825627, |
|
"grad_norm": 1.0135860443115234, |
|
"learning_rate": 8.183862345881483e-05, |
|
"loss": 1.3111, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8430020738747828, |
|
"grad_norm": 1.0956032276153564, |
|
"learning_rate": 8.174801520508813e-05, |
|
"loss": 1.3599, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.845244100667003, |
|
"grad_norm": 1.2083892822265625, |
|
"learning_rate": 8.165723192482502e-05, |
|
"loss": 1.2641, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8474861274592231, |
|
"grad_norm": 1.0608189105987549, |
|
"learning_rate": 8.156627411851295e-05, |
|
"loss": 1.3246, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8497281542514433, |
|
"grad_norm": 1.099736213684082, |
|
"learning_rate": 8.147514228760153e-05, |
|
"loss": 1.294, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.8519701810436635, |
|
"grad_norm": 1.0537753105163574, |
|
"learning_rate": 8.138383693449978e-05, |
|
"loss": 1.3138, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8542122078358836, |
|
"grad_norm": 1.1678063869476318, |
|
"learning_rate": 8.12923585625733e-05, |
|
"loss": 1.3333, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8564542346281038, |
|
"grad_norm": 1.0176467895507812, |
|
"learning_rate": 8.120070767614161e-05, |
|
"loss": 1.2475, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.858696261420324, |
|
"grad_norm": 1.0608762502670288, |
|
"learning_rate": 8.110888478047523e-05, |
|
"loss": 1.3255, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.8609382882125441, |
|
"grad_norm": 0.9746761918067932, |
|
"learning_rate": 8.101689038179299e-05, |
|
"loss": 1.2848, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8631803150047643, |
|
"grad_norm": 1.1493169069290161, |
|
"learning_rate": 8.092472498725927e-05, |
|
"loss": 1.3407, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8654223417969845, |
|
"grad_norm": 1.04042649269104, |
|
"learning_rate": 8.083238910498108e-05, |
|
"loss": 1.3759, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8676643685892046, |
|
"grad_norm": 1.1784476041793823, |
|
"learning_rate": 8.073988324400535e-05, |
|
"loss": 1.3276, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8699063953814248, |
|
"grad_norm": 1.0766850709915161, |
|
"learning_rate": 8.064720791431608e-05, |
|
"loss": 1.4061, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.872148422173645, |
|
"grad_norm": 1.1751985549926758, |
|
"learning_rate": 8.055436362683158e-05, |
|
"loss": 1.3455, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8743904489658652, |
|
"grad_norm": 1.0339034795761108, |
|
"learning_rate": 8.046135089340164e-05, |
|
"loss": 1.3087, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8766324757580853, |
|
"grad_norm": 1.1246895790100098, |
|
"learning_rate": 8.036817022680466e-05, |
|
"loss": 1.2804, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.8788745025503055, |
|
"grad_norm": 0.9990755319595337, |
|
"learning_rate": 8.027482214074482e-05, |
|
"loss": 1.3058, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.8811165293425256, |
|
"grad_norm": 1.0636389255523682, |
|
"learning_rate": 8.018130714984933e-05, |
|
"loss": 1.3505, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.8833585561347458, |
|
"grad_norm": 1.2202845811843872, |
|
"learning_rate": 8.008762576966557e-05, |
|
"loss": 1.3404, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.885600582926966, |
|
"grad_norm": 1.0653436183929443, |
|
"learning_rate": 7.999377851665817e-05, |
|
"loss": 1.3974, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8878426097191862, |
|
"grad_norm": 1.3170489072799683, |
|
"learning_rate": 7.989976590820623e-05, |
|
"loss": 1.314, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.8900846365114063, |
|
"grad_norm": 1.0469021797180176, |
|
"learning_rate": 7.980558846260044e-05, |
|
"loss": 1.3115, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.8923266633036265, |
|
"grad_norm": 1.0016125440597534, |
|
"learning_rate": 7.971124669904029e-05, |
|
"loss": 1.2834, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 1.1542069911956787, |
|
"learning_rate": 7.961674113763109e-05, |
|
"loss": 1.2743, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.8968107168880668, |
|
"grad_norm": 1.0665364265441895, |
|
"learning_rate": 7.952207229938119e-05, |
|
"loss": 1.3778, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.899052743680287, |
|
"grad_norm": 1.06927490234375, |
|
"learning_rate": 7.942724070619911e-05, |
|
"loss": 1.3158, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.9012947704725072, |
|
"grad_norm": 1.1074497699737549, |
|
"learning_rate": 7.933224688089059e-05, |
|
"loss": 1.3796, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9035367972647274, |
|
"grad_norm": 0.9936386942863464, |
|
"learning_rate": 7.923709134715577e-05, |
|
"loss": 1.3099, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.9057788240569474, |
|
"grad_norm": 1.0144227743148804, |
|
"learning_rate": 7.914177462958631e-05, |
|
"loss": 1.3097, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9080208508491676, |
|
"grad_norm": 1.1205965280532837, |
|
"learning_rate": 7.904629725366247e-05, |
|
"loss": 1.3218, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.9102628776413878, |
|
"grad_norm": 1.0762195587158203, |
|
"learning_rate": 7.895065974575017e-05, |
|
"loss": 1.3102, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.912504904433608, |
|
"grad_norm": 1.1134177446365356, |
|
"learning_rate": 7.885486263309823e-05, |
|
"loss": 1.2953, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9147469312258282, |
|
"grad_norm": 1.171975016593933, |
|
"learning_rate": 7.875890644383525e-05, |
|
"loss": 1.3812, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9169889580180484, |
|
"grad_norm": 1.035203456878662, |
|
"learning_rate": 7.866279170696693e-05, |
|
"loss": 1.3105, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9192309848102684, |
|
"grad_norm": 0.9938043355941772, |
|
"learning_rate": 7.856651895237297e-05, |
|
"loss": 1.2807, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9214730116024886, |
|
"grad_norm": 1.012306571006775, |
|
"learning_rate": 7.847008871080423e-05, |
|
"loss": 1.2452, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9237150383947088, |
|
"grad_norm": 1.160154938697815, |
|
"learning_rate": 7.837350151387985e-05, |
|
"loss": 1.3946, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.925957065186929, |
|
"grad_norm": 1.1950114965438843, |
|
"learning_rate": 7.827675789408417e-05, |
|
"loss": 1.3793, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9281990919791492, |
|
"grad_norm": 0.9952568411827087, |
|
"learning_rate": 7.817985838476398e-05, |
|
"loss": 1.3438, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9304411187713694, |
|
"grad_norm": 0.9820153713226318, |
|
"learning_rate": 7.808280352012544e-05, |
|
"loss": 1.2817, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9326831455635894, |
|
"grad_norm": 1.062547206878662, |
|
"learning_rate": 7.798559383523116e-05, |
|
"loss": 1.2524, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9349251723558096, |
|
"grad_norm": 0.9531433582305908, |
|
"learning_rate": 7.788822986599733e-05, |
|
"loss": 1.3326, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9371671991480298, |
|
"grad_norm": 0.9412059783935547, |
|
"learning_rate": 7.779071214919066e-05, |
|
"loss": 1.3434, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.93940922594025, |
|
"grad_norm": 1.337913990020752, |
|
"learning_rate": 7.769304122242551e-05, |
|
"loss": 1.3211, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9416512527324702, |
|
"grad_norm": 0.9646030068397522, |
|
"learning_rate": 7.759521762416084e-05, |
|
"loss": 1.2644, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9438932795246903, |
|
"grad_norm": 1.146712303161621, |
|
"learning_rate": 7.749724189369735e-05, |
|
"loss": 1.3066, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9461353063169105, |
|
"grad_norm": 0.9840266704559326, |
|
"learning_rate": 7.739911457117437e-05, |
|
"loss": 1.337, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9483773331091306, |
|
"grad_norm": 1.027145504951477, |
|
"learning_rate": 7.730083619756698e-05, |
|
"loss": 1.3583, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9506193599013508, |
|
"grad_norm": 0.9447183609008789, |
|
"learning_rate": 7.720240731468306e-05, |
|
"loss": 1.2966, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.952861386693571, |
|
"grad_norm": 0.9172132015228271, |
|
"learning_rate": 7.710382846516017e-05, |
|
"loss": 1.324, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9551034134857912, |
|
"grad_norm": 1.004164218902588, |
|
"learning_rate": 7.700510019246266e-05, |
|
"loss": 1.3354, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9573454402780113, |
|
"grad_norm": 1.1161928176879883, |
|
"learning_rate": 7.690622304087865e-05, |
|
"loss": 1.2743, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.9595874670702315, |
|
"grad_norm": 1.2197874784469604, |
|
"learning_rate": 7.680719755551707e-05, |
|
"loss": 1.2924, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9618294938624516, |
|
"grad_norm": 1.1961028575897217, |
|
"learning_rate": 7.670802428230452e-05, |
|
"loss": 1.3233, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9640715206546718, |
|
"grad_norm": 1.09461510181427, |
|
"learning_rate": 7.660870376798244e-05, |
|
"loss": 1.3149, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.966313547446892, |
|
"grad_norm": 1.1680549383163452, |
|
"learning_rate": 7.650923656010398e-05, |
|
"loss": 1.3106, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9685555742391122, |
|
"grad_norm": 1.0645558834075928, |
|
"learning_rate": 7.6409623207031e-05, |
|
"loss": 1.2427, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9707976010313323, |
|
"grad_norm": 1.3543119430541992, |
|
"learning_rate": 7.630986425793105e-05, |
|
"loss": 1.257, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.9730396278235525, |
|
"grad_norm": 0.9309380650520325, |
|
"learning_rate": 7.620996026277438e-05, |
|
"loss": 1.3291, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9752816546157727, |
|
"grad_norm": 1.0483700037002563, |
|
"learning_rate": 7.610991177233085e-05, |
|
"loss": 1.3066, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9775236814079928, |
|
"grad_norm": 1.029461145401001, |
|
"learning_rate": 7.600971933816695e-05, |
|
"loss": 1.3153, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.979765708200213, |
|
"grad_norm": 1.025608777999878, |
|
"learning_rate": 7.590938351264269e-05, |
|
"loss": 1.2595, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.9820077349924332, |
|
"grad_norm": 1.1784470081329346, |
|
"learning_rate": 7.580890484890864e-05, |
|
"loss": 1.3677, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.9842497617846533, |
|
"grad_norm": 1.0288585424423218, |
|
"learning_rate": 7.570828390090279e-05, |
|
"loss": 1.2931, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.9864917885768735, |
|
"grad_norm": 0.9635973572731018, |
|
"learning_rate": 7.560752122334757e-05, |
|
"loss": 1.2542, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9887338153690937, |
|
"grad_norm": 1.0460883378982544, |
|
"learning_rate": 7.55066173717468e-05, |
|
"loss": 1.2744, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.9909758421613138, |
|
"grad_norm": 1.0957541465759277, |
|
"learning_rate": 7.54055729023825e-05, |
|
"loss": 1.3375, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.993217868953534, |
|
"grad_norm": 1.203940510749817, |
|
"learning_rate": 7.5304388372312e-05, |
|
"loss": 1.363, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.9954598957457541, |
|
"grad_norm": 1.2144309282302856, |
|
"learning_rate": 7.520306433936473e-05, |
|
"loss": 1.3041, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.9977019225379743, |
|
"grad_norm": 1.1100728511810303, |
|
"learning_rate": 7.510160136213921e-05, |
|
"loss": 1.2448, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9999439493301945, |
|
"grad_norm": 1.0487066507339478, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.2796, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.0021859761224146, |
|
"grad_norm": 0.9101243615150452, |
|
"learning_rate": 7.489826081307452e-05, |
|
"loss": 1.2459, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.0044280029146349, |
|
"grad_norm": 0.9735124707221985, |
|
"learning_rate": 7.479638436225003e-05, |
|
"loss": 1.271, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.006670029706855, |
|
"grad_norm": 1.0015895366668701, |
|
"learning_rate": 7.469437120917054e-05, |
|
"loss": 1.331, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.0089120564990752, |
|
"grad_norm": 1.1906746625900269, |
|
"learning_rate": 7.459222191623369e-05, |
|
"loss": 1.2832, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.0111540832912953, |
|
"grad_norm": 1.022809386253357, |
|
"learning_rate": 7.448993704658766e-05, |
|
"loss": 1.2637, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.0133961100835156, |
|
"grad_norm": 1.0506726503372192, |
|
"learning_rate": 7.438751716412807e-05, |
|
"loss": 1.2623, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.0156381368757357, |
|
"grad_norm": 1.114424705505371, |
|
"learning_rate": 7.428496283349483e-05, |
|
"loss": 1.2747, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.0178801636679558, |
|
"grad_norm": 1.1833229064941406, |
|
"learning_rate": 7.418227462006912e-05, |
|
"loss": 1.387, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.020122190460176, |
|
"grad_norm": 1.137563943862915, |
|
"learning_rate": 7.407945308997017e-05, |
|
"loss": 1.3009, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 1.0473971366882324, |
|
"learning_rate": 7.39764988100522e-05, |
|
"loss": 1.2309, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.0246062440446164, |
|
"grad_norm": 1.0773533582687378, |
|
"learning_rate": 7.387341234790124e-05, |
|
"loss": 1.2865, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.0268482708368365, |
|
"grad_norm": 1.1596111059188843, |
|
"learning_rate": 7.377019427183212e-05, |
|
"loss": 1.3355, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.0290902976290566, |
|
"grad_norm": 1.0251152515411377, |
|
"learning_rate": 7.366684515088521e-05, |
|
"loss": 1.3117, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.0313323244212769, |
|
"grad_norm": 1.039408802986145, |
|
"learning_rate": 7.356336555482332e-05, |
|
"loss": 1.3272, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.033574351213497, |
|
"grad_norm": 0.9818054437637329, |
|
"learning_rate": 7.345975605412855e-05, |
|
"loss": 1.3615, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.0358163780057172, |
|
"grad_norm": 1.0482890605926514, |
|
"learning_rate": 7.335601721999922e-05, |
|
"loss": 1.3027, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.0380584047979373, |
|
"grad_norm": 1.1090137958526611, |
|
"learning_rate": 7.325214962434665e-05, |
|
"loss": 1.2632, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.0403004315901576, |
|
"grad_norm": 1.005988597869873, |
|
"learning_rate": 7.314815383979198e-05, |
|
"loss": 1.2945, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.0425424583823777, |
|
"grad_norm": 1.1132372617721558, |
|
"learning_rate": 7.304403043966309e-05, |
|
"loss": 1.3651, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.0447844851745978, |
|
"grad_norm": 1.154373049736023, |
|
"learning_rate": 7.29397799979914e-05, |
|
"loss": 1.2766, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.047026511966818, |
|
"grad_norm": 1.1040149927139282, |
|
"learning_rate": 7.283540308950867e-05, |
|
"loss": 1.2856, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.0492685387590381, |
|
"grad_norm": 1.1813440322875977, |
|
"learning_rate": 7.273090028964396e-05, |
|
"loss": 1.2265, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0515105655512584, |
|
"grad_norm": 1.099605679512024, |
|
"learning_rate": 7.262627217452027e-05, |
|
"loss": 1.2973, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.0537525923434785, |
|
"grad_norm": 1.2352324724197388, |
|
"learning_rate": 7.252151932095154e-05, |
|
"loss": 1.2729, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0559946191356986, |
|
"grad_norm": 1.2545338869094849, |
|
"learning_rate": 7.241664230643931e-05, |
|
"loss": 1.293, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.0582366459279189, |
|
"grad_norm": 1.1900233030319214, |
|
"learning_rate": 7.23116417091697e-05, |
|
"loss": 1.3372, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.060478672720139, |
|
"grad_norm": 1.1750991344451904, |
|
"learning_rate": 7.220651810801009e-05, |
|
"loss": 1.2848, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.0627206995123593, |
|
"grad_norm": 1.1401137113571167, |
|
"learning_rate": 7.210127208250599e-05, |
|
"loss": 1.2853, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.0649627263045793, |
|
"grad_norm": 1.0749046802520752, |
|
"learning_rate": 7.199590421287788e-05, |
|
"loss": 1.3066, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0672047530967994, |
|
"grad_norm": 1.2057609558105469, |
|
"learning_rate": 7.189041508001786e-05, |
|
"loss": 1.3053, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.0694467798890197, |
|
"grad_norm": 1.0309621095657349, |
|
"learning_rate": 7.178480526548666e-05, |
|
"loss": 1.3314, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.0716888066812398, |
|
"grad_norm": 1.1735321283340454, |
|
"learning_rate": 7.167907535151027e-05, |
|
"loss": 1.2538, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.07393083347346, |
|
"grad_norm": 1.0819196701049805, |
|
"learning_rate": 7.157322592097682e-05, |
|
"loss": 1.3022, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.0761728602656802, |
|
"grad_norm": 1.0149192810058594, |
|
"learning_rate": 7.146725755743329e-05, |
|
"loss": 1.3713, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0784148870579005, |
|
"grad_norm": 0.8954042196273804, |
|
"learning_rate": 7.136117084508237e-05, |
|
"loss": 1.2962, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.0806569138501205, |
|
"grad_norm": 1.0265322923660278, |
|
"learning_rate": 7.125496636877922e-05, |
|
"loss": 1.3084, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0828989406423406, |
|
"grad_norm": 1.0515128374099731, |
|
"learning_rate": 7.114864471402818e-05, |
|
"loss": 1.2758, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.085140967434561, |
|
"grad_norm": 1.1725807189941406, |
|
"learning_rate": 7.104220646697962e-05, |
|
"loss": 1.3046, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.087382994226781, |
|
"grad_norm": 1.1021701097488403, |
|
"learning_rate": 7.093565221442672e-05, |
|
"loss": 1.2635, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.0896250210190013, |
|
"grad_norm": 1.1437387466430664, |
|
"learning_rate": 7.082898254380214e-05, |
|
"loss": 1.323, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0918670478112213, |
|
"grad_norm": 0.998076856136322, |
|
"learning_rate": 7.072219804317488e-05, |
|
"loss": 1.1992, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.0941090746034414, |
|
"grad_norm": 1.0903971195220947, |
|
"learning_rate": 7.061529930124695e-05, |
|
"loss": 1.2515, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.0963511013956617, |
|
"grad_norm": 1.143904209136963, |
|
"learning_rate": 7.050828690735022e-05, |
|
"loss": 1.286, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.0985931281878818, |
|
"grad_norm": 1.1476929187774658, |
|
"learning_rate": 7.040116145144311e-05, |
|
"loss": 1.2324, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.100835154980102, |
|
"grad_norm": 1.0371499061584473, |
|
"learning_rate": 7.029392352410733e-05, |
|
"loss": 1.2511, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.1030771817723222, |
|
"grad_norm": 1.069429636001587, |
|
"learning_rate": 7.018657371654464e-05, |
|
"loss": 1.3456, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.1053192085645422, |
|
"grad_norm": 0.9130118489265442, |
|
"learning_rate": 7.007911262057365e-05, |
|
"loss": 1.3043, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.1075612353567625, |
|
"grad_norm": 1.152266502380371, |
|
"learning_rate": 6.997154082862644e-05, |
|
"loss": 1.2775, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.1098032621489826, |
|
"grad_norm": 1.117077112197876, |
|
"learning_rate": 6.986385893374537e-05, |
|
"loss": 1.315, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.112045288941203, |
|
"grad_norm": 1.4062610864639282, |
|
"learning_rate": 6.975606752957984e-05, |
|
"loss": 1.2661, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.114287315733423, |
|
"grad_norm": 1.1715933084487915, |
|
"learning_rate": 6.96481672103829e-05, |
|
"loss": 1.3384, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.1165293425256433, |
|
"grad_norm": 0.937059760093689, |
|
"learning_rate": 6.95401585710081e-05, |
|
"loss": 1.2838, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.1187713693178634, |
|
"grad_norm": 1.0344353914260864, |
|
"learning_rate": 6.943204220690616e-05, |
|
"loss": 1.2396, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.1210133961100834, |
|
"grad_norm": 1.1878572702407837, |
|
"learning_rate": 6.932381871412167e-05, |
|
"loss": 1.329, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1232554229023037, |
|
"grad_norm": 1.155254602432251, |
|
"learning_rate": 6.92154886892898e-05, |
|
"loss": 1.2652, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.1254974496945238, |
|
"grad_norm": 1.0120606422424316, |
|
"learning_rate": 6.910705272963307e-05, |
|
"loss": 1.2904, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.127739476486744, |
|
"grad_norm": 1.284738540649414, |
|
"learning_rate": 6.899851143295799e-05, |
|
"loss": 1.236, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.1299815032789642, |
|
"grad_norm": 1.1446951627731323, |
|
"learning_rate": 6.888986539765181e-05, |
|
"loss": 1.3456, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.1322235300711843, |
|
"grad_norm": 1.183556079864502, |
|
"learning_rate": 6.878111522267917e-05, |
|
"loss": 1.3006, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.1344655568634046, |
|
"grad_norm": 1.1089967489242554, |
|
"learning_rate": 6.867226150757888e-05, |
|
"loss": 1.3098, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.1367075836556246, |
|
"grad_norm": 1.1036224365234375, |
|
"learning_rate": 6.856330485246054e-05, |
|
"loss": 1.2543, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.138949610447845, |
|
"grad_norm": 1.2652587890625, |
|
"learning_rate": 6.845424585800123e-05, |
|
"loss": 1.2941, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.141191637240065, |
|
"grad_norm": 1.0114392042160034, |
|
"learning_rate": 6.834508512544228e-05, |
|
"loss": 1.306, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.143433664032285, |
|
"grad_norm": 1.0309230089187622, |
|
"learning_rate": 6.823582325658588e-05, |
|
"loss": 1.2697, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.1456756908245054, |
|
"grad_norm": 1.490627408027649, |
|
"learning_rate": 6.812646085379178e-05, |
|
"loss": 1.2784, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.1479177176167255, |
|
"grad_norm": 1.1522798538208008, |
|
"learning_rate": 6.801699851997393e-05, |
|
"loss": 1.2499, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 1.0427577495574951, |
|
"learning_rate": 6.790743685859728e-05, |
|
"loss": 1.2711, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.1524017712011658, |
|
"grad_norm": 1.0645527839660645, |
|
"learning_rate": 6.779777647367434e-05, |
|
"loss": 1.2498, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.1546437979933861, |
|
"grad_norm": 1.0170249938964844, |
|
"learning_rate": 6.768801796976183e-05, |
|
"loss": 1.2622, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.1568858247856062, |
|
"grad_norm": 1.1332886219024658, |
|
"learning_rate": 6.75781619519575e-05, |
|
"loss": 1.3146, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.1591278515778263, |
|
"grad_norm": 1.1379398107528687, |
|
"learning_rate": 6.746820902589659e-05, |
|
"loss": 1.2898, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.1613698783700466, |
|
"grad_norm": 1.0116194486618042, |
|
"learning_rate": 6.735815979774866e-05, |
|
"loss": 1.3308, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.1636119051622666, |
|
"grad_norm": 0.9840161204338074, |
|
"learning_rate": 6.724801487421416e-05, |
|
"loss": 1.2739, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.165853931954487, |
|
"grad_norm": 1.3689374923706055, |
|
"learning_rate": 6.713777486252113e-05, |
|
"loss": 1.273, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.168095958746707, |
|
"grad_norm": 1.1147258281707764, |
|
"learning_rate": 6.702744037042179e-05, |
|
"loss": 1.3653, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.170337985538927, |
|
"grad_norm": 1.0359976291656494, |
|
"learning_rate": 6.691701200618925e-05, |
|
"loss": 1.2928, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.1725800123311474, |
|
"grad_norm": 1.0808576345443726, |
|
"learning_rate": 6.680649037861416e-05, |
|
"loss": 1.2834, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.1748220391233675, |
|
"grad_norm": 1.2251567840576172, |
|
"learning_rate": 6.669587609700129e-05, |
|
"loss": 1.206, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.1770640659155878, |
|
"grad_norm": 1.0829846858978271, |
|
"learning_rate": 6.658516977116623e-05, |
|
"loss": 1.2292, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1793060927078078, |
|
"grad_norm": 1.1904149055480957, |
|
"learning_rate": 6.647437201143201e-05, |
|
"loss": 1.275, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.181548119500028, |
|
"grad_norm": 1.223581314086914, |
|
"learning_rate": 6.636348342862575e-05, |
|
"loss": 1.2954, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.1837901462922482, |
|
"grad_norm": 1.1710941791534424, |
|
"learning_rate": 6.625250463407522e-05, |
|
"loss": 1.2927, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.1860321730844683, |
|
"grad_norm": 1.0496562719345093, |
|
"learning_rate": 6.61414362396056e-05, |
|
"loss": 1.2966, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.1882741998766886, |
|
"grad_norm": 1.0458779335021973, |
|
"learning_rate": 6.603027885753598e-05, |
|
"loss": 1.3081, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.1905162266689087, |
|
"grad_norm": 1.2921910285949707, |
|
"learning_rate": 6.591903310067608e-05, |
|
"loss": 1.2511, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.192758253461129, |
|
"grad_norm": 1.0614089965820312, |
|
"learning_rate": 6.580769958232279e-05, |
|
"loss": 1.2995, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.195000280253349, |
|
"grad_norm": 1.2062052488327026, |
|
"learning_rate": 6.569627891625683e-05, |
|
"loss": 1.3231, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.197242307045569, |
|
"grad_norm": 1.065064549446106, |
|
"learning_rate": 6.558477171673941e-05, |
|
"loss": 1.3189, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.1994843338377894, |
|
"grad_norm": 1.0669735670089722, |
|
"learning_rate": 6.547317859850875e-05, |
|
"loss": 1.3024, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.2017263606300095, |
|
"grad_norm": 1.1397708654403687, |
|
"learning_rate": 6.536150017677675e-05, |
|
"loss": 1.342, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.2039683874222298, |
|
"grad_norm": 1.1043004989624023, |
|
"learning_rate": 6.524973706722562e-05, |
|
"loss": 1.3442, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.2062104142144499, |
|
"grad_norm": 1.1043583154678345, |
|
"learning_rate": 6.513788988600441e-05, |
|
"loss": 1.2344, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.2084524410066702, |
|
"grad_norm": 1.1633187532424927, |
|
"learning_rate": 6.502595924972565e-05, |
|
"loss": 1.3185, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.2106944677988902, |
|
"grad_norm": 1.2432576417922974, |
|
"learning_rate": 6.491394577546204e-05, |
|
"loss": 1.2941, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.2129364945911103, |
|
"grad_norm": 1.0130048990249634, |
|
"learning_rate": 6.480185008074284e-05, |
|
"loss": 1.2495, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.2151785213833306, |
|
"grad_norm": 1.1565743684768677, |
|
"learning_rate": 6.468967278355072e-05, |
|
"loss": 1.2585, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.2174205481755507, |
|
"grad_norm": 0.9963768124580383, |
|
"learning_rate": 6.457741450231812e-05, |
|
"loss": 1.3497, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.2196625749677708, |
|
"grad_norm": 1.1197139024734497, |
|
"learning_rate": 6.446507585592399e-05, |
|
"loss": 1.2958, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.221904601759991, |
|
"grad_norm": 1.1450271606445312, |
|
"learning_rate": 6.435265746369033e-05, |
|
"loss": 1.3259, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.2241466285522111, |
|
"grad_norm": 1.0894269943237305, |
|
"learning_rate": 6.424015994537877e-05, |
|
"loss": 1.272, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.2263886553444314, |
|
"grad_norm": 1.1631505489349365, |
|
"learning_rate": 6.412758392118718e-05, |
|
"loss": 1.3315, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.2286306821366515, |
|
"grad_norm": 1.213643193244934, |
|
"learning_rate": 6.40149300117462e-05, |
|
"loss": 1.3228, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.2308727089288718, |
|
"grad_norm": 1.0162944793701172, |
|
"learning_rate": 6.390219883811591e-05, |
|
"loss": 1.2519, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.2331147357210919, |
|
"grad_norm": 1.1782135963439941, |
|
"learning_rate": 6.378939102178225e-05, |
|
"loss": 1.3281, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.235356762513312, |
|
"grad_norm": 1.062117576599121, |
|
"learning_rate": 6.367650718465379e-05, |
|
"loss": 1.2671, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.2375987893055322, |
|
"grad_norm": 1.3144171237945557, |
|
"learning_rate": 6.356354794905814e-05, |
|
"loss": 1.3392, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.2398408160977523, |
|
"grad_norm": 1.0592882633209229, |
|
"learning_rate": 6.345051393773861e-05, |
|
"loss": 1.2902, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.2420828428899726, |
|
"grad_norm": 1.2294663190841675, |
|
"learning_rate": 6.333740577385074e-05, |
|
"loss": 1.3081, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.2443248696821927, |
|
"grad_norm": 1.0388215780258179, |
|
"learning_rate": 6.322422408095886e-05, |
|
"loss": 1.2917, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.246566896474413, |
|
"grad_norm": 1.094425916671753, |
|
"learning_rate": 6.311096948303264e-05, |
|
"loss": 1.3252, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.248808923266633, |
|
"grad_norm": 1.3590023517608643, |
|
"learning_rate": 6.299764260444378e-05, |
|
"loss": 1.2825, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.2510509500588531, |
|
"grad_norm": 1.1007918119430542, |
|
"learning_rate": 6.288424406996238e-05, |
|
"loss": 1.2437, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.2532929768510734, |
|
"grad_norm": 1.2783552408218384, |
|
"learning_rate": 6.277077450475354e-05, |
|
"loss": 1.3539, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.2555350036432935, |
|
"grad_norm": 1.2107961177825928, |
|
"learning_rate": 6.265723453437404e-05, |
|
"loss": 1.3215, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2577770304355136, |
|
"grad_norm": 1.0384870767593384, |
|
"learning_rate": 6.254362478476878e-05, |
|
"loss": 1.2514, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.2600190572277339, |
|
"grad_norm": 1.3173192739486694, |
|
"learning_rate": 6.242994588226731e-05, |
|
"loss": 1.3129, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.262261084019954, |
|
"grad_norm": 0.9206444621086121, |
|
"learning_rate": 6.231619845358045e-05, |
|
"loss": 1.3108, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.2645031108121743, |
|
"grad_norm": 1.13257896900177, |
|
"learning_rate": 6.220238312579682e-05, |
|
"loss": 1.286, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2667451376043943, |
|
"grad_norm": 1.3280197381973267, |
|
"learning_rate": 6.208850052637933e-05, |
|
"loss": 1.2462, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.2689871643966146, |
|
"grad_norm": 1.2632642984390259, |
|
"learning_rate": 6.197455128316178e-05, |
|
"loss": 1.2761, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.2712291911888347, |
|
"grad_norm": 1.0141961574554443, |
|
"learning_rate": 6.186053602434539e-05, |
|
"loss": 1.2421, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.2734712179810548, |
|
"grad_norm": 1.2043393850326538, |
|
"learning_rate": 6.174645537849529e-05, |
|
"loss": 1.333, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.275713244773275, |
|
"grad_norm": 1.1990864276885986, |
|
"learning_rate": 6.163230997453712e-05, |
|
"loss": 1.3188, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 1.0753861665725708, |
|
"learning_rate": 6.15181004417535e-05, |
|
"loss": 1.3231, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2801972983577155, |
|
"grad_norm": 1.089961290359497, |
|
"learning_rate": 6.140382740978062e-05, |
|
"loss": 1.258, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.2824393251499355, |
|
"grad_norm": 1.217774510383606, |
|
"learning_rate": 6.12894915086047e-05, |
|
"loss": 1.2642, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.2846813519421558, |
|
"grad_norm": 1.180626630783081, |
|
"learning_rate": 6.117509336855865e-05, |
|
"loss": 1.2759, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.286923378734376, |
|
"grad_norm": 1.2082866430282593, |
|
"learning_rate": 6.106063362031838e-05, |
|
"loss": 1.3255, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.289165405526596, |
|
"grad_norm": 1.1015843152999878, |
|
"learning_rate": 6.094611289489951e-05, |
|
"loss": 1.3282, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2914074323188163, |
|
"grad_norm": 1.1207735538482666, |
|
"learning_rate": 6.083153182365383e-05, |
|
"loss": 1.2982, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.2936494591110363, |
|
"grad_norm": 1.1439082622528076, |
|
"learning_rate": 6.071689103826582e-05, |
|
"loss": 1.3463, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.2958914859032564, |
|
"grad_norm": 1.1893078088760376, |
|
"learning_rate": 6.060219117074913e-05, |
|
"loss": 1.2573, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.2981335126954767, |
|
"grad_norm": 1.2720766067504883, |
|
"learning_rate": 6.048743285344317e-05, |
|
"loss": 1.3029, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.3003755394876968, |
|
"grad_norm": 1.0983214378356934, |
|
"learning_rate": 6.037261671900953e-05, |
|
"loss": 1.2845, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.302617566279917, |
|
"grad_norm": 1.1721152067184448, |
|
"learning_rate": 6.02577434004286e-05, |
|
"loss": 1.3025, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.3048595930721372, |
|
"grad_norm": 1.1017165184020996, |
|
"learning_rate": 6.0142813530996e-05, |
|
"loss": 1.3166, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.3071016198643575, |
|
"grad_norm": 1.1608681678771973, |
|
"learning_rate": 6.002782774431911e-05, |
|
"loss": 1.259, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.3093436466565775, |
|
"grad_norm": 1.249861478805542, |
|
"learning_rate": 5.9912786674313614e-05, |
|
"loss": 1.2469, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.3115856734487976, |
|
"grad_norm": 1.2426470518112183, |
|
"learning_rate": 5.9797690955199926e-05, |
|
"loss": 1.2541, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.313827700241018, |
|
"grad_norm": 1.0516715049743652, |
|
"learning_rate": 5.968254122149974e-05, |
|
"loss": 1.277, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.316069727033238, |
|
"grad_norm": 1.4431426525115967, |
|
"learning_rate": 5.95673381080326e-05, |
|
"loss": 1.3182, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.3183117538254583, |
|
"grad_norm": 1.2504661083221436, |
|
"learning_rate": 5.945208224991226e-05, |
|
"loss": 1.3503, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.3205537806176784, |
|
"grad_norm": 1.1429800987243652, |
|
"learning_rate": 5.933677428254328e-05, |
|
"loss": 1.2767, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.3227958074098987, |
|
"grad_norm": 1.2553044557571411, |
|
"learning_rate": 5.922141484161751e-05, |
|
"loss": 1.2817, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.3250378342021187, |
|
"grad_norm": 1.2155333757400513, |
|
"learning_rate": 5.910600456311055e-05, |
|
"loss": 1.3347, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.3272798609943388, |
|
"grad_norm": 1.2551952600479126, |
|
"learning_rate": 5.8990544083278285e-05, |
|
"loss": 1.2119, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.329521887786559, |
|
"grad_norm": 1.1889550685882568, |
|
"learning_rate": 5.887503403865333e-05, |
|
"loss": 1.3307, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.3317639145787792, |
|
"grad_norm": 1.1134368181228638, |
|
"learning_rate": 5.8759475066041624e-05, |
|
"loss": 1.3094, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.3340059413709993, |
|
"grad_norm": 1.2652761936187744, |
|
"learning_rate": 5.8643867802518756e-05, |
|
"loss": 1.3296, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.3362479681632196, |
|
"grad_norm": 1.6688954830169678, |
|
"learning_rate": 5.852821288542658e-05, |
|
"loss": 1.3148, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.3384899949554399, |
|
"grad_norm": 0.9661517143249512, |
|
"learning_rate": 5.841251095236969e-05, |
|
"loss": 1.3197, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.34073202174766, |
|
"grad_norm": 1.1682339906692505, |
|
"learning_rate": 5.829676264121183e-05, |
|
"loss": 1.3328, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.34297404853988, |
|
"grad_norm": 1.109320044517517, |
|
"learning_rate": 5.818096859007247e-05, |
|
"loss": 1.2575, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.3452160753321003, |
|
"grad_norm": 1.2029309272766113, |
|
"learning_rate": 5.8065129437323206e-05, |
|
"loss": 1.3296, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3474581021243204, |
|
"grad_norm": 1.104525089263916, |
|
"learning_rate": 5.794924582158431e-05, |
|
"loss": 1.2558, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.3497001289165405, |
|
"grad_norm": 1.1124447584152222, |
|
"learning_rate": 5.783331838172116e-05, |
|
"loss": 1.3036, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.3519421557087608, |
|
"grad_norm": 1.1220247745513916, |
|
"learning_rate": 5.771734775684072e-05, |
|
"loss": 1.3161, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.3541841825009808, |
|
"grad_norm": 1.143099069595337, |
|
"learning_rate": 5.760133458628809e-05, |
|
"loss": 1.3066, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.3564262092932011, |
|
"grad_norm": 0.9693493247032166, |
|
"learning_rate": 5.7485279509642885e-05, |
|
"loss": 1.3089, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.3586682360854212, |
|
"grad_norm": 1.0467145442962646, |
|
"learning_rate": 5.736918316671572e-05, |
|
"loss": 1.2631, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.3609102628776415, |
|
"grad_norm": 1.1905845403671265, |
|
"learning_rate": 5.7253046197544754e-05, |
|
"loss": 1.2759, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.3631522896698616, |
|
"grad_norm": 1.0732934474945068, |
|
"learning_rate": 5.713686924239211e-05, |
|
"loss": 1.296, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.3653943164620816, |
|
"grad_norm": 1.2555313110351562, |
|
"learning_rate": 5.702065294174036e-05, |
|
"loss": 1.2306, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.367636343254302, |
|
"grad_norm": 1.033304214477539, |
|
"learning_rate": 5.690439793628896e-05, |
|
"loss": 1.3072, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.369878370046522, |
|
"grad_norm": 1.058167576789856, |
|
"learning_rate": 5.6788104866950754e-05, |
|
"loss": 1.3995, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.372120396838742, |
|
"grad_norm": 1.0705965757369995, |
|
"learning_rate": 5.667177437484845e-05, |
|
"loss": 1.3035, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3743624236309624, |
|
"grad_norm": 1.052674651145935, |
|
"learning_rate": 5.655540710131105e-05, |
|
"loss": 1.3247, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.3766044504231827, |
|
"grad_norm": 1.2467668056488037, |
|
"learning_rate": 5.643900368787036e-05, |
|
"loss": 1.3106, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.3788464772154028, |
|
"grad_norm": 1.1554597616195679, |
|
"learning_rate": 5.632256477625739e-05, |
|
"loss": 1.2686, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.3810885040076228, |
|
"grad_norm": 1.0708049535751343, |
|
"learning_rate": 5.6206091008398866e-05, |
|
"loss": 1.2774, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.3833305307998431, |
|
"grad_norm": 1.1895546913146973, |
|
"learning_rate": 5.608958302641364e-05, |
|
"loss": 1.1813, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.3855725575920632, |
|
"grad_norm": 1.4244434833526611, |
|
"learning_rate": 5.597304147260927e-05, |
|
"loss": 1.3678, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.3878145843842833, |
|
"grad_norm": 1.2443078756332397, |
|
"learning_rate": 5.5856466989478325e-05, |
|
"loss": 1.2248, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.3900566111765036, |
|
"grad_norm": 1.0258877277374268, |
|
"learning_rate": 5.573986021969494e-05, |
|
"loss": 1.2725, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3922986379687237, |
|
"grad_norm": 1.0962164402008057, |
|
"learning_rate": 5.5623221806111224e-05, |
|
"loss": 1.2393, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.394540664760944, |
|
"grad_norm": 1.3782082796096802, |
|
"learning_rate": 5.550655239175377e-05, |
|
"loss": 1.2817, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.396782691553164, |
|
"grad_norm": 1.2269506454467773, |
|
"learning_rate": 5.538985261982006e-05, |
|
"loss": 1.2376, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.3990247183453843, |
|
"grad_norm": 1.2134568691253662, |
|
"learning_rate": 5.527312313367492e-05, |
|
"loss": 1.2925, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.4012667451376044, |
|
"grad_norm": 1.275994896888733, |
|
"learning_rate": 5.515636457684705e-05, |
|
"loss": 1.351, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 1.0931142568588257, |
|
"learning_rate": 5.5039577593025335e-05, |
|
"loss": 1.3186, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 1.0260752439498901, |
|
"learning_rate": 5.492276282605544e-05, |
|
"loss": 1.2835, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.4079928255142649, |
|
"grad_norm": 1.2498077154159546, |
|
"learning_rate": 5.480592091993616e-05, |
|
"loss": 1.3022, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.410234852306485, |
|
"grad_norm": 1.0274704694747925, |
|
"learning_rate": 5.4689052518815954e-05, |
|
"loss": 1.2354, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.4124768790987052, |
|
"grad_norm": 1.3377097845077515, |
|
"learning_rate": 5.457215826698928e-05, |
|
"loss": 1.3043, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.4147189058909255, |
|
"grad_norm": 1.2201504707336426, |
|
"learning_rate": 5.4455238808893185e-05, |
|
"loss": 1.36, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.4169609326831456, |
|
"grad_norm": 1.1458607912063599, |
|
"learning_rate": 5.433829478910362e-05, |
|
"loss": 1.285, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.4192029594753657, |
|
"grad_norm": 1.1673274040222168, |
|
"learning_rate": 5.4221326852331965e-05, |
|
"loss": 1.3474, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.421444986267586, |
|
"grad_norm": 1.141150951385498, |
|
"learning_rate": 5.410433564342146e-05, |
|
"loss": 1.3101, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.423687013059806, |
|
"grad_norm": 1.18087899684906, |
|
"learning_rate": 5.398732180734365e-05, |
|
"loss": 1.3324, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.4259290398520261, |
|
"grad_norm": 1.1001255512237549, |
|
"learning_rate": 5.3870285989194814e-05, |
|
"loss": 1.3046, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.4281710666442464, |
|
"grad_norm": 1.2380887269973755, |
|
"learning_rate": 5.3753228834192384e-05, |
|
"loss": 1.283, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.4304130934364665, |
|
"grad_norm": 1.2417025566101074, |
|
"learning_rate": 5.3636150987671496e-05, |
|
"loss": 1.2536, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.4326551202286868, |
|
"grad_norm": 1.2791988849639893, |
|
"learning_rate": 5.35190530950813e-05, |
|
"loss": 1.314, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.4348971470209069, |
|
"grad_norm": 1.0879089832305908, |
|
"learning_rate": 5.3401935801981464e-05, |
|
"loss": 1.2726, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.4371391738131272, |
|
"grad_norm": 1.1599972248077393, |
|
"learning_rate": 5.328479975403864e-05, |
|
"loss": 1.3082, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.4393812006053472, |
|
"grad_norm": 1.1873195171356201, |
|
"learning_rate": 5.316764559702285e-05, |
|
"loss": 1.2853, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.4416232273975673, |
|
"grad_norm": 1.049641489982605, |
|
"learning_rate": 5.3050473976803974e-05, |
|
"loss": 1.3048, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.4438652541897876, |
|
"grad_norm": 1.0594843626022339, |
|
"learning_rate": 5.293328553934813e-05, |
|
"loss": 1.2845, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.4461072809820077, |
|
"grad_norm": 1.007035732269287, |
|
"learning_rate": 5.2816080930714194e-05, |
|
"loss": 1.3099, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.4483493077742278, |
|
"grad_norm": 1.0724034309387207, |
|
"learning_rate": 5.269886079705018e-05, |
|
"loss": 1.28, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.450591334566448, |
|
"grad_norm": 1.023113489151001, |
|
"learning_rate": 5.258162578458963e-05, |
|
"loss": 1.3397, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.4528333613586684, |
|
"grad_norm": 1.005807638168335, |
|
"learning_rate": 5.246437653964822e-05, |
|
"loss": 1.2121, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.4550753881508884, |
|
"grad_norm": 1.0102343559265137, |
|
"learning_rate": 5.234711370862001e-05, |
|
"loss": 1.276, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.4573174149431085, |
|
"grad_norm": 1.173030138015747, |
|
"learning_rate": 5.2229837937974e-05, |
|
"loss": 1.3212, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.4595594417353288, |
|
"grad_norm": 1.0489596128463745, |
|
"learning_rate": 5.2112549874250495e-05, |
|
"loss": 1.32, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.4618014685275489, |
|
"grad_norm": 1.084416389465332, |
|
"learning_rate": 5.199525016405759e-05, |
|
"loss": 1.2529, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.464043495319769, |
|
"grad_norm": 1.0936429500579834, |
|
"learning_rate": 5.187793945406759e-05, |
|
"loss": 1.241, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.4662855221119893, |
|
"grad_norm": 1.199352502822876, |
|
"learning_rate": 5.1760618391013424e-05, |
|
"loss": 1.2246, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.4685275489042093, |
|
"grad_norm": 1.1133605241775513, |
|
"learning_rate": 5.164328762168514e-05, |
|
"loss": 1.3192, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.4707695756964296, |
|
"grad_norm": 1.165466070175171, |
|
"learning_rate": 5.152594779292624e-05, |
|
"loss": 1.3289, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.4730116024886497, |
|
"grad_norm": 1.1635582447052002, |
|
"learning_rate": 5.140859955163021e-05, |
|
"loss": 1.2729, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.47525362928087, |
|
"grad_norm": 1.1590099334716797, |
|
"learning_rate": 5.1291243544736875e-05, |
|
"loss": 1.3111, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.47749565607309, |
|
"grad_norm": 1.157904863357544, |
|
"learning_rate": 5.1173880419228935e-05, |
|
"loss": 1.2491, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.4797376828653102, |
|
"grad_norm": 1.103690266609192, |
|
"learning_rate": 5.105651082212828e-05, |
|
"loss": 1.2776, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4819797096575305, |
|
"grad_norm": 1.020355463027954, |
|
"learning_rate": 5.093913540049249e-05, |
|
"loss": 1.1967, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.4842217364497505, |
|
"grad_norm": 1.2379658222198486, |
|
"learning_rate": 5.082175480141126e-05, |
|
"loss": 1.2427, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.4864637632419706, |
|
"grad_norm": 1.072657585144043, |
|
"learning_rate": 5.0704369672002835e-05, |
|
"loss": 1.325, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.488705790034191, |
|
"grad_norm": 1.2614028453826904, |
|
"learning_rate": 5.0586980659410434e-05, |
|
"loss": 1.3126, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.4909478168264112, |
|
"grad_norm": 1.1547425985336304, |
|
"learning_rate": 5.0469588410798676e-05, |
|
"loss": 1.2616, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.4931898436186313, |
|
"grad_norm": 1.2182773351669312, |
|
"learning_rate": 5.035219357335001e-05, |
|
"loss": 1.2992, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.4954318704108513, |
|
"grad_norm": 1.2161564826965332, |
|
"learning_rate": 5.023479679426122e-05, |
|
"loss": 1.2788, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.4976738972030716, |
|
"grad_norm": 1.122253656387329, |
|
"learning_rate": 5.011739872073968e-05, |
|
"loss": 1.3072, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.4999159239952917, |
|
"grad_norm": 0.9858971834182739, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2049, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.5021579507875118, |
|
"grad_norm": 1.0259901285171509, |
|
"learning_rate": 4.9882601279260324e-05, |
|
"loss": 1.3184, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.504399977579732, |
|
"grad_norm": 1.0712144374847412, |
|
"learning_rate": 4.9765203205738805e-05, |
|
"loss": 1.2826, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.5066420043719524, |
|
"grad_norm": 1.0313420295715332, |
|
"learning_rate": 4.964780642664999e-05, |
|
"loss": 1.3633, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.5088840311641722, |
|
"grad_norm": 1.1968498229980469, |
|
"learning_rate": 4.953041158920133e-05, |
|
"loss": 1.2564, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.5111260579563925, |
|
"grad_norm": 1.0766561031341553, |
|
"learning_rate": 4.9413019340589585e-05, |
|
"loss": 1.2387, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.5133680847486128, |
|
"grad_norm": 1.2741787433624268, |
|
"learning_rate": 4.929563032799717e-05, |
|
"loss": 1.2113, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.515610111540833, |
|
"grad_norm": 1.1092220544815063, |
|
"learning_rate": 4.917824519858875e-05, |
|
"loss": 1.2782, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.517852138333053, |
|
"grad_norm": 1.1351913213729858, |
|
"learning_rate": 4.906086459950753e-05, |
|
"loss": 1.2667, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.5200941651252733, |
|
"grad_norm": 1.1670454740524292, |
|
"learning_rate": 4.8943489177871735e-05, |
|
"loss": 1.2764, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.5223361919174934, |
|
"grad_norm": 1.1347793340682983, |
|
"learning_rate": 4.882611958077108e-05, |
|
"loss": 1.3095, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.5245782187097134, |
|
"grad_norm": 1.0640754699707031, |
|
"learning_rate": 4.870875645526313e-05, |
|
"loss": 1.2696, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.5268202455019337, |
|
"grad_norm": 1.1215641498565674, |
|
"learning_rate": 4.859140044836979e-05, |
|
"loss": 1.2618, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.529062272294154, |
|
"grad_norm": 0.9714592695236206, |
|
"learning_rate": 4.847405220707377e-05, |
|
"loss": 1.3044, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.531304299086374, |
|
"grad_norm": 1.055709719657898, |
|
"learning_rate": 4.8356712378314876e-05, |
|
"loss": 1.3893, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 1.0931789875030518, |
|
"learning_rate": 4.823938160898657e-05, |
|
"loss": 1.3075, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.5357883526708145, |
|
"grad_norm": 1.0338480472564697, |
|
"learning_rate": 4.812206054593242e-05, |
|
"loss": 1.334, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.5380303794630346, |
|
"grad_norm": 1.1629575490951538, |
|
"learning_rate": 4.800474983594242e-05, |
|
"loss": 1.2991, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.5402724062552546, |
|
"grad_norm": 0.9702677726745605, |
|
"learning_rate": 4.788745012574952e-05, |
|
"loss": 1.2372, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.542514433047475, |
|
"grad_norm": 1.1541732549667358, |
|
"learning_rate": 4.777016206202602e-05, |
|
"loss": 1.3549, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.5447564598396952, |
|
"grad_norm": 1.108521580696106, |
|
"learning_rate": 4.765288629137999e-05, |
|
"loss": 1.2351, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.5469984866319153, |
|
"grad_norm": 1.2135175466537476, |
|
"learning_rate": 4.753562346035178e-05, |
|
"loss": 1.2808, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.5492405134241354, |
|
"grad_norm": 1.0196810960769653, |
|
"learning_rate": 4.7418374215410374e-05, |
|
"loss": 1.2797, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.5514825402163557, |
|
"grad_norm": 1.1233173608779907, |
|
"learning_rate": 4.730113920294983e-05, |
|
"loss": 1.2932, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.5537245670085758, |
|
"grad_norm": 1.0524299144744873, |
|
"learning_rate": 4.7183919069285804e-05, |
|
"loss": 1.2907, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.5559665938007958, |
|
"grad_norm": 1.174949288368225, |
|
"learning_rate": 4.706671446065188e-05, |
|
"loss": 1.2596, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.5582086205930161, |
|
"grad_norm": 1.152764081954956, |
|
"learning_rate": 4.694952602319603e-05, |
|
"loss": 1.3416, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.5604506473852362, |
|
"grad_norm": 1.1165378093719482, |
|
"learning_rate": 4.683235440297717e-05, |
|
"loss": 1.2781, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.5626926741774563, |
|
"grad_norm": 1.0606844425201416, |
|
"learning_rate": 4.671520024596137e-05, |
|
"loss": 1.3009, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.5649347009696766, |
|
"grad_norm": 1.023228645324707, |
|
"learning_rate": 4.659806419801855e-05, |
|
"loss": 1.3042, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.5671767277618969, |
|
"grad_norm": 1.2059510946273804, |
|
"learning_rate": 4.6480946904918735e-05, |
|
"loss": 1.2997, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.569418754554117, |
|
"grad_norm": 1.0934103727340698, |
|
"learning_rate": 4.636384901232852e-05, |
|
"loss": 1.2771, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.571660781346337, |
|
"grad_norm": 1.032578945159912, |
|
"learning_rate": 4.6246771165807614e-05, |
|
"loss": 1.2553, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.5739028081385573, |
|
"grad_norm": 1.055982232093811, |
|
"learning_rate": 4.612971401080521e-05, |
|
"loss": 1.2673, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.5761448349307774, |
|
"grad_norm": 1.00336754322052, |
|
"learning_rate": 4.6012678192656364e-05, |
|
"loss": 1.2102, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.5783868617229975, |
|
"grad_norm": 1.0931719541549683, |
|
"learning_rate": 4.589566435657854e-05, |
|
"loss": 1.242, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5806288885152178, |
|
"grad_norm": 1.1765341758728027, |
|
"learning_rate": 4.5778673147668053e-05, |
|
"loss": 1.2747, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.582870915307438, |
|
"grad_norm": 1.2692338228225708, |
|
"learning_rate": 4.5661705210896395e-05, |
|
"loss": 1.3241, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.5851129420996581, |
|
"grad_norm": 1.2092036008834839, |
|
"learning_rate": 4.5544761191106826e-05, |
|
"loss": 1.271, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.5873549688918782, |
|
"grad_norm": 1.2053848505020142, |
|
"learning_rate": 4.542784173301072e-05, |
|
"loss": 1.2828, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.5895969956840985, |
|
"grad_norm": 1.1756088733673096, |
|
"learning_rate": 4.5310947481184064e-05, |
|
"loss": 1.2556, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.5918390224763186, |
|
"grad_norm": 1.1956021785736084, |
|
"learning_rate": 4.5194079080063835e-05, |
|
"loss": 1.2561, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5940810492685387, |
|
"grad_norm": 1.0988577604293823, |
|
"learning_rate": 4.5077237173944576e-05, |
|
"loss": 1.4031, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.596323076060759, |
|
"grad_norm": 1.1947277784347534, |
|
"learning_rate": 4.496042240697467e-05, |
|
"loss": 1.2634, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5985651028529793, |
|
"grad_norm": 1.0463786125183105, |
|
"learning_rate": 4.484363542315297e-05, |
|
"loss": 1.2856, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.6008071296451991, |
|
"grad_norm": 1.0513739585876465, |
|
"learning_rate": 4.4726876866325086e-05, |
|
"loss": 1.2232, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.6030491564374194, |
|
"grad_norm": 1.1749991178512573, |
|
"learning_rate": 4.461014738017995e-05, |
|
"loss": 1.3407, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.6052911832296397, |
|
"grad_norm": 1.0796834230422974, |
|
"learning_rate": 4.4493447608246253e-05, |
|
"loss": 1.2917, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.6075332100218598, |
|
"grad_norm": 1.0776811838150024, |
|
"learning_rate": 4.437677819388879e-05, |
|
"loss": 1.3028, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.6097752368140799, |
|
"grad_norm": 1.1453006267547607, |
|
"learning_rate": 4.4260139780305074e-05, |
|
"loss": 1.2752, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.6120172636063002, |
|
"grad_norm": 0.9714769124984741, |
|
"learning_rate": 4.4143533010521686e-05, |
|
"loss": 1.2274, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.6142592903985202, |
|
"grad_norm": 1.298377513885498, |
|
"learning_rate": 4.4026958527390735e-05, |
|
"loss": 1.2982, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.6165013171907403, |
|
"grad_norm": 1.1102139949798584, |
|
"learning_rate": 4.391041697358636e-05, |
|
"loss": 1.3122, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.6187433439829606, |
|
"grad_norm": 1.0720750093460083, |
|
"learning_rate": 4.3793908991601166e-05, |
|
"loss": 1.3212, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.620985370775181, |
|
"grad_norm": 1.0826951265335083, |
|
"learning_rate": 4.367743522374261e-05, |
|
"loss": 1.2706, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.623227397567401, |
|
"grad_norm": 0.9729198217391968, |
|
"learning_rate": 4.3560996312129636e-05, |
|
"loss": 1.3026, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.625469424359621, |
|
"grad_norm": 1.081446647644043, |
|
"learning_rate": 4.344459289868895e-05, |
|
"loss": 1.2997, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.6277114511518413, |
|
"grad_norm": 1.1220719814300537, |
|
"learning_rate": 4.3328225625151553e-05, |
|
"loss": 1.2356, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.6299534779440614, |
|
"grad_norm": 1.0425808429718018, |
|
"learning_rate": 4.3211895133049244e-05, |
|
"loss": 1.2756, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.6321955047362815, |
|
"grad_norm": 1.0694538354873657, |
|
"learning_rate": 4.309560206371106e-05, |
|
"loss": 1.316, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.6344375315285018, |
|
"grad_norm": 1.0898274183273315, |
|
"learning_rate": 4.297934705825966e-05, |
|
"loss": 1.3316, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.636679558320722, |
|
"grad_norm": 1.176999807357788, |
|
"learning_rate": 4.2863130757607906e-05, |
|
"loss": 1.2538, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.638921585112942, |
|
"grad_norm": 1.2387757301330566, |
|
"learning_rate": 4.274695380245526e-05, |
|
"loss": 1.3211, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.6411636119051622, |
|
"grad_norm": 1.3879566192626953, |
|
"learning_rate": 4.263081683328429e-05, |
|
"loss": 1.2902, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.6434056386973825, |
|
"grad_norm": 1.071897268295288, |
|
"learning_rate": 4.2514720490357134e-05, |
|
"loss": 1.251, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.6456476654896026, |
|
"grad_norm": 1.221535086631775, |
|
"learning_rate": 4.239866541371192e-05, |
|
"loss": 1.2478, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.6478896922818227, |
|
"grad_norm": 1.0815098285675049, |
|
"learning_rate": 4.2282652243159276e-05, |
|
"loss": 1.2811, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.650131719074043, |
|
"grad_norm": 1.1960694789886475, |
|
"learning_rate": 4.216668161827887e-05, |
|
"loss": 1.2937, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.652373745866263, |
|
"grad_norm": 1.307964563369751, |
|
"learning_rate": 4.20507541784157e-05, |
|
"loss": 1.2725, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.6546157726584831, |
|
"grad_norm": 1.150295376777649, |
|
"learning_rate": 4.193487056267679e-05, |
|
"loss": 1.2542, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.6568577994507034, |
|
"grad_norm": 1.0650702714920044, |
|
"learning_rate": 4.181903140992754e-05, |
|
"loss": 1.1894, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.6590998262429237, |
|
"grad_norm": 1.0896923542022705, |
|
"learning_rate": 4.170323735878818e-05, |
|
"loss": 1.3178, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 1.1696605682373047, |
|
"learning_rate": 4.1587489047630314e-05, |
|
"loss": 1.2414, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.6635838798273639, |
|
"grad_norm": 1.0459294319152832, |
|
"learning_rate": 4.1471787114573426e-05, |
|
"loss": 1.3447, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.6658259066195842, |
|
"grad_norm": 1.2181882858276367, |
|
"learning_rate": 4.135613219748125e-05, |
|
"loss": 1.2815, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.6680679334118043, |
|
"grad_norm": 1.2734391689300537, |
|
"learning_rate": 4.124052493395838e-05, |
|
"loss": 1.2832, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6703099602040243, |
|
"grad_norm": 1.102831244468689, |
|
"learning_rate": 4.112496596134667e-05, |
|
"loss": 1.2647, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.6725519869962446, |
|
"grad_norm": 1.2140913009643555, |
|
"learning_rate": 4.100945591672173e-05, |
|
"loss": 1.258, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.674794013788465, |
|
"grad_norm": 1.0504231452941895, |
|
"learning_rate": 4.089399543688947e-05, |
|
"loss": 1.2588, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.6770360405806848, |
|
"grad_norm": 1.1240216493606567, |
|
"learning_rate": 4.07785851583825e-05, |
|
"loss": 1.2755, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.679278067372905, |
|
"grad_norm": 1.1572685241699219, |
|
"learning_rate": 4.066322571745673e-05, |
|
"loss": 1.2768, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.6815200941651254, |
|
"grad_norm": 0.9713603854179382, |
|
"learning_rate": 4.054791775008775e-05, |
|
"loss": 1.2288, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6837621209573455, |
|
"grad_norm": 1.1444541215896606, |
|
"learning_rate": 4.043266189196741e-05, |
|
"loss": 1.2193, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.6860041477495655, |
|
"grad_norm": 1.0875182151794434, |
|
"learning_rate": 4.031745877850026e-05, |
|
"loss": 1.2802, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6882461745417858, |
|
"grad_norm": 1.1171207427978516, |
|
"learning_rate": 4.02023090448001e-05, |
|
"loss": 1.307, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.690488201334006, |
|
"grad_norm": 0.991519570350647, |
|
"learning_rate": 4.008721332568639e-05, |
|
"loss": 1.2811, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.692730228126226, |
|
"grad_norm": 1.1010782718658447, |
|
"learning_rate": 3.9972172255680886e-05, |
|
"loss": 1.2631, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.6949722549184463, |
|
"grad_norm": 1.1108042001724243, |
|
"learning_rate": 3.985718646900402e-05, |
|
"loss": 1.3115, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.6972142817106666, |
|
"grad_norm": 1.009866714477539, |
|
"learning_rate": 3.974225659957141e-05, |
|
"loss": 1.2613, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.6994563085028866, |
|
"grad_norm": 1.0787150859832764, |
|
"learning_rate": 3.9627383280990474e-05, |
|
"loss": 1.353, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.7016983352951067, |
|
"grad_norm": 1.0613850355148315, |
|
"learning_rate": 3.951256714655685e-05, |
|
"loss": 1.3048, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.703940362087327, |
|
"grad_norm": 1.1148884296417236, |
|
"learning_rate": 3.939780882925088e-05, |
|
"loss": 1.2918, |
|
"step": 3800 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 6690, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.325845119889375e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|