{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0003621220351258,
  "eval_steps": 346,
  "global_step": 1381,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0007242440702516748, "grad_norm": 0.22644081711769104, "learning_rate": 2e-05, "loss": 10.3782, "step": 1},
    {"epoch": 0.0014484881405033496, "grad_norm": 0.21413658559322357, "learning_rate": 4e-05, "loss": 10.378, "step": 2},
    {"epoch": 0.0021727322107550242, "grad_norm": 0.20930856466293335, "learning_rate": 6e-05, "loss": 10.3801, "step": 3},
    {"epoch": 0.002896976281006699, "grad_norm": 0.21580834686756134, "learning_rate": 8e-05, "loss": 10.3772, "step": 4},
    {"epoch": 0.003621220351258374, "grad_norm": 0.22320739924907684, "learning_rate": 0.0001, "loss": 10.3771, "step": 5},
    {"epoch": 0.0043454644215100485, "grad_norm": 0.20463065803050995, "learning_rate": 0.00012, "loss": 10.3765, "step": 6},
    {"epoch": 0.005069708491761723, "grad_norm": 0.2269536256790161, "learning_rate": 0.00014, "loss": 10.374, "step": 7},
    {"epoch": 0.005793952562013398, "grad_norm": 0.23299536108970642, "learning_rate": 0.00016, "loss": 10.3733, "step": 8},
    {"epoch": 0.006518196632265073, "grad_norm": 0.2280343770980835, "learning_rate": 0.00018, "loss": 10.3716, "step": 9},
    {"epoch": 0.007242440702516748, "grad_norm": 0.23425506055355072, "learning_rate": 0.0002, "loss": 10.371, "step": 10},
    {"epoch": 0.007966684772768424, "grad_norm": 0.26600638031959534, "learning_rate": 0.00019999973746050225, "loss": 10.3668, "step": 11},
    {"epoch": 0.008690928843020097, "grad_norm": 0.2833026051521301, "learning_rate": 0.00019999894984338746, "loss": 10.3668, "step": 12},
    {"epoch": 0.009415172913271772, "grad_norm": 0.31404921412467957, "learning_rate": 0.00019999763715279132, "loss": 10.3583, "step": 13},
    {"epoch": 0.010139416983523447, "grad_norm": 0.3399479389190674, "learning_rate": 0.00019999579939560644, "loss": 10.3628, "step": 14},
    {"epoch": 0.010863661053775122, "grad_norm": 0.3927531838417053, "learning_rate": 0.00019999343658148253, "loss": 10.3534, "step": 15},
    {"epoch": 0.011587905124026797, "grad_norm": 0.43685317039489746, "learning_rate": 0.00019999054872282622, "loss": 10.3498, "step": 16},
    {"epoch": 0.012312149194278471, "grad_norm": 0.5002313852310181, "learning_rate": 0.00019998713583480103, "loss": 10.3404, "step": 17},
    {"epoch": 0.013036393264530146, "grad_norm": 0.5821593999862671, "learning_rate": 0.00019998319793532735, "loss": 10.3355, "step": 18},
    {"epoch": 0.013760637334781821, "grad_norm": 0.6062823534011841, "learning_rate": 0.00019997873504508222, "loss": 10.329, "step": 19},
    {"epoch": 0.014484881405033496, "grad_norm": 0.6338623762130737, "learning_rate": 0.0001999737471874994, "loss": 10.3277, "step": 20},
    {"epoch": 0.015209125475285171, "grad_norm": 0.7111278176307678, "learning_rate": 0.00019996823438876902, "loss": 10.3189, "step": 21},
    {"epoch": 0.015933369545536848, "grad_norm": 0.7820473313331604, "learning_rate": 0.00019996219667783765, "loss": 10.3128, "step": 22},
    {"epoch": 0.01665761361578852, "grad_norm": 0.8882144093513489, "learning_rate": 0.00019995563408640806, "loss": 10.3005, "step": 23},
    {"epoch": 0.017381857686040194, "grad_norm": 0.9162967205047607, "learning_rate": 0.00019994854664893906, "loss": 10.2918, "step": 24},
    {"epoch": 0.01810610175629187, "grad_norm": 0.9656468033790588, "learning_rate": 0.00019994093440264522, "loss": 10.277, "step": 25},
    {"epoch": 0.018830345826543544, "grad_norm": 0.9979098439216614, "learning_rate": 0.00019993279738749687, "loss": 10.2707, "step": 26},
    {"epoch": 0.01955458989679522, "grad_norm": 0.9871697425842285, "learning_rate": 0.00019992413564621985, "loss": 10.2556, "step": 27},
    {"epoch": 0.020278833967046894, "grad_norm": 1.0156805515289307, "learning_rate": 0.00019991494922429504, "loss": 10.2469, "step": 28},
    {"epoch": 0.02100307803729857, "grad_norm": 0.9702807068824768, "learning_rate": 0.00019990523816995848, "loss": 10.2315, "step": 29},
    {"epoch": 0.021727322107550243, "grad_norm": 0.9247342348098755, "learning_rate": 0.0001998950025342008, "loss": 10.2134, "step": 30},
    {"epoch": 0.022451566177801918, "grad_norm": 0.8712878227233887, "learning_rate": 0.00019988424237076728, "loss": 10.1968, "step": 31},
    {"epoch": 0.023175810248053593, "grad_norm": 0.8503078818321228, "learning_rate": 0.0001998729577361572, "loss": 10.1812, "step": 32},
    {"epoch": 0.023900054318305268, "grad_norm": 0.8318528532981873, "learning_rate": 0.0001998611486896238, "loss": 10.1752, "step": 33},
    {"epoch": 0.024624298388556943, "grad_norm": 0.7345334887504578, "learning_rate": 0.00019984881529317393, "loss": 10.1601, "step": 34},
    {"epoch": 0.025348542458808618, "grad_norm": 0.7379999160766602, "learning_rate": 0.0001998359576115677, "loss": 10.1499, "step": 35},
    {"epoch": 0.026072786529060293, "grad_norm": 0.674487829208374, "learning_rate": 0.00019982257571231804, "loss": 10.1309, "step": 36},
    {"epoch": 0.026797030599311968, "grad_norm": 0.6472688317298889, "learning_rate": 0.00019980866966569054, "loss": 10.1182, "step": 37},
    {"epoch": 0.027521274669563642, "grad_norm": 0.6024581789970398, "learning_rate": 0.00019979423954470286, "loss": 10.1074, "step": 38},
    {"epoch": 0.028245518739815317, "grad_norm": 0.610586404800415, "learning_rate": 0.0001997792854251246, "loss": 10.0958, "step": 39},
    {"epoch": 0.028969762810066992, "grad_norm": 0.5543527007102966, "learning_rate": 0.00019976380738547666, "loss": 10.0964, "step": 40},
    {"epoch": 0.029694006880318667, "grad_norm": 0.5993051528930664, "learning_rate": 0.000199747805507031, "loss": 10.0835, "step": 41},
    {"epoch": 0.030418250950570342, "grad_norm": 0.5518122911453247, "learning_rate": 0.0001997312798738101, "loss": 10.0662, "step": 42},
    {"epoch": 0.031142495020822017, "grad_norm": 0.5521525144577026, "learning_rate": 0.00019971423057258664, "loss": 10.0451, "step": 43},
    {"epoch": 0.031866739091073695, "grad_norm": 0.5692468285560608, "learning_rate": 0.00019969665769288284, "loss": 10.0466, "step": 44},
    {"epoch": 0.03259098316132537, "grad_norm": 0.5686005353927612, "learning_rate": 0.00019967856132697027, "loss": 10.0256, "step": 45},
    {"epoch": 0.03331522723157704, "grad_norm": 0.5743457078933716, "learning_rate": 0.00019965994156986912, "loss": 10.0049, "step": 46},
    {"epoch": 0.034039471301828716, "grad_norm": 0.5410902500152588, "learning_rate": 0.0001996407985193478, "loss": 10.0077, "step": 47},
    {"epoch": 0.03476371537208039, "grad_norm": 0.5433526635169983, "learning_rate": 0.0001996211322759225, "loss": 9.9934, "step": 48},
    {"epoch": 0.035487959442332066, "grad_norm": 0.5521161556243896, "learning_rate": 0.00019960094294285647, "loss": 9.9753, "step": 49},
    {"epoch": 0.03621220351258374, "grad_norm": 0.6015803217887878, "learning_rate": 0.00019958023062615973, "loss": 9.976, "step": 50},
    {"epoch": 0.036936447582835416, "grad_norm": 0.5433859825134277, "learning_rate": 0.00019955899543458824, "loss": 9.9617, "step": 51},
    {"epoch": 0.03766069165308709, "grad_norm": 0.5231461524963379, "learning_rate": 0.00019953723747964355, "loss": 9.9525, "step": 52},
    {"epoch": 0.038384935723338766, "grad_norm": 0.5396632552146912, "learning_rate": 0.00019951495687557213, "loss": 9.9383, "step": 53},
    {"epoch": 0.03910917979359044, "grad_norm": 0.5196948051452637, "learning_rate": 0.00019949215373936475, "loss": 9.928, "step": 54},
    {"epoch": 0.039833423863842116, "grad_norm": 0.4962172210216522, "learning_rate": 0.00019946882819075587, "loss": 9.9235, "step": 55},
    {"epoch": 0.04055766793409379, "grad_norm": 0.5424382090568542, "learning_rate": 0.00019944498035222305, "loss": 9.9033, "step": 56},
    {"epoch": 0.041281912004345465, "grad_norm": 0.5209097266197205, "learning_rate": 0.00019942061034898626, "loss": 9.902, "step": 57},
    {"epoch": 0.04200615607459714, "grad_norm": 0.4981401562690735, "learning_rate": 0.00019939571830900735, "loss": 9.8884, "step": 58},
    {"epoch": 0.042730400144848815, "grad_norm": 0.4946765899658203, "learning_rate": 0.0001993703043629891, "loss": 9.8841, "step": 59},
    {"epoch": 0.04345464421510049, "grad_norm": 0.526029646396637, "learning_rate": 0.00019934436864437485, "loss": 9.8607, "step": 60},
    {"epoch": 0.044178888285352165, "grad_norm": 0.528313159942627, "learning_rate": 0.0001993179112893476, "loss": 9.8549, "step": 61},
    {"epoch": 0.044903132355603836, "grad_norm": 0.5022369623184204, "learning_rate": 0.00019929093243682938, "loss": 9.83, "step": 62},
    {"epoch": 0.045627376425855515, "grad_norm": 0.5330261588096619, "learning_rate": 0.00019926343222848042, "loss": 9.8244, "step": 63},
    {"epoch": 0.046351620496107186, "grad_norm": 0.5101606249809265, "learning_rate": 0.0001992354108086986, "loss": 9.8145, "step": 64},
    {"epoch": 0.047075864566358865, "grad_norm": 0.5162172317504883, "learning_rate": 0.0001992068683246185, "loss": 9.8123, "step": 65},
    {"epoch": 0.047800108636610536, "grad_norm": 0.5125104784965515, "learning_rate": 0.0001991778049261107, "loss": 9.7947, "step": 66},
    {"epoch": 0.048524352706862214, "grad_norm": 0.5619991421699524, "learning_rate": 0.00019914822076578097, "loss": 9.773, "step": 67},
    {"epoch": 0.049248596777113886, "grad_norm": 0.5185785889625549, "learning_rate": 0.0001991181159989696, "loss": 9.7618, "step": 68},
    {"epoch": 0.049972840847365564, "grad_norm": 0.4937543272972107, "learning_rate": 0.0001990874907837503, "loss": 9.7543, "step": 69},
    {"epoch": 0.050697084917617236, "grad_norm": 0.5016326904296875, "learning_rate": 0.00019905634528092972, "loss": 9.7509, "step": 70},
    {"epoch": 0.051421328987868914, "grad_norm": 0.5426475405693054, "learning_rate": 0.0001990246796540463, "loss": 9.7287, "step": 71},
    {"epoch": 0.052145573058120585, "grad_norm": 0.5001341700553894, "learning_rate": 0.00019899249406936964, "loss": 9.7159, "step": 72},
    {"epoch": 0.052869817128372264, "grad_norm": 0.5133237242698669, "learning_rate": 0.00019895978869589946, "loss": 9.7129, "step": 73},
    {"epoch": 0.053594061198623935, "grad_norm": 0.5022423267364502, "learning_rate": 0.00019892656370536482, "loss": 9.6986, "step": 74},
    {"epoch": 0.054318305268875614, "grad_norm": 0.5012578368186951, "learning_rate": 0.0001988928192722231, "loss": 9.6968, "step": 75},
    {"epoch": 0.055042549339127285, "grad_norm": 0.5184592008590698, "learning_rate": 0.00019885855557365937, "loss": 9.6754, "step": 76},
    {"epoch": 0.05576679340937896, "grad_norm": 0.49060916900634766, "learning_rate": 0.000198823772789585, "loss": 9.6662, "step": 77},
    {"epoch": 0.056491037479630635, "grad_norm": 0.5203105211257935, "learning_rate": 0.0001987884711026371, "loss": 9.6577, "step": 78},
    {"epoch": 0.05721528154988231, "grad_norm": 0.5055034160614014, "learning_rate": 0.00019875265069817743, "loss": 9.6392, "step": 79},
    {"epoch": 0.057939525620133984, "grad_norm": 0.5237613320350647, "learning_rate": 0.00019871631176429145, "loss": 9.6415, "step": 80},
    {"epoch": 0.05866376969038566, "grad_norm": 0.5136992931365967, "learning_rate": 0.0001986794544917872, "loss": 9.6265, "step": 81},
    {"epoch": 0.059388013760637334, "grad_norm": 0.5193312168121338, "learning_rate": 0.00019864207907419447, "loss": 9.6138, "step": 82},
    {"epoch": 0.06011225783088901, "grad_norm": 0.5376698970794678, "learning_rate": 0.0001986041857077638, "loss": 9.6057, "step": 83},
    {"epoch": 0.060836501901140684, "grad_norm": 0.52781081199646, "learning_rate": 0.00019856577459146526, "loss": 9.5839, "step": 84},
    {"epoch": 0.06156074597139236, "grad_norm": 0.5407407879829407, "learning_rate": 0.00019852684592698756, "loss": 9.5891, "step": 85},
    {"epoch": 0.062284990041644034, "grad_norm": 0.5036855936050415, "learning_rate": 0.0001984873999187369, "loss": 9.5722, "step": 86},
    {"epoch": 0.06300923411189571, "grad_norm": 0.5385954976081848, "learning_rate": 0.00019844743677383604, "loss": 9.5572, "step": 87},
    {"epoch": 0.06373347818214739, "grad_norm": 0.5041705965995789, "learning_rate": 0.00019840695670212302, "loss": 9.5528, "step": 88},
    {"epoch": 0.06445772225239906, "grad_norm": 0.5461394190788269, "learning_rate": 0.00019836595991615022, "loss": 9.5181, "step": 89},
    {"epoch": 0.06518196632265073, "grad_norm": 0.5438934564590454, "learning_rate": 0.00019832444663118315, "loss": 9.5303, "step": 90},
    {"epoch": 0.06590621039290241, "grad_norm": 0.5632613301277161, "learning_rate": 0.00019828241706519934, "loss": 9.5119, "step": 91},
    {"epoch": 0.06663045446315408, "grad_norm": 0.4781850278377533, "learning_rate": 0.0001982398714388872, "loss": 9.5208, "step": 92},
    {"epoch": 0.06735469853340575, "grad_norm": 0.5311410427093506, "learning_rate": 0.00019819680997564492, "loss": 9.5111, "step": 93},
    {"epoch": 0.06807894260365743, "grad_norm": 0.5310368537902832, "learning_rate": 0.00019815323290157916, "loss": 9.4963, "step": 94},
    {"epoch": 0.06880318667390911, "grad_norm": 0.5059214234352112, "learning_rate": 0.000198109140445504, "loss": 9.4878, "step": 95},
    {"epoch": 0.06952743074416078, "grad_norm": 0.517311155796051, "learning_rate": 0.00019806453283893963, "loss": 9.478, "step": 96},
    {"epoch": 0.07025167481441245, "grad_norm": 0.5341803431510925, "learning_rate": 0.00019801941031611126, "loss": 9.4307, "step": 97},
    {"epoch": 0.07097591888466413, "grad_norm": 0.5468002557754517, "learning_rate": 0.0001979737731139478, "loss": 9.462, "step": 98},
    {"epoch": 0.07170016295491581, "grad_norm": 0.5668586492538452, "learning_rate": 0.00019792762147208056, "loss": 9.4286, "step": 99},
    {"epoch": 0.07242440702516748, "grad_norm": 0.6626901626586914, "learning_rate": 0.00019788095563284217, "loss": 9.4239, "step": 100},
    {"epoch": 0.07314865109541915, "grad_norm": 0.5568472743034363, "learning_rate": 0.00019783377584126508, "loss": 9.4093, "step": 101},
    {"epoch": 0.07387289516567083, "grad_norm": 0.5283701419830322, "learning_rate": 0.00019778608234508055, "loss": 9.4273, "step": 102},
    {"epoch": 0.07459713923592251, "grad_norm": 0.5079399943351746, "learning_rate": 0.00019773787539471705, "loss": 9.3889, "step": 103},
    {"epoch": 0.07532138330617417, "grad_norm": 0.5189304351806641, "learning_rate": 0.00019768915524329917, "loss": 9.3884, "step": 104},
    {"epoch": 0.07604562737642585, "grad_norm": 0.504858136177063, "learning_rate": 0.00019763992214664615, "loss": 9.4027, "step": 105},
    {"epoch": 0.07676987144667753, "grad_norm": 0.5144493579864502, "learning_rate": 0.00019759017636327073, "loss": 9.3547, "step": 106},
    {"epoch": 0.07749411551692921, "grad_norm": 0.49943238496780396, "learning_rate": 0.0001975399181543775, "loss": 9.3584, "step": 107},
    {"epoch": 0.07821835958718087, "grad_norm": 0.5128799080848694, "learning_rate": 0.0001974891477838618, "loss": 9.3521, "step": 108},
    {"epoch": 0.07894260365743255, "grad_norm": 0.5011847019195557, "learning_rate": 0.00019743786551830813, "loss": 9.3468, "step": 109},
    {"epoch": 0.07966684772768423, "grad_norm": 0.5017261505126953, "learning_rate": 0.00019738607162698895, "loss": 9.3335, "step": 110},
    {"epoch": 0.08039109179793591, "grad_norm": 0.5072489976882935, "learning_rate": 0.00019733376638186308, "loss": 9.3045, "step": 111},
    {"epoch": 0.08111533586818757, "grad_norm": 0.5118771195411682, "learning_rate": 0.00019728095005757434, "loss": 9.3186, "step": 112},
    {"epoch": 0.08183957993843925, "grad_norm": 0.5291309356689453, "learning_rate": 0.0001972276229314502, "loss": 9.3077, "step": 113},
    {"epoch": 0.08256382400869093, "grad_norm": 0.49927669763565063, "learning_rate": 0.00019717378528350023, "loss": 9.2945, "step": 114},
    {"epoch": 0.08328806807894261, "grad_norm": 0.5379682183265686, "learning_rate": 0.00019711943739641452, "loss": 9.2844, "step": 115},
    {"epoch": 0.08401231214919427, "grad_norm": 0.5146782398223877, "learning_rate": 0.00019706457955556247, "loss": 9.2589, "step": 116},
    {"epoch": 0.08473655621944595, "grad_norm": 0.5160855054855347, "learning_rate": 0.0001970092120489911, "loss": 9.2522, "step": 117},
    {"epoch": 0.08546080028969763, "grad_norm": 0.5257864594459534, "learning_rate": 0.0001969533351674235, "loss": 9.2622, "step": 118},
    {"epoch": 0.08618504435994931, "grad_norm": 0.4924103617668152, "learning_rate": 0.00019689694920425746, "loss": 9.2382, "step": 119},
    {"epoch": 0.08690928843020097, "grad_norm": 0.5249960422515869, "learning_rate": 0.00019684005445556383, "loss": 9.2173, "step": 120},
    {"epoch": 0.08763353250045265, "grad_norm": 0.5177789330482483, "learning_rate": 0.000196782651220085, "loss": 9.2171, "step": 121},
    {"epoch": 0.08835777657070433, "grad_norm": 0.5228692889213562, "learning_rate": 0.0001967247397992333, "loss": 9.2168, "step": 122},
    {"epoch": 0.08908202064095601, "grad_norm": 0.5426861047744751, "learning_rate": 0.00019666632049708942, "loss": 9.1786, "step": 123},
    {"epoch": 0.08980626471120767, "grad_norm": 0.5005182027816772, "learning_rate": 0.00019660739362040082, "loss": 9.1994, "step": 124},
    {"epoch": 0.09053050878145935, "grad_norm": 0.516734778881073, "learning_rate": 0.00019654795947858023, "loss": 9.172, "step": 125},
    {"epoch": 0.09125475285171103, "grad_norm": 0.5049043297767639, "learning_rate": 0.00019648801838370377, "loss": 9.1714, "step": 126},
    {"epoch": 0.09197899692196271, "grad_norm": 0.5162491202354431, "learning_rate": 0.00019642757065050956, "loss": 9.149, "step": 127},
    {"epoch": 0.09270324099221437, "grad_norm": 0.49220365285873413, "learning_rate": 0.000196366616596396, "loss": 9.164, "step": 128},
    {"epoch": 0.09342748506246605, "grad_norm": 0.508711576461792, "learning_rate": 0.00019630515654141996, "loss": 9.1311, "step": 129},
    {"epoch": 0.09415172913271773, "grad_norm": 0.5290361642837524, "learning_rate": 0.0001962431908082953, "loss": 9.1079, "step": 130},
    {"epoch": 0.09487597320296941, "grad_norm": 0.5396744608879089, "learning_rate": 0.00019618071972239107, "loss": 9.115, "step": 131},
    {"epoch": 0.09560021727322107, "grad_norm": 0.49481114745140076, "learning_rate": 0.0001961177436117298, "loss": 9.1326, "step": 132},
    {"epoch": 0.09632446134347275, "grad_norm": 0.4906870126724243, "learning_rate": 0.0001960542628069859, "loss": 9.1192, "step": 133},
    {"epoch": 0.09704870541372443, "grad_norm": 0.5055575370788574, "learning_rate": 0.00019599027764148367, "loss": 9.1099, "step": 134},
    {"epoch": 0.0977729494839761, "grad_norm": 0.5195087790489197, "learning_rate": 0.00019592578845119575, "loss": 9.0824, "step": 135},
    {"epoch": 0.09849719355422777, "grad_norm": 0.48597005009651184, "learning_rate": 0.0001958607955747414, "loss": 9.0973, "step": 136},
    {"epoch": 0.09922143762447945, "grad_norm": 0.5294395089149475, "learning_rate": 0.0001957952993533845, "loss": 9.0725, "step": 137},
    {"epoch": 0.09994568169473113, "grad_norm": 0.5333034992218018, "learning_rate": 0.00019572930013103202, "loss": 9.0177, "step": 138},
    {"epoch": 0.10066992576498279, "grad_norm": 0.5079728960990906, "learning_rate": 0.00019566279825423196, "loss": 9.0231, "step": 139},
    {"epoch": 0.10139416983523447, "grad_norm": 0.5101923942565918, "learning_rate": 0.00019559579407217172, "loss": 9.0478, "step": 140},
    {"epoch": 0.10211841390548615, "grad_norm": 0.5175127387046814, "learning_rate": 0.0001955282879366762, "loss": 9.0333, "step": 141},
    {"epoch": 0.10284265797573783, "grad_norm": 0.5011202692985535, "learning_rate": 0.00019546028020220595, "loss": 9.0051, "step": 142},
    {"epoch": 0.10356690204598949, "grad_norm": 0.5366819500923157, "learning_rate": 0.00019539177122585523, "loss": 9.014, "step": 143},
    {"epoch": 0.10429114611624117, "grad_norm": 0.5233102440834045, "learning_rate": 0.00019532276136735038, "loss": 8.9821, "step": 144},
    {"epoch": 0.10501539018649285, "grad_norm": 0.5242369771003723, "learning_rate": 0.00019525325098904757, "loss": 9.0231, "step": 145},
    {"epoch": 0.10573963425674453, "grad_norm": 0.5241793394088745, "learning_rate": 0.00019518324045593132, "loss": 8.9769, "step": 146},
    {"epoch": 0.10646387832699619, "grad_norm": 0.5393111109733582, "learning_rate": 0.0001951127301356121, "loss": 9.0189, "step": 147},
    {"epoch": 0.10718812239724787, "grad_norm": 0.5141487717628479, "learning_rate": 0.00019504172039832492, "loss": 8.9846, "step": 148},
    {"epoch": 0.10791236646749955, "grad_norm": 0.5347380042076111, "learning_rate": 0.00019497021161692687, "loss": 8.9623, "step": 149},
    {"epoch": 0.10863661053775123, "grad_norm": 0.5859756469726562, "learning_rate": 0.00019489820416689565, "loss": 8.9612, "step": 150},
    {"epoch": 0.10936085460800289, "grad_norm": 0.5630178451538086, "learning_rate": 0.0001948256984263272, "loss": 8.9316, "step": 151},
    {"epoch": 0.11008509867825457, "grad_norm": 0.5126703381538391, "learning_rate": 0.00019475269477593394, "loss": 8.9421, "step": 152},
    {"epoch": 0.11080934274850625, "grad_norm": 0.5410084128379822, "learning_rate": 0.0001946791935990427, "loss": 8.9093, "step": 153},
    {"epoch": 0.11153358681875793, "grad_norm": 0.500529408454895, "learning_rate": 0.00019460519528159275, "loss": 8.9258, "step": 154},
    {"epoch": 0.11225783088900959, "grad_norm": 0.505597710609436, "learning_rate": 0.00019453070021213366, "loss": 8.9067, "step": 155},
    {"epoch": 0.11298207495926127, "grad_norm": 0.5078559517860413, "learning_rate": 0.00019445570878182342, "loss": 8.8985, "step": 156},
    {"epoch": 0.11370631902951295, "grad_norm": 0.48962968587875366, "learning_rate": 0.0001943802213844263, "loss": 8.8889, "step": 157},
    {"epoch": 0.11443056309976463, "grad_norm": 0.521984338760376, "learning_rate": 0.00019430423841631074, "loss": 8.8726, "step": 158},
    {"epoch": 0.11515480717001629, "grad_norm": 0.510870635509491, "learning_rate": 0.00019422776027644737, "loss": 8.8657, "step": 159},
    {"epoch": 0.11587905124026797, "grad_norm": 0.48674073815345764, "learning_rate": 0.0001941507873664068, "loss": 8.8745, "step": 160},
    {"epoch": 0.11660329531051965, "grad_norm": 0.5165760517120361, "learning_rate": 0.00019407332009035769, "loss": 8.8614, "step": 161},
    {"epoch": 0.11732753938077133, "grad_norm": 0.5049470067024231, "learning_rate": 0.00019399535885506432, "loss": 8.8781, "step": 162},
    {"epoch": 0.11805178345102299, "grad_norm": 0.49627047777175903, "learning_rate": 0.00019391690406988485, "loss": 8.832, "step": 163},
    {"epoch": 0.11877602752127467, "grad_norm": 0.5152491331100464, "learning_rate": 0.00019383795614676886, "loss": 8.852, "step": 164},
    {"epoch": 0.11950027159152635, "grad_norm": 0.49169403314590454, "learning_rate": 0.00019375851550025529, "loss": 8.8621, "step": 165},
    {"epoch": 0.12022451566177803, "grad_norm": 0.5024704337120056, "learning_rate": 0.00019367858254747028, "loss": 8.8057, "step": 166},
    {"epoch": 0.12094875973202969, "grad_norm": 0.5297830104827881, "learning_rate": 0.00019359815770812503, "loss": 8.7918, "step": 167},
    {"epoch": 0.12167300380228137, "grad_norm": 0.5009456276893616, "learning_rate": 0.00019351724140451344, "loss": 8.795, "step": 168},
    {"epoch": 0.12239724787253305, "grad_norm": 0.49791526794433594, "learning_rate": 0.00019343583406151004, "loss": 8.7908, "step": 169},
    {"epoch": 0.12312149194278472, "grad_norm": 0.4869917035102844, "learning_rate": 0.00019335393610656767, "loss": 8.8117, "step": 170},
    {"epoch": 0.12384573601303639, "grad_norm": 0.4747772216796875, "learning_rate": 0.00019327154796971527, "loss": 8.7738, "step": 171},
    {"epoch": 0.12456998008328807, "grad_norm": 0.49815577268600464, "learning_rate": 0.0001931886700835557, "loss": 8.7769, "step": 172},
    {"epoch": 0.12529422415353975, "grad_norm": 0.4809548556804657, "learning_rate": 0.00019310530288326329, "loss": 8.7769, "step": 173},
    {"epoch": 0.12601846822379142, "grad_norm": 0.519578218460083, "learning_rate": 0.00019302144680658173, "loss": 8.7652, "step": 174},
    {"epoch": 0.1267427122940431, "grad_norm": 0.49192696809768677, "learning_rate": 0.0001929371022938216, "loss": 8.7399, "step": 175},
    {"epoch": 0.12746695636429478, "grad_norm": 0.5109142661094666, "learning_rate": 0.00019285226978785832, "loss": 8.7374, "step": 176},
    {"epoch": 0.12819120043454643, "grad_norm": 0.48110830783843994, "learning_rate": 0.00019276694973412948, "loss": 8.7524, "step": 177},
    {"epoch": 0.1289154445047981, "grad_norm": 0.511137843132019, "learning_rate": 0.0001926811425806328, "loss": 8.7089, "step": 178},
    {"epoch": 0.1296396885750498, "grad_norm": 0.49463972449302673, "learning_rate": 0.00019259484877792358, "loss": 8.7089, "step": 179},
    {"epoch": 0.13036393264530147, "grad_norm": 0.4988570213317871, "learning_rate": 0.00019250806877911249, "loss": 8.7283, "step": 180},
    {"epoch": 0.13108817671555315, "grad_norm": 0.48762744665145874, "learning_rate": 0.00019242080303986305, "loss": 8.6905, "step": 181},
    {"epoch": 0.13181242078580482, "grad_norm": 0.5100088119506836, "learning_rate": 0.00019233305201838937, "loss": 8.6756, "step": 182},
    {"epoch": 0.1325366648560565, "grad_norm": 0.502967119216919, "learning_rate": 0.00019224481617545358, "loss": 8.6831, "step": 183},
    {"epoch": 0.13326090892630815, "grad_norm": 0.4762354791164398, "learning_rate": 0.00019215609597436362, "loss": 8.6856, "step": 184},
    {"epoch": 0.13398515299655983, "grad_norm": 0.5262447595596313, "learning_rate": 0.00019206689188097054, "loss": 8.6393, "step": 185},
    {"epoch": 0.1347093970668115, "grad_norm": 0.5070264339447021, "learning_rate": 0.00019197720436366637, "loss": 8.628, "step": 186},
    {"epoch": 0.1354336411370632, "grad_norm": 0.4977290630340576, "learning_rate": 0.00019188703389338142, "loss": 8.6412, "step": 187},
    {"epoch": 0.13615788520731487, "grad_norm": 0.5031901001930237, "learning_rate": 0.00019179638094358187, "loss": 8.6122, "step": 188},
    {"epoch": 0.13688212927756654, "grad_norm": 0.5076611638069153, "learning_rate": 0.00019170524599026732, "loss": 8.6475, "step": 189},
    {"epoch": 0.13760637334781822, "grad_norm": 0.5410529971122742, "learning_rate": 0.00019161362951196825, "loss": 8.6091, "step": 190},
    {"epoch": 0.1383306174180699, "grad_norm": 0.5248331427574158, "learning_rate": 0.0001915215319897436, "loss": 8.6022, "step": 191},
    {"epoch": 0.13905486148832155, "grad_norm": 0.5243874788284302, "learning_rate": 0.00019142895390717804, "loss": 8.6125, "step": 192},
    {"epoch": 0.13977910555857323, "grad_norm": 0.597634494304657, "learning_rate": 0.0001913358957503797, "loss": 8.6093, "step": 193},
    {"epoch": 0.1405033496288249, "grad_norm": 0.528044581413269, "learning_rate": 0.0001912423580079774, "loss": 8.6003, "step": 194},
    {"epoch": 0.1412275936990766, "grad_norm": 0.5066649913787842, "learning_rate": 0.00019114834117111814, "loss": 8.5842, "step": 195},
    {"epoch": 0.14195183776932827, "grad_norm": 0.49779024720191956, "learning_rate": 0.00019105384573346463, "loss": 8.6286, "step": 196},
    {"epoch": 0.14267608183957994, "grad_norm": 0.5297601222991943, "learning_rate": 0.00019095887219119256, "loss": 8.5814, "step": 197},
    {"epoch": 0.14340032590983162, "grad_norm": 0.5315724015235901, "learning_rate": 0.000190863421042988, "loss": 8.5826, "step": 198},
    {"epoch": 0.1441245699800833, "grad_norm": 0.5855052471160889, "learning_rate": 0.00019076749279004496, "loss": 8.5369, "step": 199},
    {"epoch": 0.14484881405033495, "grad_norm": 0.5880944728851318, "learning_rate": 0.0001906710879360625, "loss": 8.5681, "step": 200},
    {"epoch": 0.14557305812058663, "grad_norm": 0.5085429549217224, "learning_rate": 0.00019057420698724223, "loss": 8.5739, "step": 201},
    {"epoch": 0.1462973021908383, "grad_norm": 0.541529655456543, "learning_rate": 0.00019047685045228569, "loss": 8.5405, "step": 202},
    {"epoch": 0.14702154626108999, "grad_norm": 0.5351887941360474, "learning_rate": 0.0001903790188423916, "loss": 8.5307, "step": 203},
    {"epoch": 0.14774579033134166, "grad_norm": 0.4913345277309418, "learning_rate": 0.00019028071267125323, "loss": 8.5303, "step": 204},
    {"epoch": 0.14847003440159334, "grad_norm": 0.476534903049469, "learning_rate": 0.0001901819324550556, "loss": 8.546, "step": 205},
    {"epoch": 0.14919427847184502, "grad_norm": 0.511883020401001, "learning_rate": 0.00019008267871247286, "loss": 8.523, "step": 206},
    {"epoch": 0.1499185225420967, "grad_norm": 0.5081936717033386, "learning_rate": 0.0001899829519646656, "loss": 8.5286, "step": 207},
    {"epoch": 0.15064276661234835, "grad_norm": 0.4755452275276184, "learning_rate": 0.000189882752735278, "loss": 8.5464, "step": 208},
    {"epoch": 0.15136701068260003, "grad_norm": 0.5027110576629639, "learning_rate": 0.0001897820815504352, "loss": 8.5272, "step": 209},
    {"epoch": 0.1520912547528517, "grad_norm": 0.5083355903625488, "learning_rate": 0.0001896809389387404, "loss": 8.4798, "step": 210},
    {"epoch": 0.15281549882310339, "grad_norm": 0.5499453544616699, "learning_rate": 0.00018957932543127226, "loss": 8.5331, "step": 211},
    {"epoch": 0.15353974289335506, "grad_norm": 0.51893150806427, "learning_rate": 0.00018947724156158192, "loss": 8.4911, "step": 212},
    {"epoch": 0.15426398696360674, "grad_norm": 0.5075845718383789, "learning_rate": 0.00018937468786569034, "loss": 8.4956, "step": 213},
    {"epoch": 0.15498823103385842, "grad_norm": 0.5007838606834412, "learning_rate": 0.00018927166488208548, "loss": 8.4777, "step": 214},
    {"epoch": 0.1557124751041101, "grad_norm": 0.5123993158340454, "learning_rate": 0.00018916817315171934, "loss": 8.4535, "step": 215},
    {"epoch": 0.15643671917436175, "grad_norm": 0.5103585124015808, "learning_rate": 0.00018906421321800528, "loss": 8.4328, "step": 216},
    {"epoch": 0.15716096324461343, "grad_norm": 0.49165982007980347, "learning_rate": 0.00018895978562681506, "loss": 8.4659, "step": 217},
    {"epoch": 0.1578852073148651, "grad_norm": 0.5067943930625916, "learning_rate": 0.00018885489092647606, "loss": 8.4503, "step": 218},
    {"epoch": 0.15860945138511678, "grad_norm": 0.5133797526359558, "learning_rate": 0.0001887495296677683, "loss": 8.4473, "step": 219},
    {"epoch": 0.15933369545536846, "grad_norm": 0.4692786633968353, "learning_rate": 0.0001886437024039216, "loss": 8.4552, "step": 220},
    {"epoch": 0.16005793952562014, "grad_norm": 0.4994044005870819, "learning_rate": 0.00018853740969061272, "loss": 8.4293, "step": 221},
    {"epoch": 0.16078218359587182, "grad_norm": 0.5162525773048401, "learning_rate": 0.00018843065208596236, "loss": 8.3714, "step": 222},
    {"epoch": 0.16150642766612347, "grad_norm": 0.48502400517463684, "learning_rate": 0.00018832343015053228, "loss": 8.4212, "step": 223},
    {"epoch": 0.16223067173637515, "grad_norm": 0.48316872119903564, "learning_rate": 0.00018821574444732235, "loss": 8.3908, "step": 224},
    {"epoch": 0.16295491580662683, "grad_norm": 0.5301130414009094, "learning_rate": 0.0001881075955417676, "loss": 8.4019, "step": 225},
    {"epoch": 0.1636791598768785, "grad_norm": 0.5196858048439026, "learning_rate": 0.0001879989840017351, "loss": 8.3669, "step": 226},
    {"epoch": 0.16440340394713018, "grad_norm": 0.4746645390987396, "learning_rate": 0.0001878899103975214, "loss": 8.3958, "step": 227},
    {"epoch": 0.16512764801738186, "grad_norm": 0.4931572675704956, "learning_rate": 0.000187780375301849, "loss": 8.3942, "step": 228},
    {"epoch": 0.16585189208763354, "grad_norm": 0.5066322684288025, "learning_rate": 0.00018767037928986367, "loss": 8.3708, "step": 229},
    {"epoch": 0.16657613615788522, "grad_norm": 0.46926993131637573, "learning_rate": 0.00018755992293913135, "loss": 8.3315, "step": 230},
    {"epoch": 0.16730038022813687, "grad_norm": 0.4935818314552307, "learning_rate": 0.00018744900682963523, "loss": 8.3768, "step": 231},
    {"epoch": 0.16802462429838855, "grad_norm": 0.5058098435401917, "learning_rate": 0.0001873376315437724, "loss": 8.332, "step": 232},
    {"epoch": 0.16874886836864023, "grad_norm": 0.4851613938808441, "learning_rate": 0.00018722579766635117, "loss": 8.3578, "step": 233},
    {"epoch": 0.1694731124388919, "grad_norm": 0.4681190550327301, "learning_rate": 0.00018711350578458767, "loss": 8.3757, "step": 234},
    {"epoch": 0.17019735650914358, "grad_norm": 0.48669615387916565, "learning_rate": 0.00018700075648810303, "loss": 8.3425, "step": 235},
    {"epoch": 0.17092160057939526, "grad_norm": 0.4987237751483917, "learning_rate": 0.00018688755036892012, "loss": 8.3175, "step": 236},
    {"epoch": 0.17164584464964694, "grad_norm": 0.4954805076122284, "learning_rate": 0.0001867738880214605, "loss": 8.3055, "step": 237},
    {"epoch": 0.17237008871989862, "grad_norm": 0.5057269930839539, "learning_rate": 0.00018665977004254125, "loss": 8.3266, "step": 238},
    {"epoch": 0.17309433279015027, "grad_norm": 0.5142120122909546, "learning_rate": 0.00018654519703137191, "loss": 8.2739, "step": 239},
    {"epoch": 0.17381857686040195, "grad_norm": 0.49817487597465515, "learning_rate": 0.00018643016958955135, "loss": 8.292, "step": 240},
    {"epoch": 0.17454282093065362, "grad_norm": 0.49923181533813477, "learning_rate": 0.00018631468832106446, "loss": 8.2962, "step": 241},
    {"epoch": 0.1752670650009053, "grad_norm": 0.5132392048835754, "learning_rate": 0.00018619875383227912, "loss": 8.3461, "step": 242},
    {"epoch": 0.17599130907115698, "grad_norm": 0.5324509739875793, "learning_rate": 0.000186082366731943, "loss": 8.2242, "step": 243},
    {"epoch": 0.17671555314140866, "grad_norm": 0.5011392831802368, "learning_rate": 0.0001859655276311803, "loss": 8.2918, "step": 244},
    {"epoch": 0.17743979721166034, "grad_norm": 0.5092015862464905, "learning_rate": 0.0001858482371434886, "loss": 8.2892, "step": 245},
    {"epoch": 0.17816404128191202, "grad_norm": 0.531007707118988, "learning_rate": 0.00018573049588473564, "loss": 8.2796, "step": 246},
    {"epoch": 0.17888828535216367, "grad_norm": 0.5116980075836182, "learning_rate": 0.00018561230447315604, "loss": 8.2792, "step": 247},
    {"epoch": 0.17961252942241535, "grad_norm": 0.5366231203079224, "learning_rate": 0.000185493663529348, "loss": 8.2506, "step": 248},
    {"epoch": 0.18033677349266702, "grad_norm": 0.595176637172699, "learning_rate": 0.0001853745736762703, "loss": 8.2615, "step": 249},
    {"epoch": 0.1810610175629187, "grad_norm": 0.5762497186660767, "learning_rate": 0.0001852550355392387, "loss": 8.2754, "step": 250},
    {"epoch": 0.18178526163317038, "grad_norm": 0.5076980590820312, "learning_rate": 0.00018513504974592283, "loss": 8.2569, "step": 251},
    {"epoch": 0.18250950570342206, "grad_norm": 0.5133912563323975, "learning_rate": 0.0001850146169263429, "loss": 8.2293, "step": 252},
    {"epoch": 0.18323374977367374, "grad_norm": 0.5173690915107727, "learning_rate": 0.00018489373771286637, "loss": 8.2451, "step": 253},
    {"epoch": 0.18395799384392542, "grad_norm": 0.5147438645362854, "learning_rate": 0.00018477241274020458, "loss": 8.1972, "step": 254},
    {"epoch": 0.18468223791417707, "grad_norm": 0.48920467495918274, "learning_rate": 0.00018465064264540945, "loss": 8.2376, "step": 255},
    {"epoch": 0.18540648198442874, "grad_norm": 0.4959597587585449, "learning_rate": 0.00018452842806787026, "loss": 8.2552, "step": 256},
    {"epoch": 0.18613072605468042, "grad_norm": 0.48315390944480896, "learning_rate": 0.00018440576964930998, "loss": 8.2317, "step": 257},
    {"epoch": 0.1868549701249321, "grad_norm": 0.5170494914054871, "learning_rate": 0.00018428266803378226, "loss": 8.2036, "step": 258},
    {"epoch": 0.18757921419518378, "grad_norm": 0.4896693229675293, "learning_rate": 0.00018415912386766781, "loss": 8.2285, "step": 259},
    {"epoch": 0.18830345826543546, "grad_norm": 0.5025380849838257, "learning_rate": 0.00018403513779967115, "loss": 8.2247, "step": 260},
    {"epoch": 0.18902770233568714, "grad_norm": 0.4876105785369873, "learning_rate": 0.000183910710480817, "loss": 8.2347, "step": 261},
    {"epoch": 0.18975194640593882, "grad_norm": 0.4978480041027069, "learning_rate": 0.00018378584256444712, "loss": 8.1992, "step": 262},
    {"epoch": 0.19047619047619047, "grad_norm": 0.47433796525001526, "learning_rate": 0.00018366053470621668, "loss": 8.2212, "step": 263},
    {"epoch": 0.19120043454644214, "grad_norm": 0.4850807785987854, "learning_rate": 0.00018353478756409096, "loss": 8.2226, "step": 264},
    {"epoch": 0.19192467861669382, "grad_norm": 0.5015024542808533, "learning_rate": 0.00018340860179834177, "loss": 8.1905, "step": 265},
    {"epoch": 0.1926489226869455, "grad_norm": 0.5145589113235474, "learning_rate": 0.00018328197807154407, "loss": 8.1793, "step": 266},
    {"epoch": 0.19337316675719718, "grad_norm": 0.48222506046295166, "learning_rate": 0.00018315491704857246, "loss": 8.1716, "step": 267},
    {"epoch": 0.19409741082744886, "grad_norm": 0.5012249946594238, "learning_rate": 0.00018302741939659763, "loss": 8.1869, "step": 268},
    {"epoch": 0.19482165489770054, "grad_norm": 0.494550496339798, "learning_rate": 0.00018289948578508307, "loss": 8.1733, "step": 269},
    {"epoch": 0.1955458989679522, "grad_norm": 0.51914381980896, "learning_rate": 0.00018277111688578122, "loss": 8.1836, "step": 270},
    {"epoch": 0.19627014303820386, "grad_norm": 0.5137649774551392, "learning_rate": 0.00018264231337273022, "loss": 8.1376, "step": 271},
    {"epoch": 0.19699438710845554, "grad_norm": 0.4887925982475281, "learning_rate": 0.0001825130759222503, "loss": 8.1645, "step": 272},
    {"epoch": 0.19771863117870722, "grad_norm": 0.48226258158683777, "learning_rate": 0.0001823834052129401, "loss": 8.1425, "step": 273},
    {"epoch": 0.1984428752489589, "grad_norm": 0.4919883906841278, "learning_rate": 0.00018225330192567335, "loss": 8.1622, "step": 274},
    {"epoch": 0.19916711931921058, "grad_norm": 0.49928978085517883, "learning_rate": 0.00018212276674359508, "loss": 8.1414, "step": 275},
    {"epoch": 0.19989136338946226, "grad_norm": 0.5267078280448914, "learning_rate": 0.00018199180035211805, "loss": 8.0985, "step": 276},
    {"epoch": 0.20061560745971393, "grad_norm": 0.5004377365112305, "learning_rate": 0.0001818604034389193, "loss": 8.1365, "step": 277},
    {"epoch": 0.20133985152996559, "grad_norm": 0.5070396661758423, "learning_rate": 0.00018172857669393645, "loss": 8.1672, "step": 278},
    {"epoch": 0.20206409560021726, "grad_norm": 0.49480506777763367, "learning_rate": 0.000181596320809364, "loss": 8.1374, "step": 279},
    {"epoch": 0.20278833967046894, "grad_norm": 0.4668210446834564, "learning_rate": 0.0001814636364796499, "loss": 8.1812, "step": 280},
    {"epoch": 0.20351258374072062, "grad_norm": 0.49613460898399353, "learning_rate": 0.00018133052440149163, "loss": 8.1357, "step": 281},
    {"epoch": 0.2042368278109723, "grad_norm": 0.5054727792739868, "learning_rate": 0.00018119698527383274, "loss": 8.0969, "step": 282},
    {"epoch": 0.20496107188122398, "grad_norm": 0.49716633558273315, "learning_rate": 0.0001810630197978592, "loss": 8.1176, "step": 283},
    {"epoch": 0.20568531595147566, "grad_norm": 0.491693377494812, "learning_rate": 0.00018092862867699557, "loss": 8.118, "step": 284},
    {"epoch": 0.20640956002172733, "grad_norm": 0.498813658952713, "learning_rate": 0.00018079381261690134, "loss": 8.0726, "step": 285},
    {"epoch": 0.20713380409197898, "grad_norm": 0.5227881669998169, "learning_rate": 0.00018065857232546736, "loss": 8.0672, "step": 286},
    {"epoch": 0.20785804816223066, "grad_norm": 0.4753732979297638, "learning_rate": 0.00018052290851281204, "loss": 8.06, "step": 287},
    {"epoch": 0.20858229223248234, "grad_norm": 0.510330080986023, "learning_rate": 0.0001803868218912775, "loss": 8.0716, "step": 288},
    {"epoch": 0.20930653630273402, "grad_norm": 0.5118574500083923, "learning_rate": 0.000180250313175426, "loss": 8.1125, "step": 289},
    {"epoch": 0.2100307803729857, "grad_norm": 0.5137168169021606, "learning_rate": 0.00018011338308203623, "loss": 8.0358, "step": 290},
    {"epoch": 0.21075502444323738, "grad_norm": 0.48737525939941406, "learning_rate": 0.00017997603233009922, "loss": 8.1156, "step": 291},
    {"epoch": 0.21147926851348905, "grad_norm": 0.5204469561576843, "learning_rate": 0.00017983826164081503, "loss": 8.0484, "step": 292},
    {"epoch": 0.21220351258374073, "grad_norm": 0.4880678951740265, "learning_rate": 0.00017970007173758856, "loss": 8.0651, "step": 293},
    {"epoch": 0.21292775665399238, "grad_norm": 0.47253793478012085, "learning_rate": 0.00017956146334602595, "loss": 8.0648, "step": 294},
    {"epoch": 0.21365200072424406, "grad_norm": 0.5147983431816101, "learning_rate": 0.00017942243719393076, "loss": 8.0625, "step": 295},
    {"epoch": 0.21437624479449574, "grad_norm": 0.5061482191085815, "learning_rate": 0.00017928299401130012, "loss": 8.059, "step": 296},
    {"epoch": 0.21510048886474742, "grad_norm": 0.5227460265159607, "learning_rate": 0.00017914313453032093, "loss": 8.0338, "step": 297},
    {"epoch": 0.2158247329349991, "grad_norm": 0.5411704182624817, "learning_rate": 0.00017900285948536587, "loss": 8.0671, "step": 298},
    {"epoch": 0.21654897700525078, "grad_norm": 0.5224537253379822, "learning_rate": 0.00017886216961298981, "loss": 8.0801, "step": 299},
    {"epoch": 0.21727322107550245, "grad_norm": 0.5319260954856873, "learning_rate": 0.00017872106565192567, "loss": 8.1099, "step": 300},
    {"epoch": 0.21799746514575413, "grad_norm": 0.5229703783988953, "learning_rate": 0.00017857954834308074, "loss": 8.0345, "step": 301},
    {"epoch": 0.21872170921600578, "grad_norm": 0.4948280453681946, "learning_rate": 0.0001784376184295327, "loss": 8.0936, "step": 302},
    {"epoch": 0.21944595328625746, "grad_norm": 0.4888227581977844, "learning_rate": 0.00017829527665652562, "loss": 8.062, "step": 303},
    {"epoch": 0.22017019735650914, "grad_norm": 0.49134767055511475, "learning_rate": 0.00017815252377146638, "loss": 8.0392, "step": 304},
    {"epoch": 0.22089444142676082, "grad_norm": 0.483167827129364, "learning_rate": 0.0001780093605239203, "loss": 8.07, "step": 305},
    {"epoch": 0.2216186854970125, "grad_norm": 0.4626474380493164, "learning_rate": 0.00017786578766560758, "loss": 8.0982, "step": 306},
    {"epoch": 0.22234292956726417, "grad_norm": 0.4981623888015747, "learning_rate": 0.0001777218059503991, "loss": 8.0233, "step": 307},
    {"epoch": 0.22306717363751585, "grad_norm": 0.5132870674133301, "learning_rate": 0.00017757741613431263, "loss": 8.0564, "step": 308},
    {"epoch": 0.22379141770776753, "grad_norm": 0.4659659266471863, "learning_rate": 0.00017743261897550875, "loss": 8.0599, "step": 309},
    {"epoch": 0.22451566177801918, "grad_norm": 0.48992919921875, "learning_rate": 0.00017728741523428696, "loss": 8.0318, "step": 310},
    {"epoch": 0.22523990584827086, "grad_norm": 0.465867817401886, "learning_rate": 0.00017714180567308157, "loss": 8.042, "step": 311},
    {"epoch": 0.22596414991852254, "grad_norm": 0.48665499687194824, "learning_rate": 0.0001769957910564578, "loss": 8.0519, "step": 312},
    {"epoch": 0.22668839398877422, "grad_norm": 0.47514137625694275, "learning_rate": 0.00017684937215110778, "loss": 8.0537, "step": 313},
    {"epoch": 0.2274126380590259, "grad_norm": 0.5170972943305969, "learning_rate": 0.00017670254972584638, "loss": 8.0094, "step": 314},
    {"epoch": 0.22813688212927757, "grad_norm": 0.5160325765609741, "learning_rate": 0.0001765553245516073, "loss": 8.021, "step": 315},
    {"epoch": 0.22886112619952925, "grad_norm": 0.48066890239715576, "learning_rate": 0.00017640769740143904, "loss": 8.0412, "step": 316},
    {"epoch": 0.2295853702697809, "grad_norm": 0.4954223334789276, "learning_rate": 0.00017625966905050077, "loss": 8.0338, "step": 317},
    {"epoch": 0.23030961434003258, "grad_norm": 0.5118348002433777, "learning_rate": 0.00017611124027605825, "loss": 8.0008, "step": 318},
    {"epoch": 0.23103385841028426, "grad_norm": 0.5256980657577515, "learning_rate": 0.00017596241185747978, "loss": 7.9931, "step": 319},
    {"epoch": 0.23175810248053594, "grad_norm": 0.48749062418937683, "learning_rate": 0.00017581318457623218, "loss": 7.9715, "step": 320},
    {"epoch": 0.23248234655078762, "grad_norm": 0.5072448253631592, "learning_rate": 0.0001756635592158765, "loss": 8.011, "step": 321},
    {"epoch": 0.2332065906210393, "grad_norm": 0.49068352580070496, "learning_rate": 0.00017551353656206412, "loss": 8.0219, "step": 322},
    {"epoch": 0.23393083469129097, "grad_norm": 0.47556236386299133, "learning_rate": 0.00017536311740253243, "loss": 8.0213, "step": 323},
    {"epoch": 0.23465507876154265, "grad_norm": 0.5052993297576904, "learning_rate": 0.0001752123025271009, "loss": 7.9925, "step": 324},
    {"epoch": 0.2353793228317943, "grad_norm": 0.49751394987106323, "learning_rate": 0.00017506109272766673, "loss": 7.9627, "step": 325},
    {"epoch": 0.23610356690204598, "grad_norm": 0.5014733672142029, "learning_rate": 0.00017490948879820084, "loss": 7.9567, "step": 326},
    {"epoch": 0.23682781097229766, "grad_norm": 0.49920758605003357, "learning_rate": 0.0001747574915347436, "loss": 8.0418, "step": 327},
    {"epoch": 0.23755205504254934, "grad_norm": 0.5061244368553162, "learning_rate": 0.00017460510173540072, "loss": 7.9813, "step": 328},
    {"epoch": 0.23827629911280102, "grad_norm": 0.5036886930465698, "learning_rate": 0.00017445232020033902, "loss": 7.9537, "step": 329},
    {"epoch": 0.2390005431830527, "grad_norm": 0.48648810386657715, "learning_rate": 0.00017429914773178228, "loss": 7.9614, "step": 330},
    {"epoch": 0.23972478725330437, "grad_norm": 0.47009527683258057, "learning_rate": 0.00017414558513400693, "loss": 8.0103, "step": 331},
    {"epoch": 0.24044903132355605, "grad_norm": 0.4699738919734955, "learning_rate": 0.00017399163321333793, "loss": 7.9483, "step": 332},
    {"epoch": 0.2411732753938077, "grad_norm": 0.4880366623401642, "learning_rate": 0.00017383729277814446, "loss": 8.0285, "step": 333},
    {"epoch": 0.24189751946405938, "grad_norm": 0.4812169373035431, "learning_rate": 0.00017368256463883578, "loss": 8.0094, "step": 334},
    {"epoch": 0.24262176353431106, "grad_norm": 0.5153531432151794, "learning_rate": 0.00017352744960785676, "loss": 7.9488, "step": 335},
    {"epoch": 0.24334600760456274, "grad_norm": 0.4880661368370056, "learning_rate": 0.0001733719484996839, "loss": 7.9457, "step": 336},
    {"epoch": 0.24407025167481441, "grad_norm": 0.5033547282218933, "learning_rate": 0.00017321606213082088, "loss": 7.9726, "step": 337},
    {"epoch": 0.2447944957450661, "grad_norm": 0.4773882329463959, "learning_rate": 0.0001730597913197942, "loss": 8.0068, "step": 338},
    {"epoch": 0.24551873981531777, "grad_norm": 0.48063740134239197, "learning_rate": 0.00017290313688714915, "loss": 7.9865, "step": 339},
    {"epoch": 0.24624298388556945, "grad_norm": 0.5227491855621338, "learning_rate": 0.00017274609965544523, "loss": 7.8864, "step": 340},
    {"epoch": 0.2469672279558211, "grad_norm": 0.490004301071167, "learning_rate": 0.00017258868044925195, "loss": 7.9176, "step": 341},
    {"epoch": 0.24769147202607278, "grad_norm": 0.49324241280555725, "learning_rate": 0.0001724308800951445, "loss": 7.9429, "step": 342},
    {"epoch": 0.24841571609632446, "grad_norm": 0.5013875961303711, "learning_rate": 0.00017227269942169936, "loss": 8.0167, "step": 343},
    {"epoch": 0.24913996016657614, "grad_norm": 0.49397408962249756, "learning_rate": 0.00017211413925949005, "loss": 7.9341, "step": 344},
    {"epoch": 0.2498642042368278, "grad_norm": 0.48179903626441956, "learning_rate": 0.00017195520044108268, "loss": 7.9138, "step": 345},
    {"epoch": 0.2505884483070795, "grad_norm": 0.5134261846542358, "learning_rate": 0.00017179588380103163, "loss": 7.9132, "step": 346},
    {"epoch": 0.2505884483070795, "eval_loss": 7.9483160972595215, "eval_runtime": 5.1008, "eval_samples_per_second": 228.001, "eval_steps_per_second": 114.099, "step": 346},
    {"epoch": 0.25131269237733117, "grad_norm": 0.5437692403793335, "learning_rate": 0.00017163619017587504, "loss": 7.9127, "step": 347},
    {"epoch": 0.25203693644758285, "grad_norm": 0.5261752605438232, "learning_rate": 0.00017147612040413065, "loss": 7.9209, "step": 348},
    {"epoch": 0.2527611805178345, "grad_norm": 0.5796931385993958, "learning_rate": 0.0001713156753262912, "loss": 7.8785, "step": 349},
    {"epoch": 0.2534854245880862, "grad_norm": 0.6015821099281311, "learning_rate": 0.00017115485578482006, "loss": 8.0264, "step": 350},
    {"epoch": 0.2542096686583379, "grad_norm": 0.5010411739349365, "learning_rate": 0.00017099366262414694, "loss": 7.9666, "step": 351},
    {"epoch": 0.25493391272858956, "grad_norm": 0.4945250451564789, "learning_rate": 0.00017083209669066317, "loss": 7.9885, "step": 352},
    {"epoch": 0.2556581567988412, "grad_norm": 0.5084425210952759, "learning_rate": 0.0001706701588327176, "loss": 7.9407, "step": 353},
    {"epoch": 0.25638240086909286, "grad_norm": 0.47225329279899597, "learning_rate": 0.0001705078499006119, "loss": 7.9456, "step": 354},
    {"epoch": 0.25710664493934454, "grad_norm": 0.5019710659980774, "learning_rate": 0.00017034517074659617, "loss": 7.9546, "step": 355},
    {"epoch": 0.2578308890095962, "grad_norm": 0.49253225326538086, "learning_rate": 0.00017018212222486446, "loss": 7.9919, "step": 356},
    {"epoch": 0.2585551330798479, "grad_norm": 0.4921097457408905, "learning_rate": 0.0001700187051915503, "loss": 7.9367, "step": 357},
    {"epoch": 0.2592793771500996, "grad_norm": 0.4596784710884094, "learning_rate": 0.00016985492050472227, "loss": 7.9454, "step": 358},
    {"epoch": 0.26000362122035126, "grad_norm": 0.48740360140800476, "learning_rate": 0.00016969076902437932, "loss": 7.9385, "step": 359},
    {"epoch": 0.26072786529060293, "grad_norm": 0.4537501931190491, "learning_rate": 0.00016952625161244638, "loss": 7.9553, "step": 360},
    {"epoch": 0.2614521093608546, "grad_norm": 0.5013776421546936, "learning_rate": 0.00016936136913276982, "loss": 7.9313, "step": 361},
    {"epoch": 0.2621763534311063, "grad_norm": 0.49748286604881287, "learning_rate": 0.00016919612245111295, "loss": 7.9337, "step": 362},
    {"epoch": 0.26290059750135797, "grad_norm": 0.4852049648761749, "learning_rate": 0.0001690305124351514, "loss": 7.9322, "step": 363},
    {"epoch": 0.26362484157160965, "grad_norm": 0.4738268554210663, "learning_rate": 0.0001688645399544685, "loss": 7.9062, "step": 364},
    {"epoch": 0.2643490856418613, "grad_norm": 0.5000738501548767, "learning_rate": 0.00016869820588055095, "loss": 7.9441, "step": 365},
    {"epoch": 0.265073329712113, "grad_norm": 0.4679807424545288,
|
"learning_rate": 0.00016853151108678398, |
|
"loss": 7.9791, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2657975737823647, |
|
"grad_norm": 0.4647904336452484, |
|
"learning_rate": 0.00016836445644844697, |
|
"loss": 7.9007, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2665218178526163, |
|
"grad_norm": 0.491827130317688, |
|
"learning_rate": 0.00016819704284270874, |
|
"loss": 7.9301, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.267246061922868, |
|
"grad_norm": 0.4953727126121521, |
|
"learning_rate": 0.000168029271148623, |
|
"loss": 7.8832, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.26797030599311966, |
|
"grad_norm": 0.5065143704414368, |
|
"learning_rate": 0.0001678611422471236, |
|
"loss": 7.8904, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26869455006337134, |
|
"grad_norm": 0.49114301800727844, |
|
"learning_rate": 0.00016769265702102018, |
|
"loss": 7.8696, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.269418794133623, |
|
"grad_norm": 0.4655497670173645, |
|
"learning_rate": 0.00016752381635499317, |
|
"loss": 7.9315, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2701430382038747, |
|
"grad_norm": 0.4854629933834076, |
|
"learning_rate": 0.0001673546211355895, |
|
"loss": 7.8832, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2708672822741264, |
|
"grad_norm": 0.4579346776008606, |
|
"learning_rate": 0.0001671850722512178, |
|
"loss": 7.8855, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.27159152634437805, |
|
"grad_norm": 0.4732455909252167, |
|
"learning_rate": 0.00016701517059214348, |
|
"loss": 7.9345, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.27231577041462973, |
|
"grad_norm": 0.4654392600059509, |
|
"learning_rate": 0.00016684491705048457, |
|
"loss": 7.9374, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2730400144848814, |
|
"grad_norm": 0.4884020686149597, |
|
"learning_rate": 0.0001666743125202067, |
|
"loss": 7.9046, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2737642585551331, |
|
"grad_norm": 0.4872707724571228, |
|
"learning_rate": 0.00016650335789711833, |
|
"loss": 7.9233, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.27448850262538477, |
|
"grad_norm": 0.47144293785095215, |
|
"learning_rate": 0.0001663320540788663, |
|
"loss": 7.9096, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.27521274669563645, |
|
"grad_norm": 0.45968198776245117, |
|
"learning_rate": 0.00016616040196493103, |
|
"loss": 7.8761, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2759369907658881, |
|
"grad_norm": 0.47557854652404785, |
|
"learning_rate": 0.00016598840245662166, |
|
"loss": 7.8756, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2766612348361398, |
|
"grad_norm": 0.45728030800819397, |
|
"learning_rate": 0.0001658160564570715, |
|
"loss": 7.9193, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2773854789063915, |
|
"grad_norm": 0.5167350172996521, |
|
"learning_rate": 0.0001656433648712332, |
|
"loss": 7.8849, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2781097229766431, |
|
"grad_norm": 0.4883110523223877, |
|
"learning_rate": 0.00016547032860587398, |
|
"loss": 7.8263, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.2788339670468948, |
|
"grad_norm": 0.47416940331459045, |
|
"learning_rate": 0.00016529694856957098, |
|
"loss": 7.9048, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.27955821111714646, |
|
"grad_norm": 0.4862803816795349, |
|
"learning_rate": 0.0001651232256727063, |
|
"loss": 7.8855, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.28028245518739814, |
|
"grad_norm": 0.4723484516143799, |
|
"learning_rate": 0.0001649491608274624, |
|
"loss": 7.9657, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2810066992576498, |
|
"grad_norm": 0.4739847779273987, |
|
"learning_rate": 0.00016477475494781717, |
|
"loss": 7.8765, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2817309433279015, |
|
"grad_norm": 0.4680808484554291, |
|
"learning_rate": 0.00016460000894953934, |
|
"loss": 7.9019, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.2824551873981532, |
|
"grad_norm": 0.5003647804260254, |
|
"learning_rate": 0.00016442492375018343, |
|
"loss": 7.839, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28317943146840485, |
|
"grad_norm": 0.5076538324356079, |
|
"learning_rate": 0.00016424950026908497, |
|
"loss": 7.8935, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.28390367553865653, |
|
"grad_norm": 0.47889307141304016, |
|
"learning_rate": 0.0001640737394273559, |
|
"loss": 7.9099, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2846279196089082, |
|
"grad_norm": 0.48014241456985474, |
|
"learning_rate": 0.0001638976421478794, |
|
"loss": 7.9593, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2853521636791599, |
|
"grad_norm": 0.48349729180336, |
|
"learning_rate": 0.00016372120935530536, |
|
"loss": 7.8625, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.28607640774941157, |
|
"grad_norm": 0.4960671067237854, |
|
"learning_rate": 0.00016354444197604529, |
|
"loss": 7.8534, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.28680065181966324, |
|
"grad_norm": 0.515612006187439, |
|
"learning_rate": 0.00016336734093826756, |
|
"loss": 7.872, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.2875248958899149, |
|
"grad_norm": 0.49004271626472473, |
|
"learning_rate": 0.00016318990717189256, |
|
"loss": 7.9025, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.2882491399601666, |
|
"grad_norm": 0.5171812176704407, |
|
"learning_rate": 0.00016301214160858768, |
|
"loss": 7.8026, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.2889733840304182, |
|
"grad_norm": 0.5030158162117004, |
|
"learning_rate": 0.00016283404518176257, |
|
"loss": 7.9481, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2896976281006699, |
|
"grad_norm": 0.5945943593978882, |
|
"learning_rate": 0.0001626556188265642, |
|
"loss": 7.9135, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2904218721709216, |
|
"grad_norm": 0.5005214810371399, |
|
"learning_rate": 0.00016247686347987183, |
|
"loss": 7.9069, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.29114611624117326, |
|
"grad_norm": 0.4819411635398865, |
|
"learning_rate": 0.00016229778008029224, |
|
"loss": 7.8972, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.29187036031142494, |
|
"grad_norm": 0.48914363980293274, |
|
"learning_rate": 0.00016211836956815477, |
|
"loss": 7.9025, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.2925946043816766, |
|
"grad_norm": 0.4711272418498993, |
|
"learning_rate": 0.00016193863288550638, |
|
"loss": 7.923, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2933188484519283, |
|
"grad_norm": 0.4771362841129303, |
|
"learning_rate": 0.00016175857097610653, |
|
"loss": 7.8727, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.29404309252217997, |
|
"grad_norm": 0.4811650216579437, |
|
"learning_rate": 0.00016157818478542254, |
|
"loss": 7.8895, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.29476733659243165, |
|
"grad_norm": 0.49894389510154724, |
|
"learning_rate": 0.00016139747526062442, |
|
"loss": 7.8914, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.29549158066268333, |
|
"grad_norm": 0.48443737626075745, |
|
"learning_rate": 0.00016121644335057993, |
|
"loss": 7.8962, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.296215824732935, |
|
"grad_norm": 0.4693881869316101, |
|
"learning_rate": 0.00016103509000584958, |
|
"loss": 7.8654, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2969400688031867, |
|
"grad_norm": 0.4889278709888458, |
|
"learning_rate": 0.0001608534161786817, |
|
"loss": 7.8896, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29766431287343836, |
|
"grad_norm": 0.48877543210983276, |
|
"learning_rate": 0.0001606714228230074, |
|
"loss": 7.9108, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29838855694369004, |
|
"grad_norm": 0.46180957555770874, |
|
"learning_rate": 0.00016048911089443558, |
|
"loss": 7.9667, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2991128010139417, |
|
"grad_norm": 0.466511994600296, |
|
"learning_rate": 0.00016030648135024786, |
|
"loss": 7.9088, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2998370450841934, |
|
"grad_norm": 0.4758537709712982, |
|
"learning_rate": 0.00016012353514939363, |
|
"loss": 7.8952, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.300561289154445, |
|
"grad_norm": 0.5132808089256287, |
|
"learning_rate": 0.00015994027325248492, |
|
"loss": 7.8671, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3012855332246967, |
|
"grad_norm": 0.45275020599365234, |
|
"learning_rate": 0.00015975669662179152, |
|
"loss": 7.8962, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3020097772949484, |
|
"grad_norm": 0.5065222382545471, |
|
"learning_rate": 0.00015957280622123574, |
|
"loss": 7.8559, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.30273402136520006, |
|
"grad_norm": 0.4880678057670593, |
|
"learning_rate": 0.00015938860301638742, |
|
"loss": 7.8497, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.30345826543545174, |
|
"grad_norm": 0.4863564968109131, |
|
"learning_rate": 0.0001592040879744589, |
|
"loss": 7.8586, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3041825095057034, |
|
"grad_norm": 0.49322959780693054, |
|
"learning_rate": 0.0001590192620643, |
|
"loss": 7.8449, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3049067535759551, |
|
"grad_norm": 0.4878312349319458, |
|
"learning_rate": 0.00015883412625639263, |
|
"loss": 7.8653, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.30563099764620677, |
|
"grad_norm": 0.5146703720092773, |
|
"learning_rate": 0.00015864868152284608, |
|
"loss": 7.9, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.30635524171645845, |
|
"grad_norm": 0.46097350120544434, |
|
"learning_rate": 0.00015846292883739171, |
|
"loss": 7.8997, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3070794857867101, |
|
"grad_norm": 0.48234981298446655, |
|
"learning_rate": 0.00015827686917537783, |
|
"loss": 7.9008, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3078037298569618, |
|
"grad_norm": 0.49214646220207214, |
|
"learning_rate": 0.00015809050351376467, |
|
"loss": 7.9094, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3085279739272135, |
|
"grad_norm": 0.4872777462005615, |
|
"learning_rate": 0.00015790383283111913, |
|
"loss": 7.8567, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.30925221799746516, |
|
"grad_norm": 0.4729660451412201, |
|
"learning_rate": 0.00015771685810760978, |
|
"loss": 7.8577, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.30997646206771684, |
|
"grad_norm": 0.4986751973628998, |
|
"learning_rate": 0.00015752958032500165, |
|
"loss": 7.8491, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3107007061379685, |
|
"grad_norm": 0.4702273905277252, |
|
"learning_rate": 0.000157342000466651, |
|
"loss": 7.8647, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3114249502082202, |
|
"grad_norm": 0.48451560735702515, |
|
"learning_rate": 0.0001571541195175003, |
|
"loss": 7.8365, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3121491942784718, |
|
"grad_norm": 0.4823257029056549, |
|
"learning_rate": 0.0001569659384640729, |
|
"loss": 7.8654, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3128734383487235, |
|
"grad_norm": 0.4614291191101074, |
|
"learning_rate": 0.00015677745829446803, |
|
"loss": 7.8945, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3135976824189752, |
|
"grad_norm": 0.47423475980758667, |
|
"learning_rate": 0.00015658867999835546, |
|
"loss": 7.8436, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.31432192648922685, |
|
"grad_norm": 0.486380398273468, |
|
"learning_rate": 0.00015639960456697037, |
|
"loss": 7.836, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.31504617055947853, |
|
"grad_norm": 0.4982565641403198, |
|
"learning_rate": 0.00015621023299310812, |
|
"loss": 7.8526, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3157704146297302, |
|
"grad_norm": 0.4835852086544037, |
|
"learning_rate": 0.00015602056627111907, |
|
"loss": 7.831, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3164946586999819, |
|
"grad_norm": 0.508026659488678, |
|
"learning_rate": 0.0001558306053969034, |
|
"loss": 7.837, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.31721890277023357, |
|
"grad_norm": 0.5414742231369019, |
|
"learning_rate": 0.00015564035136790566, |
|
"loss": 7.8466, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.31794314684048525, |
|
"grad_norm": 0.5005372762680054, |
|
"learning_rate": 0.00015544980518310988, |
|
"loss": 7.8585, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3186673909107369, |
|
"grad_norm": 0.485451877117157, |
|
"learning_rate": 0.00015525896784303398, |
|
"loss": 7.8713, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3193916349809886, |
|
"grad_norm": 0.4814086854457855, |
|
"learning_rate": 0.0001550678403497248, |
|
"loss": 7.8185, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3201158790512403, |
|
"grad_norm": 0.4870661497116089, |
|
"learning_rate": 0.00015487642370675265, |
|
"loss": 7.8919, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.32084012312149196, |
|
"grad_norm": 0.530167281627655, |
|
"learning_rate": 0.00015468471891920613, |
|
"loss": 7.8137, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.32156436719174364, |
|
"grad_norm": 0.4842655658721924, |
|
"learning_rate": 0.0001544927269936868, |
|
"loss": 7.7832, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3222886112619953, |
|
"grad_norm": 0.47289198637008667, |
|
"learning_rate": 0.0001543004489383039, |
|
"loss": 7.8456, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.32301285533224694, |
|
"grad_norm": 0.5019034147262573, |
|
"learning_rate": 0.00015410788576266916, |
|
"loss": 7.8209, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3237370994024986, |
|
"grad_norm": 0.5903533697128296, |
|
"learning_rate": 0.00015391503847789136, |
|
"loss": 7.9144, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3244613434727503, |
|
"grad_norm": 0.5589230060577393, |
|
"learning_rate": 0.00015372190809657106, |
|
"loss": 7.7157, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.325185587543002, |
|
"grad_norm": 0.5412530899047852, |
|
"learning_rate": 0.00015352849563279536, |
|
"loss": 7.7944, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.32590983161325365, |
|
"grad_norm": 0.6175811290740967, |
|
"learning_rate": 0.00015333480210213244, |
|
"loss": 7.8087, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32663407568350533, |
|
"grad_norm": 0.504089891910553, |
|
"learning_rate": 0.0001531408285216264, |
|
"loss": 7.8893, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.327358319753757, |
|
"grad_norm": 0.5161882638931274, |
|
"learning_rate": 0.00015294657590979172, |
|
"loss": 7.8682, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3280825638240087, |
|
"grad_norm": 0.4699746072292328, |
|
"learning_rate": 0.0001527520452866081, |
|
"loss": 7.8638, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.32880680789426037, |
|
"grad_norm": 0.48906633257865906, |
|
"learning_rate": 0.00015255723767351495, |
|
"loss": 7.8385, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.32953105196451205, |
|
"grad_norm": 0.48731744289398193, |
|
"learning_rate": 0.00015236215409340616, |
|
"loss": 7.8139, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3302552960347637, |
|
"grad_norm": 0.4950321614742279, |
|
"learning_rate": 0.0001521667955706246, |
|
"loss": 7.8719, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3309795401050154, |
|
"grad_norm": 0.4786604046821594, |
|
"learning_rate": 0.00015197116313095683, |
|
"loss": 7.8451, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3317037841752671, |
|
"grad_norm": 0.46545907855033875, |
|
"learning_rate": 0.00015177525780162775, |
|
"loss": 7.8802, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.33242802824551876, |
|
"grad_norm": 0.4908168315887451, |
|
"learning_rate": 0.00015157908061129508, |
|
"loss": 7.8576, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.33315227231577044, |
|
"grad_norm": 0.5020480751991272, |
|
"learning_rate": 0.00015138263259004402, |
|
"loss": 7.8606, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3338765163860221, |
|
"grad_norm": 0.48662593960762024, |
|
"learning_rate": 0.00015118591476938188, |
|
"loss": 7.8735, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.33460076045627374, |
|
"grad_norm": 0.46343091130256653, |
|
"learning_rate": 0.00015098892818223258, |
|
"loss": 7.9559, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3353250045265254, |
|
"grad_norm": 0.467752069234848, |
|
"learning_rate": 0.0001507916738629314, |
|
"loss": 7.8742, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3360492485967771, |
|
"grad_norm": 0.4827868342399597, |
|
"learning_rate": 0.00015059415284721924, |
|
"loss": 7.8779, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3367734926670288, |
|
"grad_norm": 0.4806783199310303, |
|
"learning_rate": 0.00015039636617223754, |
|
"loss": 7.8949, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.33749773673728045, |
|
"grad_norm": 0.4673584997653961, |
|
"learning_rate": 0.00015019831487652255, |
|
"loss": 7.8997, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.33822198080753213, |
|
"grad_norm": 0.5170037746429443, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 7.8233, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3389462248777838, |
|
"grad_norm": 0.5105298757553101, |
|
"learning_rate": 0.00014980142258397972, |
|
"loss": 7.8252, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3396704689480355, |
|
"grad_norm": 0.476662814617157, |
|
"learning_rate": 0.00014960258367114997, |
|
"loss": 7.843, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.34039471301828716, |
|
"grad_norm": 0.49604159593582153, |
|
"learning_rate": 0.0001494034843055721, |
|
"loss": 7.8556, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.34111895708853884, |
|
"grad_norm": 0.48032739758491516, |
|
"learning_rate": 0.00014920412553267508, |
|
"loss": 7.8577, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3418432011587905, |
|
"grad_norm": 0.49723532795906067, |
|
"learning_rate": 0.00014900450839924994, |
|
"loss": 7.864, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3425674452290422, |
|
"grad_norm": 0.47292062640190125, |
|
"learning_rate": 0.00014880463395344434, |
|
"loss": 7.8405, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3432916892992939, |
|
"grad_norm": 0.49656134843826294, |
|
"learning_rate": 0.00014860450324475703, |
|
"loss": 7.832, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.34401593336954556, |
|
"grad_norm": 0.4985675811767578, |
|
"learning_rate": 0.0001484041173240323, |
|
"loss": 7.8131, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.34474017743979724, |
|
"grad_norm": 0.4732464551925659, |
|
"learning_rate": 0.0001482034772434545, |
|
"loss": 7.8819, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3454644215100489, |
|
"grad_norm": 0.4878876507282257, |
|
"learning_rate": 0.00014800258405654257, |
|
"loss": 7.8256, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.34618866558030054, |
|
"grad_norm": 0.49212250113487244, |
|
"learning_rate": 0.00014780143881814442, |
|
"loss": 7.816, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3469129096505522, |
|
"grad_norm": 0.4787651598453522, |
|
"learning_rate": 0.00014760004258443151, |
|
"loss": 7.856, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3476371537208039, |
|
"grad_norm": 0.48412755131721497, |
|
"learning_rate": 0.00014739839641289313, |
|
"loss": 7.8497, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34836139779105557, |
|
"grad_norm": 0.4971597492694855, |
|
"learning_rate": 0.00014719650136233096, |
|
"loss": 7.8446, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.34908564186130725, |
|
"grad_norm": 0.4982038736343384, |
|
"learning_rate": 0.00014699435849285352, |
|
"loss": 7.7839, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.34980988593155893, |
|
"grad_norm": 0.5108397603034973, |
|
"learning_rate": 0.00014679196886587052, |
|
"loss": 7.7931, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3505341300018106, |
|
"grad_norm": 0.4721708297729492, |
|
"learning_rate": 0.00014658933354408743, |
|
"loss": 7.8607, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3512583740720623, |
|
"grad_norm": 0.4693983495235443, |
|
"learning_rate": 0.0001463864535914997, |
|
"loss": 7.8773, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.35198261814231396, |
|
"grad_norm": 0.5026690363883972, |
|
"learning_rate": 0.00014618333007338744, |
|
"loss": 7.8628, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.35270686221256564, |
|
"grad_norm": 0.48804745078086853, |
|
"learning_rate": 0.00014597996405630947, |
|
"loss": 7.84, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3534311062828173, |
|
"grad_norm": 0.4530211389064789, |
|
"learning_rate": 0.0001457763566080981, |
|
"loss": 7.859, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.354155350353069, |
|
"grad_norm": 0.48494988679885864, |
|
"learning_rate": 0.0001455725087978533, |
|
"loss": 7.7981, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3548795944233207, |
|
"grad_norm": 0.4591243267059326, |
|
"learning_rate": 0.00014536842169593703, |
|
"loss": 7.8371, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35560383849357236, |
|
"grad_norm": 0.49914512038230896, |
|
"learning_rate": 0.00014516409637396787, |
|
"loss": 7.829, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.35632808256382403, |
|
"grad_norm": 0.48346269130706787, |
|
"learning_rate": 0.00014495953390481506, |
|
"loss": 7.7837, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.35705232663407566, |
|
"grad_norm": 0.4839797019958496, |
|
"learning_rate": 0.00014475473536259325, |
|
"loss": 7.8779, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.35777657070432733, |
|
"grad_norm": 0.4998239576816559, |
|
"learning_rate": 0.00014454970182265655, |
|
"loss": 7.8213, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.358500814774579, |
|
"grad_norm": 0.5087825059890747, |
|
"learning_rate": 0.000144344434361593, |
|
"loss": 7.8142, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3592250588448307, |
|
"grad_norm": 0.47627493739128113, |
|
"learning_rate": 0.00014413893405721895, |
|
"loss": 7.8951, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.35994930291508237, |
|
"grad_norm": 0.5072402954101562, |
|
"learning_rate": 0.0001439332019885733, |
|
"loss": 7.8125, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.36067354698533405, |
|
"grad_norm": 0.5031032562255859, |
|
"learning_rate": 0.0001437272392359119, |
|
"loss": 7.8159, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3613977910555857, |
|
"grad_norm": 0.5236200094223022, |
|
"learning_rate": 0.000143521046880702, |
|
"loss": 7.8519, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3621220351258374, |
|
"grad_norm": 0.5609799027442932, |
|
"learning_rate": 0.00014331462600561626, |
|
"loss": 7.8537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3628462791960891, |
|
"grad_norm": 0.4694886803627014, |
|
"learning_rate": 0.0001431079776945274, |
|
"loss": 7.9079, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.36357052326634076, |
|
"grad_norm": 0.5094586610794067, |
|
"learning_rate": 0.00014290110303250225, |
|
"loss": 7.8228, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.36429476733659244, |
|
"grad_norm": 0.4992700219154358, |
|
"learning_rate": 0.00014269400310579623, |
|
"loss": 7.8663, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.3650190114068441, |
|
"grad_norm": 0.4908367395401001, |
|
"learning_rate": 0.00014248667900184752, |
|
"loss": 7.8586, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3657432554770958, |
|
"grad_norm": 0.46805062890052795, |
|
"learning_rate": 0.00014227913180927152, |
|
"loss": 7.8644, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3664674995473475, |
|
"grad_norm": 0.49678292870521545, |
|
"learning_rate": 0.00014207136261785484, |
|
"loss": 7.8759, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.36719174361759915, |
|
"grad_norm": 0.46577930450439453, |
|
"learning_rate": 0.00014186337251854994, |
|
"loss": 7.8668, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.36791598768785083, |
|
"grad_norm": 0.4829874038696289, |
|
"learning_rate": 0.00014165516260346913, |
|
"loss": 7.8516, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.36864023175810245, |
|
"grad_norm": 0.4831247925758362, |
|
"learning_rate": 0.00014144673396587892, |
|
"loss": 7.8148, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.36936447582835413, |
|
"grad_norm": 0.49740880727767944, |
|
"learning_rate": 0.00014123808770019432, |
|
"loss": 7.864, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3700887198986058, |
|
"grad_norm": 0.4826888144016266, |
|
"learning_rate": 0.00014102922490197308, |
|
"loss": 7.887, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.3708129639688575, |
|
"grad_norm": 0.4531131684780121, |
|
"learning_rate": 0.0001408201466679098, |
|
"loss": 7.901, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.37153720803910917, |
|
"grad_norm": 0.4634702801704407, |
|
"learning_rate": 0.00014061085409583043, |
|
"loss": 7.9295, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.37226145210936085, |
|
"grad_norm": 0.48286962509155273, |
|
"learning_rate": 0.0001404013482846863, |
|
"loss": 7.8609, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3729856961796125, |
|
"grad_norm": 0.49540573358535767, |
|
"learning_rate": 0.00014019163033454843, |
|
"loss": 7.8966, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3737099402498642, |
|
"grad_norm": 0.471544474363327, |
|
"learning_rate": 0.00013998170134660169, |
|
"loss": 7.9148, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3744341843201159, |
|
"grad_norm": 0.4708135426044464, |
|
"learning_rate": 0.0001397715624231391, |
|
"loss": 7.882, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.37515842839036756, |
|
"grad_norm": 0.5138322710990906, |
|
"learning_rate": 0.0001395612146675561, |
|
"loss": 7.8323, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.37588267246061924, |
|
"grad_norm": 0.5133697390556335, |
|
"learning_rate": 0.00013935065918434445, |
|
"loss": 7.867, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3766069165308709, |
|
"grad_norm": 0.44749224185943604, |
|
"learning_rate": 0.00013913989707908683, |
|
"loss": 7.8333, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3773311606011226, |
|
"grad_norm": 0.47725266218185425, |
|
"learning_rate": 0.00013892892945845077, |
|
"loss": 7.8102, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3780554046713743, |
|
"grad_norm": 0.4651695787906647, |
|
"learning_rate": 0.00013871775743018293, |
|
"loss": 7.9104, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.37877964874162595, |
|
"grad_norm": 0.4996732771396637, |
|
"learning_rate": 0.0001385063821031033, |
|
"loss": 7.8928, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.37950389281187763, |
|
"grad_norm": 0.465753436088562, |
|
"learning_rate": 0.00013829480458709927, |
|
"loss": 7.8479, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.38022813688212925, |
|
"grad_norm": 0.4831571877002716, |
|
"learning_rate": 0.00013808302599312, |
|
"loss": 7.8302, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 0.513978123664856, |
|
"learning_rate": 0.0001378710474331704, |
|
"loss": 7.8111, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3816766250226326, |
|
"grad_norm": 0.44832077622413635, |
|
"learning_rate": 0.00013765887002030529, |
|
"loss": 7.9085, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.3824008690928843, |
|
"grad_norm": 0.46730518341064453, |
|
"learning_rate": 0.00013744649486862378, |
|
"loss": 7.8403, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.38312511316313597, |
|
"grad_norm": 0.47098594903945923, |
|
"learning_rate": 0.00013723392309326316, |
|
"loss": 7.8301, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.38384935723338764, |
|
"grad_norm": 0.46994248032569885, |
|
"learning_rate": 0.00013702115581039313, |
|
"loss": 7.8654, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3845736013036393, |
|
"grad_norm": 0.46916016936302185, |
|
"learning_rate": 0.00013680819413721003, |
|
"loss": 7.8193, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.385297845373891, |
|
"grad_norm": 0.48833534121513367, |
|
"learning_rate": 0.00013659503919193089, |
|
"loss": 7.7667, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3860220894441427, |
|
"grad_norm": 0.444770485162735, |
|
"learning_rate": 0.00013638169209378756, |
|
"loss": 7.8987, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.38674633351439436, |
|
"grad_norm": 0.46796807646751404, |
|
"learning_rate": 0.00013616815396302081, |
|
"loss": 7.8328, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.38747057758464604, |
|
"grad_norm": 0.49306872487068176, |
|
"learning_rate": 0.00013595442592087453, |
|
"loss": 7.8562, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.3881948216548977, |
|
"grad_norm": 0.45947471261024475, |
|
"learning_rate": 0.00013574050908958976, |
|
"loss": 7.8148, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3889190657251494, |
|
"grad_norm": 0.48289933800697327, |
|
"learning_rate": 0.00013552640459239888, |
|
"loss": 7.8374, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.38964330979540107, |
|
"grad_norm": 0.49065014719963074, |
|
"learning_rate": 0.00013531211355351962, |
|
"loss": 7.782, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.39036755386565275, |
|
"grad_norm": 0.48593437671661377, |
|
"learning_rate": 0.00013509763709814923, |
|
"loss": 7.8095, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.3910917979359044, |
|
"grad_norm": 0.4995581805706024, |
|
"learning_rate": 0.00013488297635245848, |
|
"loss": 7.8081, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39181604200615605, |
|
"grad_norm": 0.4646143317222595, |
|
"learning_rate": 0.0001346681324435859, |
|
"loss": 7.9134, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.39254028607640773, |
|
"grad_norm": 0.49974748492240906, |
|
"learning_rate": 0.00013445310649963169, |
|
"loss": 7.8236, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3932645301466594, |
|
"grad_norm": 0.4993489384651184, |
|
"learning_rate": 0.00013423789964965194, |
|
"loss": 7.8141, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.3939887742169111, |
|
"grad_norm": 0.4687751829624176, |
|
"learning_rate": 0.00013402251302365264, |
|
"loss": 7.8152, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.39471301828716276, |
|
"grad_norm": 0.47237464785575867, |
|
"learning_rate": 0.00013380694775258367, |
|
"loss": 7.8618, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.39543726235741444, |
|
"grad_norm": 0.4689088463783264, |
|
"learning_rate": 0.00013359120496833304, |
|
"loss": 7.897, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.3961615064276661, |
|
"grad_norm": 0.4774571657180786, |
|
"learning_rate": 0.00013337528580372078, |
|
"loss": 7.8143, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3968857504979178, |
|
"grad_norm": 0.49935322999954224, |
|
"learning_rate": 0.00013315919139249307, |
|
"loss": 7.8004, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.3976099945681695, |
|
"grad_norm": 0.5764620304107666, |
|
"learning_rate": 0.00013294292286931627, |
|
"loss": 7.8003, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.39833423863842116, |
|
"grad_norm": 0.5829343795776367, |
|
"learning_rate": 0.00013272648136977092, |
|
"loss": 7.771, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.39905848270867283, |
|
"grad_norm": 0.49663594365119934, |
|
"learning_rate": 0.00013250986803034598, |
|
"loss": 7.8275, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.3997827267789245, |
|
"grad_norm": 0.4658512473106384, |
|
"learning_rate": 0.0001322930839884325, |
|
"loss": 7.8529, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4005069708491762, |
|
"grad_norm": 0.47856324911117554, |
|
"learning_rate": 0.000132076130382318, |
|
"loss": 7.8692, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.40123121491942787, |
|
"grad_norm": 0.5036735534667969, |
|
"learning_rate": 0.00013185900835118025, |
|
"loss": 7.8315, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.40195545898967955, |
|
"grad_norm": 0.4810890853404999, |
|
"learning_rate": 0.00013164171903508153, |
|
"loss": 7.8672, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.40267970305993117, |
|
"grad_norm": 0.5102052092552185, |
|
"learning_rate": 0.00013142426357496225, |
|
"loss": 7.8202, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.40340394713018285, |
|
"grad_norm": 0.4854986071586609, |
|
"learning_rate": 0.0001312066431126355, |
|
"loss": 7.8389, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.40412819120043453, |
|
"grad_norm": 0.4611469805240631, |
|
"learning_rate": 0.0001309888587907805, |
|
"loss": 7.8226, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4048524352706862, |
|
"grad_norm": 0.49772679805755615, |
|
"learning_rate": 0.00013077091175293706, |
|
"loss": 7.8254, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4055766793409379, |
|
"grad_norm": 0.47026780247688293, |
|
"learning_rate": 0.00013055280314349928, |
|
"loss": 7.8621, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40630092341118956, |
|
"grad_norm": 0.4732389748096466, |
|
"learning_rate": 0.00013033453410770963, |
|
"loss": 7.877, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.40702516748144124, |
|
"grad_norm": 0.47972285747528076, |
|
"learning_rate": 0.000130116105791653, |
|
"loss": 7.8747, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4077494115516929, |
|
"grad_norm": 0.48406916856765747, |
|
"learning_rate": 0.0001298975193422506, |
|
"loss": 7.8502, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4084736556219446, |
|
"grad_norm": 0.47579431533813477, |
|
"learning_rate": 0.0001296787759072539, |
|
"loss": 7.8294, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4091978996921963, |
|
"grad_norm": 0.48034512996673584, |
|
"learning_rate": 0.0001294598766352388, |
|
"loss": 7.8261, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.40992214376244795, |
|
"grad_norm": 0.44753938913345337, |
|
"learning_rate": 0.00012924082267559939, |
|
"loss": 7.8905, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.41064638783269963, |
|
"grad_norm": 0.4587608575820923, |
|
"learning_rate": 0.00012902161517854197, |
|
"loss": 7.8683, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4113706319029513, |
|
"grad_norm": 0.47434911131858826, |
|
"learning_rate": 0.00012880225529507912, |
|
"loss": 7.8635, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.412094875973203, |
|
"grad_norm": 0.4594435691833496, |
|
"learning_rate": 0.00012858274417702344, |
|
"loss": 7.8942, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.41281912004345467, |
|
"grad_norm": 0.4879729747772217, |
|
"learning_rate": 0.00012836308297698175, |
|
"loss": 7.8623, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41354336411370635, |
|
"grad_norm": 0.5010982155799866, |
|
"learning_rate": 0.00012814327284834886, |
|
"loss": 7.8099, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.41426760818395797, |
|
"grad_norm": 0.47567158937454224, |
|
"learning_rate": 0.00012792331494530158, |
|
"loss": 7.8321, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.41499185225420965, |
|
"grad_norm": 0.47433096170425415, |
|
"learning_rate": 0.00012770321042279264, |
|
"loss": 7.8615, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4157160963244613, |
|
"grad_norm": 0.47150593996047974, |
|
"learning_rate": 0.00012748296043654472, |
|
"loss": 7.8348, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.416440340394713, |
|
"grad_norm": 0.43587684631347656, |
|
"learning_rate": 0.0001272625661430442, |
|
"loss": 7.8704, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4171645844649647, |
|
"grad_norm": 0.4814154803752899, |
|
"learning_rate": 0.00012704202869953521, |
|
"loss": 7.8232, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.41788882853521636, |
|
"grad_norm": 0.4678634703159332, |
|
"learning_rate": 0.00012682134926401354, |
|
"loss": 7.8351, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.41861307260546804, |
|
"grad_norm": 0.48871251940727234, |
|
"learning_rate": 0.00012660052899522058, |
|
"loss": 7.7851, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4193373166757197, |
|
"grad_norm": 0.48269665241241455, |
|
"learning_rate": 0.00012637956905263718, |
|
"loss": 7.8597, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4200615607459714, |
|
"grad_norm": 0.46834123134613037, |
|
"learning_rate": 0.00012615847059647752, |
|
"loss": 7.8509, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4207858048162231, |
|
"grad_norm": 0.47934436798095703, |
|
"learning_rate": 0.00012593723478768323, |
|
"loss": 7.8651, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.42151004888647475, |
|
"grad_norm": 0.45488452911376953, |
|
"learning_rate": 0.00012571586278791705, |
|
"loss": 7.8629, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.42223429295672643, |
|
"grad_norm": 0.4674306809902191, |
|
"learning_rate": 0.00012549435575955683, |
|
"loss": 7.8591, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4229585370269781, |
|
"grad_norm": 0.47342684864997864, |
|
"learning_rate": 0.0001252727148656895, |
|
"loss": 7.7871, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4236827810972298, |
|
"grad_norm": 0.49287477135658264, |
|
"learning_rate": 0.0001250509412701048, |
|
"loss": 7.8044, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.42440702516748147, |
|
"grad_norm": 0.47948122024536133, |
|
"learning_rate": 0.00012482903613728928, |
|
"loss": 7.872, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4251312692377331, |
|
"grad_norm": 0.46859344840049744, |
|
"learning_rate": 0.00012460700063242027, |
|
"loss": 7.8114, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.42585551330798477, |
|
"grad_norm": 0.49053072929382324, |
|
"learning_rate": 0.00012438483592135948, |
|
"loss": 7.7944, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.42657975737823645, |
|
"grad_norm": 0.5255662202835083, |
|
"learning_rate": 0.00012416254317064714, |
|
"loss": 7.7864, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4273040014484881, |
|
"grad_norm": 0.5337214469909668, |
|
"learning_rate": 0.00012394012354749584, |
|
"loss": 7.8267, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4280282455187398, |
|
"grad_norm": 0.48263901472091675, |
|
"learning_rate": 0.0001237175782197843, |
|
"loss": 7.7757, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.4287524895889915, |
|
"grad_norm": 0.468227356672287, |
|
"learning_rate": 0.00012349490835605127, |
|
"loss": 7.8892, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.42947673365924316, |
|
"grad_norm": 0.4746133089065552, |
|
"learning_rate": 0.00012327211512548945, |
|
"loss": 7.8251, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.43020097772949484, |
|
"grad_norm": 0.4931454062461853, |
|
"learning_rate": 0.00012304919969793928, |
|
"loss": 7.7892, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4309252217997465, |
|
"grad_norm": 0.44330355525016785, |
|
"learning_rate": 0.00012282616324388283, |
|
"loss": 7.8673, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4316494658699982, |
|
"grad_norm": 0.5240752100944519, |
|
"learning_rate": 0.00012260300693443777, |
|
"loss": 7.8191, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4323737099402499, |
|
"grad_norm": 0.5226469039916992, |
|
"learning_rate": 0.00012237973194135086, |
|
"loss": 7.799, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.43309795401050155, |
|
"grad_norm": 0.5419146418571472, |
|
"learning_rate": 0.00012215633943699232, |
|
"loss": 7.753, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.43382219808075323, |
|
"grad_norm": 0.5095767974853516, |
|
"learning_rate": 0.00012193283059434918, |
|
"loss": 7.899, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4345464421510049, |
|
"grad_norm": 0.5662295818328857, |
|
"learning_rate": 0.0001217092065870195, |
|
"loss": 7.8183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4352706862212566, |
|
"grad_norm": 0.46471965312957764, |
|
"learning_rate": 0.00012148546858920591, |
|
"loss": 7.8282, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.43599493029150826, |
|
"grad_norm": 0.5106796622276306, |
|
"learning_rate": 0.00012126161777570967, |
|
"loss": 7.8318, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4367191743617599, |
|
"grad_norm": 0.48581328988075256, |
|
"learning_rate": 0.00012103765532192437, |
|
"loss": 7.9178, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.43744341843201157, |
|
"grad_norm": 0.4774625301361084, |
|
"learning_rate": 0.00012081358240382983, |
|
"loss": 7.861, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.43816766250226324, |
|
"grad_norm": 0.4969928562641144, |
|
"learning_rate": 0.00012058940019798588, |
|
"loss": 7.8667, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4388919065725149, |
|
"grad_norm": 0.4804958403110504, |
|
"learning_rate": 0.00012036510988152618, |
|
"loss": 7.8884, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.4396161506427666, |
|
"grad_norm": 0.4955686330795288, |
|
"learning_rate": 0.0001201407126321521, |
|
"loss": 7.8276, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4403403947130183, |
|
"grad_norm": 0.49440160393714905, |
|
"learning_rate": 0.00011991620962812638, |
|
"loss": 7.8653, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.44106463878326996, |
|
"grad_norm": 0.49569594860076904, |
|
"learning_rate": 0.00011969160204826721, |
|
"loss": 7.8461, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.44178888285352164, |
|
"grad_norm": 0.4727310240268707, |
|
"learning_rate": 0.00011946689107194182, |
|
"loss": 7.8543, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4425131269237733, |
|
"grad_norm": 0.4789380729198456, |
|
"learning_rate": 0.00011924207787906032, |
|
"loss": 7.8264, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.443237370994025, |
|
"grad_norm": 0.4611952006816864, |
|
"learning_rate": 0.00011901716365006956, |
|
"loss": 7.868, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.44396161506427667, |
|
"grad_norm": 0.4959096312522888, |
|
"learning_rate": 0.00011879214956594693, |
|
"loss": 7.8141, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.44468585913452835, |
|
"grad_norm": 0.4807751178741455, |
|
"learning_rate": 0.00011856703680819414, |
|
"loss": 7.8085, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.44541010320478003, |
|
"grad_norm": 0.4968527555465698, |
|
"learning_rate": 0.00011834182655883097, |
|
"loss": 7.8014, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4461343472750317, |
|
"grad_norm": 0.4878495931625366, |
|
"learning_rate": 0.00011811652000038915, |
|
"loss": 7.7984, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4468585913452834, |
|
"grad_norm": 0.4903671443462372, |
|
"learning_rate": 0.00011789111831590606, |
|
"loss": 7.862, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.44758283541553506, |
|
"grad_norm": 0.4804092347621918, |
|
"learning_rate": 0.00011766562268891864, |
|
"loss": 7.798, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4483070794857867, |
|
"grad_norm": 0.5215314626693726, |
|
"learning_rate": 0.00011744003430345705, |
|
"loss": 7.7989, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.44903132355603836, |
|
"grad_norm": 0.49652373790740967, |
|
"learning_rate": 0.00011721435434403849, |
|
"loss": 7.8358, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.44975556762629004, |
|
"grad_norm": 0.4879864454269409, |
|
"learning_rate": 0.00011698858399566109, |
|
"loss": 7.8349, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4504798116965417, |
|
"grad_norm": 0.4709030091762543, |
|
"learning_rate": 0.00011676272444379748, |
|
"loss": 7.7974, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4512040557667934, |
|
"grad_norm": 0.47246500849723816, |
|
"learning_rate": 0.00011653677687438874, |
|
"loss": 7.8776, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4519282998370451, |
|
"grad_norm": 0.46343374252319336, |
|
"learning_rate": 0.00011631074247383808, |
|
"loss": 7.8622, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.45265254390729676, |
|
"grad_norm": 0.4682675898075104, |
|
"learning_rate": 0.00011608462242900471, |
|
"loss": 7.8559, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.45337678797754843, |
|
"grad_norm": 0.4654393494129181, |
|
"learning_rate": 0.00011585841792719741, |
|
"loss": 7.8423, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.4541010320478001, |
|
"grad_norm": 0.44032153487205505, |
|
"learning_rate": 0.00011563213015616856, |
|
"loss": 7.8894, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4548252761180518, |
|
"grad_norm": 0.4499657452106476, |
|
"learning_rate": 0.0001154057603041077, |
|
"loss": 7.8771, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.45554952018830347, |
|
"grad_norm": 0.4855179190635681, |
|
"learning_rate": 0.0001151793095596354, |
|
"loss": 7.8613, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.45627376425855515, |
|
"grad_norm": 0.4786494970321655, |
|
"learning_rate": 0.00011495277911179694, |
|
"loss": 7.8229, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4569980083288068, |
|
"grad_norm": 0.48533812165260315, |
|
"learning_rate": 0.00011472617015005609, |
|
"loss": 7.8304, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.4577222523990585, |
|
"grad_norm": 0.4566459059715271, |
|
"learning_rate": 0.00011449948386428894, |
|
"loss": 7.8429, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.4584464964693102, |
|
"grad_norm": 0.4726155400276184, |
|
"learning_rate": 0.00011427272144477757, |
|
"loss": 7.7996, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4591707405395618, |
|
"grad_norm": 0.44978711009025574, |
|
"learning_rate": 0.0001140458840822038, |
|
"loss": 7.8547, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.4598949846098135, |
|
"grad_norm": 0.503386378288269, |
|
"learning_rate": 0.00011381897296764296, |
|
"loss": 7.8874, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.46061922868006516, |
|
"grad_norm": 0.4718452990055084, |
|
"learning_rate": 0.0001135919892925577, |
|
"loss": 7.7894, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.46134347275031684, |
|
"grad_norm": 0.47960880398750305, |
|
"learning_rate": 0.00011336493424879158, |
|
"loss": 7.8652, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4620677168205685, |
|
"grad_norm": 0.48030349612236023, |
|
"learning_rate": 0.00011313780902856294, |
|
"loss": 7.8594, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4627919608908202, |
|
"grad_norm": 0.448631227016449, |
|
"learning_rate": 0.00011291061482445861, |
|
"loss": 7.9241, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.4635162049610719, |
|
"grad_norm": 0.5188454985618591, |
|
"learning_rate": 0.00011268335282942765, |
|
"loss": 7.7786, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46424044903132355, |
|
"grad_norm": 0.47939813137054443, |
|
"learning_rate": 0.00011245602423677506, |
|
"loss": 7.8136, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.46496469310157523, |
|
"grad_norm": 0.5121312737464905, |
|
"learning_rate": 0.00011222863024015551, |
|
"loss": 7.91, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.4656889371718269, |
|
"grad_norm": 0.49285969138145447, |
|
"learning_rate": 0.00011200117203356715, |
|
"loss": 7.8379, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4664131812420786, |
|
"grad_norm": 0.4866684079170227, |
|
"learning_rate": 0.0001117736508113452, |
|
"loss": 7.8206, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.46713742531233027, |
|
"grad_norm": 0.46585652232170105, |
|
"learning_rate": 0.00011154606776815587, |
|
"loss": 7.8355, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.46786166938258195, |
|
"grad_norm": 0.49136561155319214, |
|
"learning_rate": 0.00011131842409898982, |
|
"loss": 7.8392, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4685859134528336, |
|
"grad_norm": 0.4672096073627472, |
|
"learning_rate": 0.00011109072099915625, |
|
"loss": 7.8878, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.4693101575230853, |
|
"grad_norm": 0.5394557118415833, |
|
"learning_rate": 0.00011086295966427622, |
|
"loss": 7.819, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.470034401593337, |
|
"grad_norm": 0.4872957766056061, |
|
"learning_rate": 0.00011063514129027672, |
|
"loss": 7.8399, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.4707586456635886, |
|
"grad_norm": 0.5272302627563477, |
|
"learning_rate": 0.00011040726707338416, |
|
"loss": 7.9755, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4714828897338403, |
|
"grad_norm": 0.4649381935596466, |
|
"learning_rate": 0.00011017933821011819, |
|
"loss": 7.8441, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.47220713380409196, |
|
"grad_norm": 0.4975007474422455, |
|
"learning_rate": 0.0001099513558972854, |
|
"loss": 7.8123, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.47293137787434364, |
|
"grad_norm": 0.4799993336200714, |
|
"learning_rate": 0.000109723321331973, |
|
"loss": 7.8462, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4736556219445953, |
|
"grad_norm": 0.46020984649658203, |
|
"learning_rate": 0.00010949523571154266, |
|
"loss": 7.8492, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.474379866014847, |
|
"grad_norm": 0.48827457427978516, |
|
"learning_rate": 0.00010926710023362398, |
|
"loss": 7.8295, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.4751041100850987, |
|
"grad_norm": 0.509655773639679, |
|
"learning_rate": 0.0001090389160961085, |
|
"loss": 7.8165, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.47582835415535035, |
|
"grad_norm": 0.48283475637435913, |
|
"learning_rate": 0.00010881068449714315, |
|
"loss": 7.8341, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.47655259822560203, |
|
"grad_norm": 0.4844430088996887, |
|
"learning_rate": 0.00010858240663512416, |
|
"loss": 7.8634, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4772768422958537, |
|
"grad_norm": 0.4722267985343933, |
|
"learning_rate": 0.00010835408370869063, |
|
"loss": 7.8826, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4780010863661054, |
|
"grad_norm": 0.4804382026195526, |
|
"learning_rate": 0.00010812571691671826, |
|
"loss": 7.8242, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.47872533043635707, |
|
"grad_norm": 0.46896272897720337, |
|
"learning_rate": 0.00010789730745831312, |
|
"loss": 7.9247, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.47944957450660874, |
|
"grad_norm": 0.5058935284614563, |
|
"learning_rate": 0.00010766885653280532, |
|
"loss": 7.8299, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.4801738185768604, |
|
"grad_norm": 0.49389636516571045, |
|
"learning_rate": 0.00010744036533974267, |
|
"loss": 7.8462, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.4808980626471121, |
|
"grad_norm": 0.47579723596572876, |
|
"learning_rate": 0.00010721183507888442, |
|
"loss": 7.8666, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.4816223067173637, |
|
"grad_norm": 0.47958090901374817, |
|
"learning_rate": 0.00010698326695019496, |
|
"loss": 7.8241, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4823465507876154, |
|
"grad_norm": 0.5065767765045166, |
|
"learning_rate": 0.00010675466215383758, |
|
"loss": 7.8085, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.4830707948578671, |
|
"grad_norm": 0.4563339352607727, |
|
"learning_rate": 0.00010652602189016799, |
|
"loss": 7.8053, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.48379503892811876, |
|
"grad_norm": 0.4857904314994812, |
|
"learning_rate": 0.00010629734735972818, |
|
"loss": 7.8281, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.48451928299837044, |
|
"grad_norm": 0.4706745743751526, |
|
"learning_rate": 0.00010606863976324015, |
|
"loss": 7.8073, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.4852435270686221, |
|
"grad_norm": 0.49210935831069946, |
|
"learning_rate": 0.00010583990030159939, |
|
"loss": 7.8297, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4859677711388738, |
|
"grad_norm": 0.4665246307849884, |
|
"learning_rate": 0.00010561113017586878, |
|
"loss": 7.8966, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.4866920152091255, |
|
"grad_norm": 0.47893619537353516, |
|
"learning_rate": 0.00010538233058727225, |
|
"loss": 7.8491, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.48741625927937715, |
|
"grad_norm": 0.4599825441837311, |
|
"learning_rate": 0.00010515350273718829, |
|
"loss": 7.8459, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.48814050334962883, |
|
"grad_norm": 0.47248682379722595, |
|
"learning_rate": 0.00010492464782714395, |
|
"loss": 7.8785, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.4888647474198805, |
|
"grad_norm": 0.46690088510513306, |
|
"learning_rate": 0.00010469576705880826, |
|
"loss": 7.8327, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4895889914901322, |
|
"grad_norm": 0.48305100202560425, |
|
"learning_rate": 0.00010446686163398604, |
|
"loss": 7.8324, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.49031323556038386, |
|
"grad_norm": 0.47751349210739136, |
|
"learning_rate": 0.00010423793275461162, |
|
"loss": 7.8515, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.49103747963063554, |
|
"grad_norm": 0.46247628331184387, |
|
"learning_rate": 0.00010400898162274248, |
|
"loss": 7.8304, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.4917617237008872, |
|
"grad_norm": 0.4933788478374481, |
|
"learning_rate": 0.00010378000944055291, |
|
"loss": 7.7668, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.4924859677711389, |
|
"grad_norm": 0.4568544626235962, |
|
"learning_rate": 0.00010355101741032771, |
|
"loss": 7.8139, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4932102118413905, |
|
"grad_norm": 0.5135279297828674, |
|
"learning_rate": 0.000103322006734456, |
|
"loss": 7.8382, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.4939344559116422, |
|
"grad_norm": 0.4543817639350891, |
|
"learning_rate": 0.00010309297861542468, |
|
"loss": 7.8125, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.4946586999818939, |
|
"grad_norm": 0.48243218660354614, |
|
"learning_rate": 0.00010286393425581231, |
|
"loss": 7.8269, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.49538294405214556, |
|
"grad_norm": 0.4711065888404846, |
|
"learning_rate": 0.00010263487485828271, |
|
"loss": 7.881, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.49610718812239724, |
|
"grad_norm": 0.48097798228263855, |
|
"learning_rate": 0.0001024058016255787, |
|
"loss": 7.8061, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4968314321926489, |
|
"grad_norm": 0.4807778000831604, |
|
"learning_rate": 0.00010217671576051564, |
|
"loss": 7.8052, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.4975556762629006, |
|
"grad_norm": 0.48832613229751587, |
|
"learning_rate": 0.00010194761846597534, |
|
"loss": 7.814, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.49827992033315227, |
|
"grad_norm": 0.4787043035030365, |
|
"learning_rate": 0.00010171851094489957, |
|
"loss": 7.829, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.49900416440340395, |
|
"grad_norm": 0.4968035817146301, |
|
"learning_rate": 0.0001014893944002838, |
|
"loss": 7.7458, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.4997284084736556, |
|
"grad_norm": 0.458019495010376, |
|
"learning_rate": 0.00010126027003517086, |
|
"loss": 7.8039, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5004526525439073, |
|
"grad_norm": 0.4970747232437134, |
|
"learning_rate": 0.0001010311390526447, |
|
"loss": 7.7751, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.501176896614159, |
|
"grad_norm": 0.45407259464263916, |
|
"learning_rate": 0.00010080200265582394, |
|
"loss": 7.836, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.501176896614159, |
|
"eval_loss": 7.830474376678467, |
|
"eval_runtime": 4.779, |
|
"eval_samples_per_second": 243.355, |
|
"eval_steps_per_second": 121.782, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5019011406844106, |
|
"grad_norm": 0.5260797142982483, |
|
"learning_rate": 0.0001005728620478557, |
|
"loss": 7.7982, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5026253847546623, |
|
"grad_norm": 0.4505165219306946, |
|
"learning_rate": 0.00010034371843190915, |
|
"loss": 7.8403, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.503349628824914, |
|
"grad_norm": 0.5103182196617126, |
|
"learning_rate": 0.00010011457301116933, |
|
"loss": 7.7234, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5040738728951657, |
|
"grad_norm": 0.5652449727058411, |
|
"learning_rate": 9.988542698883068e-05, |
|
"loss": 7.8412, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5047981169654173, |
|
"grad_norm": 0.5191313624382019, |
|
"learning_rate": 9.965628156809087e-05, |
|
"loss": 7.7998, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.505522361035669, |
|
"grad_norm": 0.4937920570373535, |
|
"learning_rate": 9.94271379521443e-05, |
|
"loss": 7.8072, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5062466051059207, |
|
"grad_norm": 0.547224760055542, |
|
"learning_rate": 9.919799734417608e-05, |
|
"loss": 7.8387, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5069708491761724, |
|
"grad_norm": 0.5999129414558411, |
|
"learning_rate": 9.896886094735535e-05, |
|
"loss": 7.8445, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.507695093246424, |
|
"grad_norm": 0.5010003447532654, |
|
"learning_rate": 9.873972996482916e-05, |
|
"loss": 7.8172, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5084193373166758, |
|
"grad_norm": 0.48822104930877686, |
|
"learning_rate": 9.851060559971624e-05, |
|
"loss": 7.8536, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5091435813869274, |
|
"grad_norm": 0.5012331604957581, |
|
"learning_rate": 9.828148905510044e-05, |
|
"loss": 7.8445, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5098678254571791, |
|
"grad_norm": 0.4834780693054199, |
|
"learning_rate": 9.805238153402469e-05, |
|
"loss": 7.822, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5105920695274307, |
|
"grad_norm": 0.4714999496936798, |
|
"learning_rate": 9.782328423948435e-05, |
|
"loss": 7.877, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5113163135976824, |
|
"grad_norm": 0.46891549229621887, |
|
"learning_rate": 9.759419837442134e-05, |
|
"loss": 7.8555, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5120405576679341, |
|
"grad_norm": 0.4694564938545227, |
|
"learning_rate": 9.736512514171732e-05, |
|
"loss": 7.8477, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5127648017381857, |
|
"grad_norm": 0.48359888792037964, |
|
"learning_rate": 9.71360657441877e-05, |
|
"loss": 7.8765, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5134890458084375, |
|
"grad_norm": 0.47419044375419617, |
|
"learning_rate": 9.690702138457534e-05, |
|
"loss": 7.8814, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5142132898786891, |
|
"grad_norm": 0.47822949290275574, |
|
"learning_rate": 9.667799326554401e-05, |
|
"loss": 7.8609, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5149375339489408, |
|
"grad_norm": 0.4817030429840088, |
|
"learning_rate": 9.644898258967232e-05, |
|
"loss": 7.8694, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5156617780191924, |
|
"grad_norm": 0.4727485775947571, |
|
"learning_rate": 9.62199905594471e-05, |
|
"loss": 7.903, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5163860220894442, |
|
"grad_norm": 0.5098878741264343, |
|
"learning_rate": 9.599101837725753e-05, |
|
"loss": 7.8711, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5171102661596958, |
|
"grad_norm": 0.449669748544693, |
|
"learning_rate": 9.576206724538837e-05, |
|
"loss": 7.868, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5178345102299475, |
|
"grad_norm": 0.4713840186595917, |
|
"learning_rate": 9.553313836601398e-05, |
|
"loss": 7.8244, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5185587543001992, |
|
"grad_norm": 0.4921199381351471, |
|
"learning_rate": 9.53042329411918e-05, |
|
"loss": 7.8841, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5192829983704509, |
|
"grad_norm": 0.47159916162490845, |
|
"learning_rate": 9.507535217285607e-05, |
|
"loss": 7.8828, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5200072424407025, |
|
"grad_norm": 0.48348769545555115, |
|
"learning_rate": 9.484649726281173e-05, |
|
"loss": 7.8346, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5207314865109542, |
|
"grad_norm": 0.48519793152809143, |
|
"learning_rate": 9.461766941272778e-05, |
|
"loss": 7.8751, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5214557305812059, |
|
"grad_norm": 0.4665411412715912, |
|
"learning_rate": 9.438886982413124e-05, |
|
"loss": 7.9164, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5221799746514575, |
|
"grad_norm": 0.4838716387748718, |
|
"learning_rate": 9.416009969840061e-05, |
|
"loss": 7.8461, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5229042187217092, |
|
"grad_norm": 0.4719375669956207, |
|
"learning_rate": 9.393136023675988e-05, |
|
"loss": 7.8157, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5236284627919608, |
|
"grad_norm": 0.5168206691741943, |
|
"learning_rate": 9.370265264027185e-05, |
|
"loss": 7.8118, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5243527068622126, |
|
"grad_norm": 0.466214656829834, |
|
"learning_rate": 9.347397810983204e-05, |
|
"loss": 7.7755, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5250769509324642, |
|
"grad_norm": 0.5822585821151733, |
|
"learning_rate": 9.324533784616247e-05, |
|
"loss": 7.85, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5258011950027159, |
|
"grad_norm": 0.48217180371284485, |
|
"learning_rate": 9.301673304980504e-05, |
|
"loss": 7.8665, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5265254390729676, |
|
"grad_norm": 0.5124320983886719, |
|
"learning_rate": 9.278816492111562e-05, |
|
"loss": 7.8112, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5272496831432193, |
|
"grad_norm": 0.4656013548374176, |
|
"learning_rate": 9.255963466025735e-05, |
|
"loss": 7.8952, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5279739272134709, |
|
"grad_norm": 0.4809459447860718, |
|
"learning_rate": 9.233114346719472e-05, |
|
"loss": 7.8659, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5286981712837227, |
|
"grad_norm": 0.46756666898727417, |
|
"learning_rate": 9.21026925416869e-05, |
|
"loss": 7.8359, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5294224153539743, |
|
"grad_norm": 0.503579318523407, |
|
"learning_rate": 9.187428308328176e-05, |
|
"loss": 7.8012, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.530146659424226, |
|
"grad_norm": 0.46779781579971313, |
|
"learning_rate": 9.164591629130941e-05, |
|
"loss": 7.8359, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5308709034944776, |
|
"grad_norm": 0.4926343560218811, |
|
"learning_rate": 9.141759336487584e-05, |
|
"loss": 7.8221, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5315951475647294, |
|
"grad_norm": 0.464432954788208, |
|
"learning_rate": 9.118931550285687e-05, |
|
"loss": 7.8713, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.532319391634981, |
|
"grad_norm": 0.49543848633766174, |
|
"learning_rate": 9.096108390389151e-05, |
|
"loss": 7.8084, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5330436357052326, |
|
"grad_norm": 0.45875051617622375, |
|
"learning_rate": 9.073289976637603e-05, |
|
"loss": 7.8152, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5337678797754843, |
|
"grad_norm": 0.4713042974472046, |
|
"learning_rate": 9.050476428845739e-05, |
|
"loss": 7.7912, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.534492123845736, |
|
"grad_norm": 0.4827256500720978, |
|
"learning_rate": 9.027667866802701e-05, |
|
"loss": 7.8026, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5352163679159877, |
|
"grad_norm": 0.45872411131858826, |
|
"learning_rate": 9.004864410271462e-05, |
|
"loss": 7.8695, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5359406119862393, |
|
"grad_norm": 0.4700596034526825, |
|
"learning_rate": 8.982066178988182e-05, |
|
"loss": 7.8606, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5366648560564911, |
|
"grad_norm": 0.45632404088974, |
|
"learning_rate": 8.959273292661586e-05, |
|
"loss": 7.7807, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5373891001267427, |
|
"grad_norm": 0.4901478886604309, |
|
"learning_rate": 8.936485870972328e-05, |
|
"loss": 7.8481, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5381133441969944, |
|
"grad_norm": 0.4826202392578125, |
|
"learning_rate": 8.913704033572379e-05, |
|
"loss": 7.7886, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.538837588267246, |
|
"grad_norm": 0.5499749779701233, |
|
"learning_rate": 8.89092790008438e-05, |
|
"loss": 7.8111, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5395618323374978, |
|
"grad_norm": 0.4294477105140686, |
|
"learning_rate": 8.868157590101019e-05, |
|
"loss": 7.8344, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5402860764077494, |
|
"grad_norm": 0.4754398763179779, |
|
"learning_rate": 8.845393223184418e-05, |
|
"loss": 7.9187, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5410103204780011, |
|
"grad_norm": 0.526314377784729, |
|
"learning_rate": 8.822634918865482e-05, |
|
"loss": 7.7336, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5417345645482528, |
|
"grad_norm": 0.5151805281639099, |
|
"learning_rate": 8.799882796643288e-05, |
|
"loss": 7.7485, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5424588086185045, |
|
"grad_norm": 0.56130450963974, |
|
"learning_rate": 8.777136975984448e-05, |
|
"loss": 7.8075, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5431830526887561, |
|
"grad_norm": 0.6159570813179016, |
|
"learning_rate": 8.754397576322498e-05, |
|
"loss": 7.7995, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5439072967590078, |
|
"grad_norm": 0.49635598063468933, |
|
"learning_rate": 8.731664717057236e-05, |
|
"loss": 7.8466, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5446315408292595, |
|
"grad_norm": 0.49500927329063416, |
|
"learning_rate": 8.708938517554141e-05, |
|
"loss": 7.8124, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5453557848995111, |
|
"grad_norm": 0.48374736309051514, |
|
"learning_rate": 8.68621909714371e-05, |
|
"loss": 7.8808, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5460800289697628, |
|
"grad_norm": 0.4718135893344879, |
|
"learning_rate": 8.663506575120845e-05, |
|
"loss": 7.8721, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5468042730400144, |
|
"grad_norm": 0.49202045798301697, |
|
"learning_rate": 8.640801070744233e-05, |
|
"loss": 7.8248, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5475285171102662, |
|
"grad_norm": 0.4670674502849579, |
|
"learning_rate": 8.618102703235702e-05, |
|
"loss": 7.8563, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5482527611805178, |
|
"grad_norm": 0.5087535381317139, |
|
"learning_rate": 8.59541159177962e-05, |
|
"loss": 7.8356, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5489770052507695, |
|
"grad_norm": 0.4710357189178467, |
|
"learning_rate": 8.572727855522243e-05, |
|
"loss": 7.8835, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5497012493210212, |
|
"grad_norm": 0.48206937313079834, |
|
"learning_rate": 8.550051613571108e-05, |
|
"loss": 7.8386, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5504254933912729, |
|
"grad_norm": 0.4598138630390167, |
|
"learning_rate": 8.527382984994394e-05, |
|
"loss": 7.9155, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5511497374615245, |
|
"grad_norm": 0.46072736382484436, |
|
"learning_rate": 8.504722088820309e-05, |
|
"loss": 7.9004, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5518739815317762, |
|
"grad_norm": 0.47069671750068665, |
|
"learning_rate": 8.482069044036462e-05, |
|
"loss": 7.8558, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5525982256020279, |
|
"grad_norm": 0.48600372672080994, |
|
"learning_rate": 8.459423969589228e-05, |
|
"loss": 7.8204, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5533224696722796, |
|
"grad_norm": 0.456483393907547, |
|
"learning_rate": 8.436786984383146e-05, |
|
"loss": 7.8135, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5540467137425312, |
|
"grad_norm": 0.4625873565673828, |
|
"learning_rate": 8.414158207280259e-05, |
|
"loss": 7.8563, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.554770957812783, |
|
"grad_norm": 0.47437793016433716, |
|
"learning_rate": 8.391537757099533e-05, |
|
"loss": 7.8649, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5554952018830346, |
|
"grad_norm": 0.48973995447158813, |
|
"learning_rate": 8.368925752616194e-05, |
|
"loss": 7.8504, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5562194459532862, |
|
"grad_norm": 0.49295151233673096, |
|
"learning_rate": 8.346322312561127e-05, |
|
"loss": 7.8525, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5569436900235379, |
|
"grad_norm": 0.4774750769138336, |
|
"learning_rate": 8.323727555620256e-05, |
|
"loss": 7.8677, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.5576679340937896, |
|
"grad_norm": 0.4751247465610504, |
|
"learning_rate": 8.301141600433891e-05, |
|
"loss": 7.8658, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5583921781640413, |
|
"grad_norm": 0.48602089285850525, |
|
"learning_rate": 8.278564565596152e-05, |
|
"loss": 7.8284, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5591164222342929, |
|
"grad_norm": 0.4605376124382019, |
|
"learning_rate": 8.255996569654296e-05, |
|
"loss": 7.8761, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5598406663045447, |
|
"grad_norm": 0.46493852138519287, |
|
"learning_rate": 8.233437731108139e-05, |
|
"loss": 7.8206, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5605649103747963, |
|
"grad_norm": 0.4919988512992859, |
|
"learning_rate": 8.210888168409399e-05, |
|
"loss": 7.7783, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.561289154445048, |
|
"grad_norm": 0.4530481994152069, |
|
"learning_rate": 8.188347999961087e-05, |
|
"loss": 7.8679, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5620133985152996, |
|
"grad_norm": 0.4982988238334656, |
|
"learning_rate": 8.165817344116906e-05, |
|
"loss": 7.7636, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5627376425855514, |
|
"grad_norm": 0.49008551239967346, |
|
"learning_rate": 8.143296319180588e-05, |
|
"loss": 7.864, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.563461886655803, |
|
"grad_norm": 0.4614430367946625, |
|
"learning_rate": 8.120785043405309e-05, |
|
"loss": 7.8705, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5641861307260547, |
|
"grad_norm": 0.46965253353118896, |
|
"learning_rate": 8.098283634993045e-05, |
|
"loss": 7.8487, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5649103747963063, |
|
"grad_norm": 0.4675351083278656, |
|
"learning_rate": 8.07579221209397e-05, |
|
"loss": 7.8998, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5656346188665581, |
|
"grad_norm": 0.5022285580635071, |
|
"learning_rate": 8.053310892805823e-05, |
|
"loss": 7.7928, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5663588629368097, |
|
"grad_norm": 0.46838775277137756, |
|
"learning_rate": 8.03083979517328e-05, |
|
"loss": 7.8427, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5670831070070613, |
|
"grad_norm": 0.501288652420044, |
|
"learning_rate": 8.008379037187366e-05, |
|
"loss": 7.7874, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5678073510773131, |
|
"grad_norm": 0.49566730856895447, |
|
"learning_rate": 7.985928736784794e-05, |
|
"loss": 7.7951, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5685315951475647, |
|
"grad_norm": 0.4716828167438507, |
|
"learning_rate": 7.963489011847385e-05, |
|
"loss": 7.7954, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5692558392178164, |
|
"grad_norm": 0.47677019238471985, |
|
"learning_rate": 7.941059980201413e-05, |
|
"loss": 7.8447, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.569980083288068, |
|
"grad_norm": 0.4770268499851227, |
|
"learning_rate": 7.918641759617018e-05, |
|
"loss": 7.804, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5707043273583198, |
|
"grad_norm": 0.48790669441223145, |
|
"learning_rate": 7.896234467807561e-05, |
|
"loss": 7.7361, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.49754399061203003, |
|
"learning_rate": 7.873838222429035e-05, |
|
"loss": 7.7453, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5721528154988231, |
|
"grad_norm": 0.4493372142314911, |
|
"learning_rate": 7.851453141079413e-05, |
|
"loss": 7.8176, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5728770595690748, |
|
"grad_norm": 0.4799216091632843, |
|
"learning_rate": 7.829079341298051e-05, |
|
"loss": 7.814, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5736013036393265, |
|
"grad_norm": 0.4880140721797943, |
|
"learning_rate": 7.806716940565084e-05, |
|
"loss": 7.7657, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5743255477095781, |
|
"grad_norm": 0.46878695487976074, |
|
"learning_rate": 7.784366056300769e-05, |
|
"loss": 7.8544, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5750497917798298, |
|
"grad_norm": 0.5069106817245483, |
|
"learning_rate": 7.762026805864915e-05, |
|
"loss": 7.8384, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5757740358500815, |
|
"grad_norm": 0.521747350692749, |
|
"learning_rate": 7.739699306556228e-05, |
|
"loss": 7.7701, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5764982799203332, |
|
"grad_norm": 0.5047403573989868, |
|
"learning_rate": 7.717383675611718e-05, |
|
"loss": 7.8321, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5772225239905848, |
|
"grad_norm": 0.48073387145996094, |
|
"learning_rate": 7.695080030206076e-05, |
|
"loss": 7.7588, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5779467680608364, |
|
"grad_norm": 0.4945087730884552, |
|
"learning_rate": 7.672788487451058e-05, |
|
"loss": 7.8004, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5786710121310882, |
|
"grad_norm": 0.537676215171814, |
|
"learning_rate": 7.650509164394876e-05, |
|
"loss": 7.7014, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.5793952562013398, |
|
"grad_norm": 0.5863990783691406, |
|
"learning_rate": 7.628242178021572e-05, |
|
"loss": 7.9051, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5801195002715915, |
|
"grad_norm": 0.5090913772583008, |
|
"learning_rate": 7.605987645250419e-05, |
|
"loss": 7.8515, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5808437443418432, |
|
"grad_norm": 0.507625162601471, |
|
"learning_rate": 7.583745682935286e-05, |
|
"loss": 7.8064, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5815679884120949, |
|
"grad_norm": 0.4949605166912079, |
|
"learning_rate": 7.561516407864055e-05, |
|
"loss": 7.7985, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5822922324823465, |
|
"grad_norm": 0.48158496618270874, |
|
"learning_rate": 7.539299936757978e-05, |
|
"loss": 7.8514, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5830164765525983, |
|
"grad_norm": 0.48431625962257385, |
|
"learning_rate": 7.517096386271072e-05, |
|
"loss": 7.8289, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5837407206228499, |
|
"grad_norm": 0.5053116679191589, |
|
"learning_rate": 7.494905872989522e-05, |
|
"loss": 7.8756, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5844649646931016, |
|
"grad_norm": 0.49544015526771545, |
|
"learning_rate": 7.472728513431051e-05, |
|
"loss": 7.8154, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.5851892087633532, |
|
"grad_norm": 0.4702587425708771, |
|
"learning_rate": 7.450564424044318e-05, |
|
"loss": 7.8741, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.585913452833605, |
|
"grad_norm": 0.47806039452552795, |
|
"learning_rate": 7.428413721208296e-05, |
|
"loss": 7.8546, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5866376969038566, |
|
"grad_norm": 0.49512267112731934, |
|
"learning_rate": 7.40627652123168e-05, |
|
"loss": 7.8856, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5873619409741083, |
|
"grad_norm": 0.5060679316520691, |
|
"learning_rate": 7.384152940352253e-05, |
|
"loss": 7.7994, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5880861850443599, |
|
"grad_norm": 0.5014981627464294, |
|
"learning_rate": 7.362043094736287e-05, |
|
"loss": 7.8225, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.5888104291146117, |
|
"grad_norm": 0.5063635110855103, |
|
"learning_rate": 7.339947100477947e-05, |
|
"loss": 7.8719, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5895346731848633, |
|
"grad_norm": 0.4955052137374878, |
|
"learning_rate": 7.317865073598648e-05, |
|
"loss": 7.9004, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.5902589172551149, |
|
"grad_norm": 0.4668295085430145, |
|
"learning_rate": 7.295797130046482e-05, |
|
"loss": 7.8558, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5909831613253667, |
|
"grad_norm": 0.47807928919792175, |
|
"learning_rate": 7.273743385695582e-05, |
|
"loss": 7.8619, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.5917074053956183, |
|
"grad_norm": 0.48628920316696167, |
|
"learning_rate": 7.25170395634553e-05, |
|
"loss": 7.8668, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.59243164946587, |
|
"grad_norm": 0.4596603214740753, |
|
"learning_rate": 7.229678957720738e-05, |
|
"loss": 7.8634, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.5931558935361216, |
|
"grad_norm": 0.4702138900756836, |
|
"learning_rate": 7.207668505469844e-05, |
|
"loss": 7.8422, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5938801376063734, |
|
"grad_norm": 0.4690391421318054, |
|
"learning_rate": 7.185672715165119e-05, |
|
"loss": 7.8206, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.594604381676625, |
|
"grad_norm": 0.5008434057235718, |
|
"learning_rate": 7.163691702301827e-05, |
|
"loss": 7.8105, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.5953286257468767, |
|
"grad_norm": 0.5092424750328064, |
|
"learning_rate": 7.14172558229766e-05, |
|
"loss": 7.8332, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5960528698171284, |
|
"grad_norm": 0.4641984701156616, |
|
"learning_rate": 7.119774470492092e-05, |
|
"loss": 7.8529, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5967771138873801, |
|
"grad_norm": 0.4499223828315735, |
|
"learning_rate": 7.097838482145802e-05, |
|
"loss": 7.8605, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5975013579576317, |
|
"grad_norm": 0.47683218121528625, |
|
"learning_rate": 7.075917732440061e-05, |
|
"loss": 7.8361, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5982256020278834, |
|
"grad_norm": 0.48220375180244446, |
|
"learning_rate": 7.054012336476121e-05, |
|
"loss": 7.8493, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.5989498460981351, |
|
"grad_norm": 0.5097104907035828, |
|
"learning_rate": 7.032122409274613e-05, |
|
"loss": 7.894, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.5996740901683868, |
|
"grad_norm": 0.44973528385162354, |
|
"learning_rate": 7.010248065774943e-05, |
|
"loss": 7.8853, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6003983342386384, |
|
"grad_norm": 0.48968738317489624, |
|
"learning_rate": 6.988389420834703e-05, |
|
"loss": 7.8434, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.60112257830889, |
|
"grad_norm": 0.4859766960144043, |
|
"learning_rate": 6.966546589229037e-05, |
|
"loss": 7.8775, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6018468223791418, |
|
"grad_norm": 0.5055286884307861, |
|
"learning_rate": 6.944719685650075e-05, |
|
"loss": 7.793, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6025710664493934, |
|
"grad_norm": 0.4732415974140167, |
|
"learning_rate": 6.922908824706295e-05, |
|
"loss": 7.8474, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6032953105196451, |
|
"grad_norm": 0.5133833885192871, |
|
"learning_rate": 6.90111412092195e-05, |
|
"loss": 7.7488, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6040195545898968, |
|
"grad_norm": 0.47122323513031006, |
|
"learning_rate": 6.879335688736454e-05, |
|
"loss": 7.8389, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6047437986601485, |
|
"grad_norm": 0.5096733570098877, |
|
"learning_rate": 6.857573642503776e-05, |
|
"loss": 7.791, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6054680427304001, |
|
"grad_norm": 0.4919492304325104, |
|
"learning_rate": 6.835828096491854e-05, |
|
"loss": 7.7837, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6061922868006518, |
|
"grad_norm": 0.48628994822502136, |
|
"learning_rate": 6.814099164881975e-05, |
|
"loss": 7.8275, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6069165308709035, |
|
"grad_norm": 0.5138300061225891, |
|
"learning_rate": 6.792386961768204e-05, |
|
"loss": 7.8181, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6076407749411552, |
|
"grad_norm": 0.4759021997451782, |
|
"learning_rate": 6.77069160115675e-05, |
|
"loss": 7.7909, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6083650190114068, |
|
"grad_norm": 0.5031505227088928, |
|
"learning_rate": 6.749013196965406e-05, |
|
"loss": 7.8561, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6090892630816586, |
|
"grad_norm": 0.45728060603141785, |
|
"learning_rate": 6.72735186302291e-05, |
|
"loss": 7.8218, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6098135071519102, |
|
"grad_norm": 0.4515739381313324, |
|
"learning_rate": 6.705707713068376e-05, |
|
"loss": 7.8055, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6105377512221619, |
|
"grad_norm": 0.507045567035675, |
|
"learning_rate": 6.684080860750697e-05, |
|
"loss": 7.8195, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6112619952924135, |
|
"grad_norm": 0.5178161263465881, |
|
"learning_rate": 6.662471419627924e-05, |
|
"loss": 7.8287, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6119862393626652, |
|
"grad_norm": 0.5024030804634094, |
|
"learning_rate": 6.640879503166698e-05, |
|
"loss": 7.7915, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6127104834329169, |
|
"grad_norm": 0.5156453847885132, |
|
"learning_rate": 6.619305224741632e-05, |
|
"loss": 7.8114, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6134347275031685, |
|
"grad_norm": 0.50360107421875, |
|
"learning_rate": 6.597748697634739e-05, |
|
"loss": 7.8968, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6141589715734203, |
|
"grad_norm": 0.5216660499572754, |
|
"learning_rate": 6.57621003503481e-05, |
|
"loss": 7.8246, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6148832156436719, |
|
"grad_norm": 0.48182037472724915, |
|
"learning_rate": 6.554689350036834e-05, |
|
"loss": 7.8687, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6156074597139236, |
|
"grad_norm": 0.5355033874511719, |
|
"learning_rate": 6.533186755641416e-05, |
|
"loss": 7.8434, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6163317037841752, |
|
"grad_norm": 0.49577125906944275, |
|
"learning_rate": 6.511702364754153e-05, |
|
"loss": 7.8567, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.617055947854427, |
|
"grad_norm": 0.4947705566883087, |
|
"learning_rate": 6.49023629018508e-05, |
|
"loss": 7.9015, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6177801919246786, |
|
"grad_norm": 0.4933142364025116, |
|
"learning_rate": 6.468788644648039e-05, |
|
"loss": 7.8388, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6185044359949303, |
|
"grad_norm": 0.48275384306907654, |
|
"learning_rate": 6.447359540760114e-05, |
|
"loss": 7.8168, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.619228680065182, |
|
"grad_norm": 0.4693118929862976, |
|
"learning_rate": 6.425949091041027e-05, |
|
"loss": 7.8784, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6199529241354337, |
|
"grad_norm": 0.49441099166870117, |
|
"learning_rate": 6.404557407912549e-05, |
|
"loss": 7.8285, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6206771682056853, |
|
"grad_norm": 0.4684365689754486, |
|
"learning_rate": 6.383184603697922e-05, |
|
"loss": 7.8689, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.621401412275937, |
|
"grad_norm": 0.4639764130115509, |
|
"learning_rate": 6.361830790621246e-05, |
|
"loss": 7.8302, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6221256563461887, |
|
"grad_norm": 0.5014294385910034, |
|
"learning_rate": 6.340496080806914e-05, |
|
"loss": 7.8206, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6228499004164404, |
|
"grad_norm": 0.4852553904056549, |
|
"learning_rate": 6.319180586278996e-05, |
|
"loss": 7.8383, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.623574144486692, |
|
"grad_norm": 0.4782876670360565, |
|
"learning_rate": 6.297884418960691e-05, |
|
"loss": 7.8477, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6242983885569436, |
|
"grad_norm": 0.49755236506462097, |
|
"learning_rate": 6.276607690673688e-05, |
|
"loss": 7.8563, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6250226326271954, |
|
"grad_norm": 0.485841304063797, |
|
"learning_rate": 6.255350513137626e-05, |
|
"loss": 7.8672, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.625746876697447, |
|
"grad_norm": 0.4920748770236969, |
|
"learning_rate": 6.234112997969475e-05, |
|
"loss": 7.8371, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6264711207676987, |
|
"grad_norm": 0.4922964572906494, |
|
"learning_rate": 6.212895256682964e-05, |
|
"loss": 7.7465, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6271953648379504, |
|
"grad_norm": 0.46496015787124634, |
|
"learning_rate": 6.191697400688001e-05, |
|
"loss": 7.8745, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6279196089082021, |
|
"grad_norm": 0.4966541826725006, |
|
"learning_rate": 6.170519541290072e-05, |
|
"loss": 7.8253, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6286438529784537, |
|
"grad_norm": 0.47178417444229126, |
|
"learning_rate": 6.149361789689674e-05, |
|
"loss": 7.8381, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6293680970487054, |
|
"grad_norm": 0.4999862611293793, |
|
"learning_rate": 6.128224256981706e-05, |
|
"loss": 7.796, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6300923411189571, |
|
"grad_norm": 0.5017781853675842, |
|
"learning_rate": 6.107107054154924e-05, |
|
"loss": 7.8559, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6308165851892088, |
|
"grad_norm": 0.46709203720092773, |
|
"learning_rate": 6.0860102920913196e-05, |
|
"loss": 7.8881, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6315408292594604, |
|
"grad_norm": 0.49039193987846375, |
|
"learning_rate": 6.064934081565557e-05, |
|
"loss": 7.7982, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6322650733297122, |
|
"grad_norm": 0.4402787685394287, |
|
"learning_rate": 6.0438785332443946e-05, |
|
"loss": 7.8388, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6329893173999638, |
|
"grad_norm": 0.48174357414245605, |
|
"learning_rate": 6.0228437576860874e-05, |
|
"loss": 7.8222, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6337135614702155, |
|
"grad_norm": 0.4780054986476898, |
|
"learning_rate": 6.0018298653398345e-05, |
|
"loss": 7.8427, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6344378055404671, |
|
"grad_norm": 0.48912590742111206, |
|
"learning_rate": 5.980836966545158e-05, |
|
"loss": 7.8059, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6351620496107188, |
|
"grad_norm": 0.5173190832138062, |
|
"learning_rate": 5.9598651715313715e-05, |
|
"loss": 7.8253, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6358862936809705, |
|
"grad_norm": 0.4782562255859375, |
|
"learning_rate": 5.9389145904169595e-05, |
|
"loss": 7.8532, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6366105377512221, |
|
"grad_norm": 0.47767338156700134, |
|
"learning_rate": 5.917985333209022e-05, |
|
"loss": 7.8198, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6373347818214739, |
|
"grad_norm": 0.4958306849002838, |
|
"learning_rate": 5.8970775098026973e-05, |
|
"loss": 7.8114, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6380590258917255, |
|
"grad_norm": 0.49203237891197205, |
|
"learning_rate": 5.87619122998057e-05, |
|
"loss": 7.8273, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6387832699619772, |
|
"grad_norm": 0.5006452798843384, |
|
"learning_rate": 5.8553266034121124e-05, |
|
"loss": 7.7925, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6395075140322288, |
|
"grad_norm": 0.4803057312965393, |
|
"learning_rate": 5.834483739653089e-05, |
|
"loss": 7.8057, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6402317581024806, |
|
"grad_norm": 0.4590706527233124, |
|
"learning_rate": 5.813662748145008e-05, |
|
"loss": 7.8248, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6409560021727322, |
|
"grad_norm": 0.4630698263645172, |
|
"learning_rate": 5.7928637382145203e-05, |
|
"loss": 7.8183, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6416802462429839, |
|
"grad_norm": 0.5254594683647156, |
|
"learning_rate": 5.772086819072853e-05, |
|
"loss": 7.7863, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6424044903132355, |
|
"grad_norm": 0.49607181549072266, |
|
"learning_rate": 5.75133209981525e-05, |
|
"loss": 7.8268, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6431287343834873, |
|
"grad_norm": 0.4501619338989258, |
|
"learning_rate": 5.73059968942038e-05, |
|
"loss": 7.8368, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6438529784537389, |
|
"grad_norm": 0.49222031235694885, |
|
"learning_rate": 5.7098896967497775e-05, |
|
"loss": 7.7554, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6445772225239906, |
|
"grad_norm": 0.45921745896339417, |
|
"learning_rate": 5.689202230547259e-05, |
|
"loss": 7.8255, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6453014665942423, |
|
"grad_norm": 0.4936200976371765, |
|
"learning_rate": 5.668537399438374e-05, |
|
"loss": 7.8921, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6460257106644939, |
|
"grad_norm": 0.4984248876571655, |
|
"learning_rate": 5.647895311929803e-05, |
|
"loss": 7.8222, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6467499547347456, |
|
"grad_norm": 0.47879624366760254, |
|
"learning_rate": 5.627276076408807e-05, |
|
"loss": 7.9071, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6474741988049972, |
|
"grad_norm": 0.49307674169540405, |
|
"learning_rate": 5.6066798011426737e-05, |
|
"loss": 7.7861, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.648198442875249, |
|
"grad_norm": 0.5343567728996277, |
|
"learning_rate": 5.586106594278109e-05, |
|
"loss": 7.8635, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6489226869455006, |
|
"grad_norm": 0.5100454092025757, |
|
"learning_rate": 5.565556563840699e-05, |
|
"loss": 7.7338, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6496469310157523, |
|
"grad_norm": 0.4881967306137085, |
|
"learning_rate": 5.5450298177343466e-05, |
|
"loss": 7.8221, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.650371175086004, |
|
"grad_norm": 0.5178599953651428, |
|
"learning_rate": 5.524526463740678e-05, |
|
"loss": 7.7642, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6510954191562557, |
|
"grad_norm": 0.5256778001785278, |
|
"learning_rate": 5.5040466095184965e-05, |
|
"loss": 7.778, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6518196632265073, |
|
"grad_norm": 0.5190709829330444, |
|
"learning_rate": 5.4835903626032195e-05, |
|
"loss": 7.8178, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.652543907296759, |
|
"grad_norm": 0.49377232789993286, |
|
"learning_rate": 5.4631578304063e-05, |
|
"loss": 7.8374, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6532681513670107, |
|
"grad_norm": 0.4822869598865509, |
|
"learning_rate": 5.442749120214672e-05, |
|
"loss": 7.8543, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6539923954372624, |
|
"grad_norm": 0.5066609978675842, |
|
"learning_rate": 5.4223643391901916e-05, |
|
"loss": 7.8336, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.654716639507514, |
|
"grad_norm": 0.49571654200553894, |
|
"learning_rate": 5.402003594369053e-05, |
|
"loss": 7.7989, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6554408835777658, |
|
"grad_norm": 0.47712838649749756, |
|
"learning_rate": 5.3816669926612605e-05, |
|
"loss": 7.8491, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6561651276480174, |
|
"grad_norm": 0.5059483647346497, |
|
"learning_rate": 5.361354640850029e-05, |
|
"loss": 7.8269, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6568893717182691, |
|
"grad_norm": 0.4979402422904968, |
|
"learning_rate": 5.3410666455912604e-05, |
|
"loss": 7.7683, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6576136157885207, |
|
"grad_norm": 0.4917674660682678, |
|
"learning_rate": 5.320803113412951e-05, |
|
"loss": 7.8254, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6583378598587724, |
|
"grad_norm": 0.4933214783668518, |
|
"learning_rate": 5.3005641507146495e-05, |
|
"loss": 7.8163, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6590621039290241, |
|
"grad_norm": 0.4836273193359375, |
|
"learning_rate": 5.280349863766906e-05, |
|
"loss": 7.8663, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6597863479992757, |
|
"grad_norm": 0.45807477831840515, |
|
"learning_rate": 5.2601603587106863e-05, |
|
"loss": 7.8553, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6605105920695274, |
|
"grad_norm": 0.4703587293624878, |
|
"learning_rate": 5.239995741556848e-05, |
|
"loss": 7.8577, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6612348361397791, |
|
"grad_norm": 0.48380714654922485, |
|
"learning_rate": 5.2198561181855574e-05, |
|
"loss": 7.8219, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6619590802100308, |
|
"grad_norm": 0.4754285216331482, |
|
"learning_rate": 5.199741594345744e-05, |
|
"loss": 7.8467, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6626833242802824, |
|
"grad_norm": 0.4489299952983856, |
|
"learning_rate": 5.179652275654554e-05, |
|
"loss": 7.8807, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6634075683505342, |
|
"grad_norm": 0.4758758842945099, |
|
"learning_rate": 5.1595882675967755e-05, |
|
"loss": 7.7741, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6641318124207858, |
|
"grad_norm": 0.49364110827445984, |
|
"learning_rate": 5.139549675524301e-05, |
|
"loss": 7.8486, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6648560564910375, |
|
"grad_norm": 0.48365652561187744, |
|
"learning_rate": 5.1195366046555656e-05, |
|
"loss": 7.8576, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6655803005612891, |
|
"grad_norm": 0.4603043794631958, |
|
"learning_rate": 5.099549160075008e-05, |
|
"loss": 7.8672, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6663045446315409, |
|
"grad_norm": 0.46025213599205017, |
|
"learning_rate": 5.079587446732493e-05, |
|
"loss": 7.8447, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6670287887017925, |
|
"grad_norm": 0.47760942578315735, |
|
"learning_rate": 5.059651569442794e-05, |
|
"loss": 7.8622, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6677530327720442, |
|
"grad_norm": 0.4995022416114807, |
|
"learning_rate": 5.039741632885009e-05, |
|
"loss": 7.7676, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6684772768422959, |
|
"grad_norm": 0.48363152146339417, |
|
"learning_rate": 5.01985774160203e-05, |
|
"loss": 7.8692, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6692015209125475, |
|
"grad_norm": 0.4707562029361725, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 7.8341, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6699257649827992, |
|
"grad_norm": 0.46814507246017456, |
|
"learning_rate": 4.980168512347747e-05, |
|
"loss": 7.864, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6706500090530508, |
|
"grad_norm": 0.4722250699996948, |
|
"learning_rate": 4.9603633827762484e-05, |
|
"loss": 7.8269, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.6713742531233026, |
|
"grad_norm": 0.45258694887161255, |
|
"learning_rate": 4.940584715278075e-05, |
|
"loss": 7.8147, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6720984971935542, |
|
"grad_norm": 0.4681549370288849, |
|
"learning_rate": 4.9208326137068625e-05, |
|
"loss": 7.8421, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6728227412638059, |
|
"grad_norm": 0.49207526445388794, |
|
"learning_rate": 4.901107181776743e-05, |
|
"loss": 7.7759, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6735469853340575, |
|
"grad_norm": 0.4637949764728546, |
|
"learning_rate": 4.881408523061813e-05, |
|
"loss": 7.8506, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6742712294043093, |
|
"grad_norm": 0.5171796679496765, |
|
"learning_rate": 4.861736740995601e-05, |
|
"loss": 7.8593, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.6749954734745609, |
|
"grad_norm": 0.47917360067367554, |
|
"learning_rate": 4.8420919388704925e-05, |
|
"loss": 7.8142, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6757197175448126, |
|
"grad_norm": 0.4940485656261444, |
|
"learning_rate": 4.822474219837225e-05, |
|
"loss": 7.7892, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6764439616150643, |
|
"grad_norm": 0.5015715956687927, |
|
"learning_rate": 4.802883686904318e-05, |
|
"loss": 7.9136, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.677168205685316, |
|
"grad_norm": 0.4832720160484314, |
|
"learning_rate": 4.7833204429375454e-05, |
|
"loss": 7.7575, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6778924497555676, |
|
"grad_norm": 0.45720604062080383, |
|
"learning_rate": 4.763784590659387e-05, |
|
"loss": 7.8713, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6786166938258194, |
|
"grad_norm": 0.47305944561958313, |
|
"learning_rate": 4.744276232648508e-05, |
|
"loss": 7.7939, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.679340937896071, |
|
"grad_norm": 0.4715365171432495, |
|
"learning_rate": 4.724795471339195e-05, |
|
"loss": 7.806, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6800651819663226, |
|
"grad_norm": 0.468777060508728, |
|
"learning_rate": 4.7053424090208295e-05, |
|
"loss": 7.7986, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.6807894260365743, |
|
"grad_norm": 0.47867533564567566, |
|
"learning_rate": 4.685917147837364e-05, |
|
"loss": 7.8439, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.681513670106826, |
|
"grad_norm": 0.4867823123931885, |
|
"learning_rate": 4.666519789786756e-05, |
|
"loss": 7.8547, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.6822379141770777, |
|
"grad_norm": 0.48606693744659424, |
|
"learning_rate": 4.6471504367204674e-05, |
|
"loss": 7.836, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6829621582473293, |
|
"grad_norm": 0.4838757812976837, |
|
"learning_rate": 4.6278091903428945e-05, |
|
"loss": 7.8131, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.683686402317581, |
|
"grad_norm": 0.46455007791519165, |
|
"learning_rate": 4.608496152210867e-05, |
|
"loss": 7.8649, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6844106463878327, |
|
"grad_norm": 0.5098406076431274, |
|
"learning_rate": 4.589211423733087e-05, |
|
"loss": 7.8212, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6851348904580844, |
|
"grad_norm": 0.48916125297546387, |
|
"learning_rate": 4.569955106169611e-05, |
|
"loss": 7.7915, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.685859134528336, |
|
"grad_norm": 0.525736927986145, |
|
"learning_rate": 4.5507273006313245e-05, |
|
"loss": 7.7649, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.6865833785985878, |
|
"grad_norm": 0.5023128986358643, |
|
"learning_rate": 4.531528108079387e-05, |
|
"loss": 7.7986, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6873076226688394, |
|
"grad_norm": 0.5856012105941772, |
|
"learning_rate": 4.5123576293247364e-05, |
|
"loss": 7.7212, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.6880318667390911, |
|
"grad_norm": 0.5098285675048828, |
|
"learning_rate": 4.493215965027519e-05, |
|
"loss": 7.9312, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6887561108093427, |
|
"grad_norm": 0.5016021728515625, |
|
"learning_rate": 4.4741032156966025e-05, |
|
"loss": 7.8514, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6894803548795945, |
|
"grad_norm": 0.4811919033527374, |
|
"learning_rate": 4.455019481689016e-05, |
|
"loss": 7.8562, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6902045989498461, |
|
"grad_norm": 0.45924901962280273, |
|
"learning_rate": 4.435964863209437e-05, |
|
"loss": 7.8562, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.6909288430200978, |
|
"grad_norm": 0.5059201717376709, |
|
"learning_rate": 4.416939460309667e-05, |
|
"loss": 7.8266, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.6916530870903494, |
|
"grad_norm": 0.47285687923431396, |
|
"learning_rate": 4.3979433728880936e-05, |
|
"loss": 7.8448, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6923773311606011, |
|
"grad_norm": 0.47951385378837585, |
|
"learning_rate": 4.378976700689192e-05, |
|
"loss": 7.8623, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6931015752308528, |
|
"grad_norm": 0.45039263367652893, |
|
"learning_rate": 4.360039543302965e-05, |
|
"loss": 7.876, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6938258193011044, |
|
"grad_norm": 0.47055962681770325, |
|
"learning_rate": 4.3411320001644576e-05, |
|
"loss": 7.8539, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6945500633713562, |
|
"grad_norm": 0.4704470634460449, |
|
"learning_rate": 4.322254170553201e-05, |
|
"loss": 7.8378, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.6952743074416078, |
|
"grad_norm": 0.47437623143196106, |
|
"learning_rate": 4.303406153592712e-05, |
|
"loss": 7.8915, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6959985515118595, |
|
"grad_norm": 0.450575590133667, |
|
"learning_rate": 4.284588048249974e-05, |
|
"loss": 7.8715, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.6967227955821111, |
|
"grad_norm": 0.49726375937461853, |
|
"learning_rate": 4.2657999533349e-05, |
|
"loss": 7.8248, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6974470396523629, |
|
"grad_norm": 0.5131885409355164, |
|
"learning_rate": 4.247041967499837e-05, |
|
"loss": 7.8141, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.6981712837226145, |
|
"grad_norm": 0.5051475167274475, |
|
"learning_rate": 4.228314189239021e-05, |
|
"loss": 7.7858, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6988955277928662, |
|
"grad_norm": 0.49921953678131104, |
|
"learning_rate": 4.209616716888088e-05, |
|
"loss": 7.806, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6996197718631179, |
|
"grad_norm": 0.4952443838119507, |
|
"learning_rate": 4.190949648623538e-05, |
|
"loss": 7.8111, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7003440159333696, |
|
"grad_norm": 0.48354852199554443, |
|
"learning_rate": 4.172313082462218e-05, |
|
"loss": 7.8454, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7010682600036212, |
|
"grad_norm": 0.4903525412082672, |
|
"learning_rate": 4.153707116260831e-05, |
|
"loss": 7.796, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.701792504073873, |
|
"grad_norm": 0.4461687505245209, |
|
"learning_rate": 4.135131847715391e-05, |
|
"loss": 7.8763, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7025167481441246, |
|
"grad_norm": 0.4997590482234955, |
|
"learning_rate": 4.116587374360738e-05, |
|
"loss": 7.7374, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7032409922143762, |
|
"grad_norm": 0.4803599715232849, |
|
"learning_rate": 4.0980737935700045e-05, |
|
"loss": 7.8267, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7039652362846279, |
|
"grad_norm": 0.4723648130893707, |
|
"learning_rate": 4.0795912025541106e-05, |
|
"loss": 7.8528, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7046894803548795, |
|
"grad_norm": 0.4950774312019348, |
|
"learning_rate": 4.061139698361259e-05, |
|
"loss": 7.846, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7054137244251313, |
|
"grad_norm": 0.4853728115558624, |
|
"learning_rate": 4.0427193778764307e-05, |
|
"loss": 7.8703, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7061379684953829, |
|
"grad_norm": 0.4416038393974304, |
|
"learning_rate": 4.024330337820853e-05, |
|
"loss": 7.8747, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7068622125656346, |
|
"grad_norm": 0.4781404435634613, |
|
"learning_rate": 4.0059726747515104e-05, |
|
"loss": 7.8101, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7075864566358863, |
|
"grad_norm": 0.4613541066646576, |
|
"learning_rate": 3.9876464850606435e-05, |
|
"loss": 7.8109, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.708310700706138, |
|
"grad_norm": 0.4910728633403778, |
|
"learning_rate": 3.969351864975216e-05, |
|
"loss": 7.8568, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7090349447763896, |
|
"grad_norm": 0.45888233184814453, |
|
"learning_rate": 3.9510889105564454e-05, |
|
"loss": 7.7947, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7097591888466414, |
|
"grad_norm": 0.4573422968387604, |
|
"learning_rate": 3.9328577176992595e-05, |
|
"loss": 7.8149, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.710483432916893, |
|
"grad_norm": 0.4781007170677185, |
|
"learning_rate": 3.914658382131832e-05, |
|
"loss": 7.8372, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7112076769871447, |
|
"grad_norm": 0.4993189871311188, |
|
"learning_rate": 3.8964909994150456e-05, |
|
"loss": 7.8337, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7119319210573963, |
|
"grad_norm": 0.46670612692832947, |
|
"learning_rate": 3.8783556649420085e-05, |
|
"loss": 7.7961, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7126561651276481, |
|
"grad_norm": 0.48190969228744507, |
|
"learning_rate": 3.860252473937559e-05, |
|
"loss": 7.8113, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7133804091978997, |
|
"grad_norm": 0.4879286587238312, |
|
"learning_rate": 3.8421815214577454e-05, |
|
"loss": 7.8245, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7141046532681513, |
|
"grad_norm": 0.4496977627277374, |
|
"learning_rate": 3.8241429023893494e-05, |
|
"loss": 7.8067, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.714828897338403, |
|
"grad_norm": 0.47890475392341614, |
|
"learning_rate": 3.806136711449363e-05, |
|
"loss": 7.8695, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7155531414086547, |
|
"grad_norm": 0.46888604760169983, |
|
"learning_rate": 3.7881630431845215e-05, |
|
"loss": 7.775, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7162773854789064, |
|
"grad_norm": 0.458762526512146, |
|
"learning_rate": 3.770221991970777e-05, |
|
"loss": 7.8523, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.717001629549158, |
|
"grad_norm": 0.4727189242839813, |
|
"learning_rate": 3.752313652012817e-05, |
|
"loss": 7.7896, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7177258736194098, |
|
"grad_norm": 0.4806780219078064, |
|
"learning_rate": 3.734438117343582e-05, |
|
"loss": 7.7772, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7184501176896614, |
|
"grad_norm": 0.4579210877418518, |
|
"learning_rate": 3.7165954818237436e-05, |
|
"loss": 7.8326, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7191743617599131, |
|
"grad_norm": 0.4929826259613037, |
|
"learning_rate": 3.698785839141236e-05, |
|
"loss": 7.8425, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7198986058301647, |
|
"grad_norm": 0.5137585997581482, |
|
"learning_rate": 3.681009282810746e-05, |
|
"loss": 7.765, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7206228499004165, |
|
"grad_norm": 0.4695113003253937, |
|
"learning_rate": 3.663265906173245e-05, |
|
"loss": 7.8759, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7213470939706681, |
|
"grad_norm": 0.5344631671905518, |
|
"learning_rate": 3.645555802395476e-05, |
|
"loss": 7.7362, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7220713380409198, |
|
"grad_norm": 0.5240874886512756, |
|
"learning_rate": 3.6278790644694674e-05, |
|
"loss": 7.8663, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7227955821111715, |
|
"grad_norm": 0.5019177198410034, |
|
"learning_rate": 3.610235785212064e-05, |
|
"loss": 7.6963, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7235198261814232, |
|
"grad_norm": 0.5077367424964905, |
|
"learning_rate": 3.592626057264413e-05, |
|
"loss": 7.8491, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7242440702516748, |
|
"grad_norm": 0.5551170706748962, |
|
"learning_rate": 3.575049973091506e-05, |
|
"loss": 7.8329, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7249683143219265, |
|
"grad_norm": 0.49724796414375305, |
|
"learning_rate": 3.5575076249816584e-05, |
|
"loss": 7.8, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.7256925583921782, |
|
"grad_norm": 0.4674682319164276, |
|
"learning_rate": 3.5399991050460655e-05, |
|
"loss": 7.8876, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7264168024624298, |
|
"grad_norm": 0.49887341260910034, |
|
"learning_rate": 3.522524505218281e-05, |
|
"loss": 7.8503, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7271410465326815, |
|
"grad_norm": 0.47441011667251587, |
|
"learning_rate": 3.505083917253763e-05, |
|
"loss": 7.8196, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7278652906029331, |
|
"grad_norm": 0.4713391065597534, |
|
"learning_rate": 3.4876774327293734e-05, |
|
"loss": 7.8007, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7285895346731849, |
|
"grad_norm": 0.4885476231575012, |
|
"learning_rate": 3.4703051430429024e-05, |
|
"loss": 7.8527, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.7293137787434365, |
|
"grad_norm": 0.4708382785320282, |
|
"learning_rate": 3.452967139412602e-05, |
|
"loss": 7.8204, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7300380228136882, |
|
"grad_norm": 0.45463642477989197, |
|
"learning_rate": 3.435663512876679e-05, |
|
"loss": 7.8397, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7307622668839399, |
|
"grad_norm": 0.46222880482673645, |
|
"learning_rate": 3.4183943542928496e-05, |
|
"loss": 7.8621, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.7314865109541916, |
|
"grad_norm": 0.4764266610145569, |
|
"learning_rate": 3.401159754337836e-05, |
|
"loss": 7.8469, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7322107550244432, |
|
"grad_norm": 0.4880512058734894, |
|
"learning_rate": 3.383959803506901e-05, |
|
"loss": 7.8698, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.732934999094695, |
|
"grad_norm": 0.4619225859642029, |
|
"learning_rate": 3.3667945921133734e-05, |
|
"loss": 7.8439, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7336592431649466, |
|
"grad_norm": 0.4897652566432953, |
|
"learning_rate": 3.34966421028817e-05, |
|
"loss": 7.7954, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7343834872351983, |
|
"grad_norm": 0.47381868958473206, |
|
"learning_rate": 3.332568747979335e-05, |
|
"loss": 7.8305, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.7351077313054499, |
|
"grad_norm": 0.4567718207836151, |
|
"learning_rate": 3.3155082949515424e-05, |
|
"loss": 7.8574, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7358319753757017, |
|
"grad_norm": 0.4839675724506378, |
|
"learning_rate": 3.298482940785655e-05, |
|
"loss": 7.8414, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.7365562194459533, |
|
"grad_norm": 0.4948679804801941, |
|
"learning_rate": 3.2814927748782245e-05, |
|
"loss": 7.8058, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7372804635162049, |
|
"grad_norm": 0.47992077469825745, |
|
"learning_rate": 3.26453788644105e-05, |
|
"loss": 7.7794, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7380047075864566, |
|
"grad_norm": 0.4672059714794159, |
|
"learning_rate": 3.2476183645006854e-05, |
|
"loss": 7.8264, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7387289516567083, |
|
"grad_norm": 0.4785839319229126, |
|
"learning_rate": 3.2307342978979847e-05, |
|
"loss": 7.8405, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.73945319572696, |
|
"grad_norm": 0.48339641094207764, |
|
"learning_rate": 3.2138857752876406e-05, |
|
"loss": 7.8538, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.7401774397972116, |
|
"grad_norm": 0.4682196378707886, |
|
"learning_rate": 3.1970728851377005e-05, |
|
"loss": 7.8156, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7409016838674634, |
|
"grad_norm": 0.4627414643764496, |
|
"learning_rate": 3.1802957157291256e-05, |
|
"loss": 7.8184, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.741625927937715, |
|
"grad_norm": 0.4607759714126587, |
|
"learning_rate": 3.1635543551553015e-05, |
|
"loss": 7.8599, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7423501720079667, |
|
"grad_norm": 0.4652021825313568, |
|
"learning_rate": 3.146848891321604e-05, |
|
"loss": 7.7955, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7430744160782183, |
|
"grad_norm": 0.4668468236923218, |
|
"learning_rate": 3.130179411944909e-05, |
|
"loss": 7.8534, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7437986601484701, |
|
"grad_norm": 0.46713390946388245, |
|
"learning_rate": 3.113546004553151e-05, |
|
"loss": 7.8634, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.7445229042187217, |
|
"grad_norm": 0.4680757224559784, |
|
"learning_rate": 3.096948756484863e-05, |
|
"loss": 7.8308, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7452471482889734, |
|
"grad_norm": 0.48597460985183716, |
|
"learning_rate": 3.080387754888706e-05, |
|
"loss": 7.7773, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.745971392359225, |
|
"grad_norm": 0.4744811952114105, |
|
"learning_rate": 3.06386308672302e-05, |
|
"loss": 7.7997, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7466956364294768, |
|
"grad_norm": 0.4475364685058594, |
|
"learning_rate": 3.0473748387553647e-05, |
|
"loss": 7.8358, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.7474198804997284, |
|
"grad_norm": 0.4579427242279053, |
|
"learning_rate": 3.0309230975620717e-05, |
|
"loss": 7.8671, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.74814412456998, |
|
"grad_norm": 0.4434613883495331, |
|
"learning_rate": 3.0145079495277772e-05, |
|
"loss": 7.8496, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.7488683686402318, |
|
"grad_norm": 0.5059404373168945, |
|
"learning_rate": 2.9981294808449713e-05, |
|
"loss": 7.7336, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7495926127104834, |
|
"grad_norm": 0.4605514407157898, |
|
"learning_rate": 2.9817877775135593e-05, |
|
"loss": 7.8329, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7503168567807351, |
|
"grad_norm": 0.48954278230667114, |
|
"learning_rate": 2.965482925340386e-05, |
|
"loss": 7.8067, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7510411008509867, |
|
"grad_norm": 0.4908781945705414, |
|
"learning_rate": 2.9492150099388127e-05, |
|
"loss": 7.9183, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7517653449212385, |
|
"grad_norm": 0.46575653553009033, |
|
"learning_rate": 2.932984116728239e-05, |
|
"loss": 7.7655, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.7517653449212385, |
|
"eval_loss": 7.828281402587891, |
|
"eval_runtime": 4.8059, |
|
"eval_samples_per_second": 241.993, |
|
"eval_steps_per_second": 121.101, |
|
"step": 1038 |
|
}, |
|
{
"epoch": 0.7524895889914901,
"grad_norm": 0.46100151538848877,
"learning_rate": 2.916790330933683e-05,
"loss": 7.8197,
"step": 1039
},
{
"epoch": 0.7532138330617418,
"grad_norm": 0.47532182931900024,
"learning_rate": 2.9006337375853064e-05,
"loss": 7.7012,
"step": 1040
},
{
"epoch": 0.7539380771319935,
"grad_norm": 0.486884742975235,
"learning_rate": 2.884514421517993e-05,
"loss": 7.8531,
"step": 1041
},
{
"epoch": 0.7546623212022452,
"grad_norm": 0.4965648055076599,
"learning_rate": 2.868432467370882e-05,
"loss": 7.8061,
"step": 1042
},
{
"epoch": 0.7553865652724968,
"grad_norm": 0.49399492144584656,
"learning_rate": 2.852387959586934e-05,
"loss": 7.7895,
"step": 1043
},
{
"epoch": 0.7561108093427485,
"grad_norm": 0.48604780435562134,
"learning_rate": 2.8363809824124964e-05,
"loss": 7.8522,
"step": 1044
},
{
"epoch": 0.7568350534130002,
"grad_norm": 0.46038612723350525,
"learning_rate": 2.820411619896838e-05,
"loss": 7.9004,
"step": 1045
},
{
"epoch": 0.7575592974832519,
"grad_norm": 0.5330672860145569,
"learning_rate": 2.8044799558917313e-05,
"loss": 7.85,
"step": 1046
},
{
"epoch": 0.7582835415535035,
"grad_norm": 0.5077647566795349,
"learning_rate": 2.7885860740509963e-05,
"loss": 7.845,
"step": 1047
},
{
"epoch": 0.7590077856237553,
"grad_norm": 0.480844646692276,
"learning_rate": 2.7727300578300674e-05,
"loss": 7.7689,
"step": 1048
},
{
"epoch": 0.7597320296940069,
"grad_norm": 0.49301832914352417,
"learning_rate": 2.756911990485552e-05,
"loss": 7.8533,
"step": 1049
},
{
"epoch": 0.7604562737642585,
"grad_norm": 0.598520040512085,
"learning_rate": 2.741131955074807e-05,
"loss": 7.7991,
"step": 1050
},
{
"epoch": 0.7611805178345102,
"grad_norm": 0.4859587252140045,
"learning_rate": 2.7253900344554795e-05,
"loss": 7.8235,
"step": 1051
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.49456787109375,
"learning_rate": 2.7096863112850847e-05,
"loss": 7.84,
"step": 1052
},
{
"epoch": 0.7626290059750136,
"grad_norm": 0.4734579622745514,
"learning_rate": 2.6940208680205802e-05,
"loss": 7.8954,
"step": 1053
},
{
"epoch": 0.7633532500452652,
"grad_norm": 0.4647412896156311,
"learning_rate": 2.6783937869179143e-05,
"loss": 7.8155,
"step": 1054
},
{
"epoch": 0.764077494115517,
"grad_norm": 0.4898563623428345,
"learning_rate": 2.662805150031612e-05,
"loss": 7.8175,
"step": 1055
},
{
"epoch": 0.7648017381857686,
"grad_norm": 0.5130847692489624,
"learning_rate": 2.647255039214328e-05,
"loss": 7.8289,
"step": 1056
},
{
"epoch": 0.7655259822560203,
"grad_norm": 0.4688289761543274,
"learning_rate": 2.6317435361164256e-05,
"loss": 7.837,
"step": 1057
},
{
"epoch": 0.7662502263262719,
"grad_norm": 0.4437507688999176,
"learning_rate": 2.6162707221855552e-05,
"loss": 7.8955,
"step": 1058
},
{
"epoch": 0.7669744703965237,
"grad_norm": 0.44463828206062317,
"learning_rate": 2.6008366786662073e-05,
"loss": 7.8781,
"step": 1059
},
{
"epoch": 0.7676987144667753,
"grad_norm": 0.44665560126304626,
"learning_rate": 2.585441486599308e-05,
"loss": 7.8517,
"step": 1060
},
{
"epoch": 0.768422958537027,
"grad_norm": 0.45422571897506714,
"learning_rate": 2.570085226821771e-05,
"loss": 7.8811,
"step": 1061
},
{
"epoch": 0.7691472026072786,
"grad_norm": 0.46825090050697327,
"learning_rate": 2.554767979966097e-05,
"loss": 7.8527,
"step": 1062
},
{
"epoch": 0.7698714466775304,
"grad_norm": 0.4797806441783905,
"learning_rate": 2.5394898264599297e-05,
"loss": 7.8624,
"step": 1063
},
{
"epoch": 0.770595690747782,
"grad_norm": 0.48127833008766174,
"learning_rate": 2.5242508465256397e-05,
"loss": 7.8368,
"step": 1064
},
{
"epoch": 0.7713199348180336,
"grad_norm": 0.480258584022522,
"learning_rate": 2.5090511201799172e-05,
"loss": 7.8536,
"step": 1065
},
{
"epoch": 0.7720441788882854,
"grad_norm": 0.495200514793396,
"learning_rate": 2.493890727233329e-05,
"loss": 7.8352,
"step": 1066
},
{
"epoch": 0.772768422958537,
"grad_norm": 0.4558762013912201,
"learning_rate": 2.478769747289912e-05,
"loss": 7.8246,
"step": 1067
},
{
"epoch": 0.7734926670287887,
"grad_norm": 0.46966373920440674,
"learning_rate": 2.4636882597467593e-05,
"loss": 7.8572,
"step": 1068
},
{
"epoch": 0.7742169110990403,
"grad_norm": 0.471635103225708,
"learning_rate": 2.4486463437935934e-05,
"loss": 7.8459,
"step": 1069
},
{
"epoch": 0.7749411551692921,
"grad_norm": 0.47857850790023804,
"learning_rate": 2.433644078412355e-05,
"loss": 7.8279,
"step": 1070
},
{
"epoch": 0.7756653992395437,
"grad_norm": 0.48017922043800354,
"learning_rate": 2.418681542376785e-05,
"loss": 7.8697,
"step": 1071
},
{
"epoch": 0.7763896433097954,
"grad_norm": 0.4738040864467621,
"learning_rate": 2.403758814252024e-05,
"loss": 7.8073,
"step": 1072
},
{
"epoch": 0.777113887380047,
"grad_norm": 0.46145448088645935,
"learning_rate": 2.3888759723941766e-05,
"loss": 7.8748,
"step": 1073
},
{
"epoch": 0.7778381314502988,
"grad_norm": 0.4822898805141449,
"learning_rate": 2.3740330949499257e-05,
"loss": 7.8332,
"step": 1074
},
{
"epoch": 0.7785623755205504,
"grad_norm": 0.47280868887901306,
"learning_rate": 2.359230259856097e-05,
"loss": 7.8453,
"step": 1075
},
{
"epoch": 0.7792866195908021,
"grad_norm": 0.47516578435897827,
"learning_rate": 2.3444675448392728e-05,
"loss": 7.8826,
"step": 1076
},
{
"epoch": 0.7800108636610538,
"grad_norm": 0.4623286724090576,
"learning_rate": 2.3297450274153643e-05,
"loss": 7.872,
"step": 1077
},
{
"epoch": 0.7807351077313055,
"grad_norm": 0.5090032815933228,
"learning_rate": 2.3150627848892248e-05,
"loss": 7.8377,
"step": 1078
},
{
"epoch": 0.7814593518015571,
"grad_norm": 0.4724178612232208,
"learning_rate": 2.3004208943542215e-05,
"loss": 7.8654,
"step": 1079
},
{
"epoch": 0.7821835958718087,
"grad_norm": 0.48950740694999695,
"learning_rate": 2.2858194326918435e-05,
"loss": 7.882,
"step": 1080
},
{
"epoch": 0.7829078399420605,
"grad_norm": 0.4657925069332123,
"learning_rate": 2.2712584765713064e-05,
"loss": 7.8123,
"step": 1081
},
{
"epoch": 0.7836320840123121,
"grad_norm": 0.5018364191055298,
"learning_rate": 2.256738102449124e-05,
"loss": 7.7877,
"step": 1082
},
{
"epoch": 0.7843563280825638,
"grad_norm": 0.4764616787433624,
"learning_rate": 2.2422583865687375e-05,
"loss": 7.8885,
"step": 1083
},
{
"epoch": 0.7850805721528155,
"grad_norm": 0.496661514043808,
"learning_rate": 2.227819404960092e-05,
"loss": 7.8584,
"step": 1084
},
{
"epoch": 0.7858048162230672,
"grad_norm": 0.48577845096588135,
"learning_rate": 2.2134212334392434e-05,
"loss": 7.7898,
"step": 1085
},
{
"epoch": 0.7865290602933188,
"grad_norm": 0.4589325189590454,
"learning_rate": 2.1990639476079712e-05,
"loss": 7.8669,
"step": 1086
},
{
"epoch": 0.7872533043635705,
"grad_norm": 0.5179975032806396,
"learning_rate": 2.1847476228533648e-05,
"loss": 7.7761,
"step": 1087
},
{
"epoch": 0.7879775484338222,
"grad_norm": 0.45577552914619446,
"learning_rate": 2.1704723343474396e-05,
"loss": 7.8018,
"step": 1088
},
{
"epoch": 0.7887017925040739,
"grad_norm": 0.4571017622947693,
"learning_rate": 2.156238157046734e-05,
"loss": 7.8382,
"step": 1089
},
{
"epoch": 0.7894260365743255,
"grad_norm": 0.49493101239204407,
"learning_rate": 2.1420451656919284e-05,
"loss": 7.7954,
"step": 1090
},
{
"epoch": 0.7901502806445773,
"grad_norm": 0.4955536723136902,
"learning_rate": 2.1278934348074332e-05,
"loss": 7.9138,
"step": 1091
},
{
"epoch": 0.7908745247148289,
"grad_norm": 0.5027803182601929,
"learning_rate": 2.1137830387010206e-05,
"loss": 7.7502,
"step": 1092
},
{
"epoch": 0.7915987687850806,
"grad_norm": 0.47927114367485046,
"learning_rate": 2.099714051463415e-05,
"loss": 7.8775,
"step": 1093
},
{
"epoch": 0.7923230128553322,
"grad_norm": 0.47723886370658875,
"learning_rate": 2.085686546967909e-05,
"loss": 7.7893,
"step": 1094
},
{
"epoch": 0.793047256925584,
"grad_norm": 0.5063787698745728,
"learning_rate": 2.0717005988699887e-05,
"loss": 7.8202,
"step": 1095
},
{
"epoch": 0.7937715009958356,
"grad_norm": 0.4967922270298004,
"learning_rate": 2.0577562806069238e-05,
"loss": 7.8676,
"step": 1096
},
{
"epoch": 0.7944957450660872,
"grad_norm": 0.4945312738418579,
"learning_rate": 2.0438536653974073e-05,
"loss": 7.7993,
"step": 1097
},
{
"epoch": 0.795219989136339,
"grad_norm": 0.47871074080467224,
"learning_rate": 2.029992826241145e-05,
"loss": 7.8309,
"step": 1098
},
{
"epoch": 0.7959442332065906,
"grad_norm": 0.5307055115699768,
"learning_rate": 2.016173835918498e-05,
"loss": 7.7908,
"step": 1099
},
{
"epoch": 0.7966684772768423,
"grad_norm": 0.5632224678993225,
"learning_rate": 2.0023967669900777e-05,
"loss": 7.7806,
"step": 1100
},
|
{
"epoch": 0.7973927213470939,
"grad_norm": 0.4810802638530731,
"learning_rate": 1.9886616917963785e-05,
"loss": 7.8422,
"step": 1101
},
{
"epoch": 0.7981169654173457,
"grad_norm": 0.49098509550094604,
"learning_rate": 1.9749686824573987e-05,
"loss": 7.8427,
"step": 1102
},
{
"epoch": 0.7988412094875973,
"grad_norm": 0.4845903217792511,
"learning_rate": 1.9613178108722507e-05,
"loss": 7.8179,
"step": 1103
},
{
"epoch": 0.799565453557849,
"grad_norm": 0.4725109040737152,
"learning_rate": 1.9477091487187983e-05,
"loss": 7.8455,
"step": 1104
},
{
"epoch": 0.8002896976281006,
"grad_norm": 0.49514099955558777,
"learning_rate": 1.9341427674532643e-05,
"loss": 7.8411,
"step": 1105
},
{
"epoch": 0.8010139416983524,
"grad_norm": 0.4852292239665985,
"learning_rate": 1.9206187383098694e-05,
"loss": 7.85,
"step": 1106
},
{
"epoch": 0.801738185768604,
"grad_norm": 0.505436897277832,
"learning_rate": 1.9071371323004494e-05,
"loss": 7.8205,
"step": 1107
},
{
"epoch": 0.8024624298388557,
"grad_norm": 0.4951551556587219,
"learning_rate": 1.893698020214082e-05,
"loss": 7.8409,
"step": 1108
},
{
"epoch": 0.8031866739091074,
"grad_norm": 0.49560704827308655,
"learning_rate": 1.8803014726167266e-05,
"loss": 7.8177,
"step": 1109
},
{
"epoch": 0.8039109179793591,
"grad_norm": 0.4628080725669861,
"learning_rate": 1.866947559850839e-05,
"loss": 7.8229,
"step": 1110
},
{
"epoch": 0.8046351620496107,
"grad_norm": 0.47996366024017334,
"learning_rate": 1.853636352035012e-05,
"loss": 7.8377,
"step": 1111
},
{
"epoch": 0.8053594061198623,
"grad_norm": 0.48397380113601685,
"learning_rate": 1.840367919063598e-05,
"loss": 7.8254,
"step": 1112
},
{
"epoch": 0.8060836501901141,
"grad_norm": 0.47569262981414795,
"learning_rate": 1.8271423306063564e-05,
"loss": 7.8227,
"step": 1113
},
{
"epoch": 0.8068078942603657,
"grad_norm": 0.47170665860176086,
"learning_rate": 1.8139596561080696e-05,
"loss": 7.8185,
"step": 1114
},
{
"epoch": 0.8075321383306174,
"grad_norm": 0.48356300592422485,
"learning_rate": 1.800819964788196e-05,
"loss": 7.847,
"step": 1115
},
{
"epoch": 0.8082563824008691,
"grad_norm": 0.4541751444339752,
"learning_rate": 1.7877233256404956e-05,
"loss": 7.861,
"step": 1116
},
{
"epoch": 0.8089806264711208,
"grad_norm": 0.43556642532348633,
"learning_rate": 1.7746698074326638e-05,
"loss": 7.8939,
"step": 1117
},
{
"epoch": 0.8097048705413724,
"grad_norm": 0.47422105073928833,
"learning_rate": 1.761659478705989e-05,
"loss": 7.8543,
"step": 1118
},
{
"epoch": 0.8104291146116241,
"grad_norm": 0.501738965511322,
"learning_rate": 1.7486924077749712e-05,
"loss": 7.8054,
"step": 1119
},
{
"epoch": 0.8111533586818758,
"grad_norm": 0.48210737109184265,
"learning_rate": 1.7357686627269788e-05,
"loss": 7.889,
"step": 1120
},
{
"epoch": 0.8118776027521275,
"grad_norm": 0.5010181069374084,
"learning_rate": 1.7228883114218775e-05,
"loss": 7.8183,
"step": 1121
},
{
"epoch": 0.8126018468223791,
"grad_norm": 0.5056195855140686,
"learning_rate": 1.710051421491694e-05,
"loss": 7.8112,
"step": 1122
},
{
"epoch": 0.8133260908926309,
"grad_norm": 0.49855145812034607,
"learning_rate": 1.6972580603402364e-05,
"loss": 7.788,
"step": 1123
},
{
"epoch": 0.8140503349628825,
"grad_norm": 0.479667603969574,
"learning_rate": 1.6845082951427572e-05,
"loss": 7.8281,
"step": 1124
},
{
"epoch": 0.8147745790331342,
"grad_norm": 0.4779108464717865,
"learning_rate": 1.671802192845594e-05,
"loss": 7.8274,
"step": 1125
},
{
"epoch": 0.8154988231033858,
"grad_norm": 0.4873594641685486,
"learning_rate": 1.659139820165825e-05,
"loss": 7.8795,
"step": 1126
},
{
"epoch": 0.8162230671736375,
"grad_norm": 0.47458213567733765,
"learning_rate": 1.6465212435909073e-05,
"loss": 7.8462,
"step": 1127
},
{
"epoch": 0.8169473112438892,
"grad_norm": 0.4490852355957031,
"learning_rate": 1.6339465293783328e-05,
"loss": 7.8258,
"step": 1128
},
{
"epoch": 0.8176715553141408,
"grad_norm": 0.4825502038002014,
"learning_rate": 1.6214157435552914e-05,
"loss": 7.8331,
"step": 1129
},
{
"epoch": 0.8183957993843926,
"grad_norm": 0.4452913701534271,
"learning_rate": 1.6089289519183036e-05,
"loss": 7.8999,
"step": 1130
},
{
"epoch": 0.8191200434546442,
"grad_norm": 0.4826939105987549,
"learning_rate": 1.596486220032888e-05,
"loss": 7.8164,
"step": 1131
},
{
"epoch": 0.8198442875248959,
"grad_norm": 0.46260425448417664,
"learning_rate": 1.5840876132332195e-05,
"loss": 7.8404,
"step": 1132
},
{
"epoch": 0.8205685315951475,
"grad_norm": 0.476392537355423,
"learning_rate": 1.571733196621774e-05,
"loss": 7.8025,
"step": 1133
},
{
"epoch": 0.8212927756653993,
"grad_norm": 0.43944284319877625,
"learning_rate": 1.5594230350690044e-05,
"loss": 7.8283,
"step": 1134
},
{
"epoch": 0.8220170197356509,
"grad_norm": 0.48247215151786804,
"learning_rate": 1.547157193212977e-05,
"loss": 7.7797,
"step": 1135
},
{
"epoch": 0.8227412638059026,
"grad_norm": 0.4725157618522644,
"learning_rate": 1.5349357354590555e-05,
"loss": 7.844,
"step": 1136
},
{
"epoch": 0.8234655078761542,
"grad_norm": 0.46587786078453064,
"learning_rate": 1.5227587259795462e-05,
"loss": 7.8471,
"step": 1137
},
{
"epoch": 0.824189751946406,
"grad_norm": 0.4861607253551483,
"learning_rate": 1.5106262287133643e-05,
"loss": 7.745,
"step": 1138
},
{
"epoch": 0.8249139960166576,
"grad_norm": 0.4683452248573303,
"learning_rate": 1.4985383073657112e-05,
"loss": 7.7614,
"step": 1139
},
{
"epoch": 0.8256382400869093,
"grad_norm": 0.47723594307899475,
"learning_rate": 1.4864950254077181e-05,
"loss": 7.8326,
"step": 1140
},
{
"epoch": 0.826362484157161,
"grad_norm": 0.49085745215415955,
"learning_rate": 1.4744964460761312e-05,
"loss": 7.7305,
"step": 1141
},
{
"epoch": 0.8270867282274127,
"grad_norm": 0.5058842301368713,
"learning_rate": 1.4625426323729708e-05,
"loss": 7.7709,
"step": 1142
},
{
"epoch": 0.8278109722976643,
"grad_norm": 0.47824400663375854,
"learning_rate": 1.4506336470651982e-05,
"loss": 7.7977,
"step": 1143
},
{
"epoch": 0.8285352163679159,
"grad_norm": 0.5179119110107422,
"learning_rate": 1.438769552684398e-05,
"loss": 7.7603,
"step": 1144
},
{
"epoch": 0.8292594604381677,
"grad_norm": 0.4722190201282501,
"learning_rate": 1.4269504115264376e-05,
"loss": 7.7913,
"step": 1145
},
{
"epoch": 0.8299837045084193,
"grad_norm": 0.47829577326774597,
"learning_rate": 1.4151762856511419e-05,
"loss": 7.8386,
"step": 1146
},
{
"epoch": 0.830707948578671,
"grad_norm": 0.47289183735847473,
"learning_rate": 1.4034472368819718e-05,
"loss": 7.8422,
"step": 1147
},
{
"epoch": 0.8314321926489227,
"grad_norm": 0.502065122127533,
"learning_rate": 1.391763326805704e-05,
"loss": 7.8705,
"step": 1148
},
{
"epoch": 0.8321564367191744,
"grad_norm": 0.529735267162323,
"learning_rate": 1.3801246167720904e-05,
"loss": 7.7626,
"step": 1149
},
{
"epoch": 0.832880680789426,
"grad_norm": 0.5681172609329224,
"learning_rate": 1.3685311678935575e-05,
"loss": 7.8454,
"step": 1150
},
{
"epoch": 0.8336049248596777,
"grad_norm": 0.4957883954048157,
"learning_rate": 1.3569830410448658e-05,
"loss": 7.8634,
"step": 1151
},
{
"epoch": 0.8343291689299294,
"grad_norm": 0.4802315831184387,
"learning_rate": 1.34548029686281e-05,
"loss": 7.8578,
"step": 1152
},
{
"epoch": 0.8350534130001811,
"grad_norm": 0.49144551157951355,
"learning_rate": 1.3340229957458783e-05,
"loss": 7.802,
"step": 1153
},
{
"epoch": 0.8357776570704327,
"grad_norm": 0.457791805267334,
"learning_rate": 1.3226111978539524e-05,
"loss": 7.8177,
"step": 1154
},
{
"epoch": 0.8365019011406845,
"grad_norm": 0.4937816858291626,
"learning_rate": 1.3112449631079892e-05,
"loss": 7.8694,
"step": 1155
},
{
"epoch": 0.8372261452109361,
"grad_norm": 0.4611557126045227,
"learning_rate": 1.2999243511896974e-05,
"loss": 7.8976,
"step": 1156
},
{
"epoch": 0.8379503892811878,
"grad_norm": 0.4825960099697113,
"learning_rate": 1.2886494215412348e-05,
"loss": 7.8651,
"step": 1157
},
{
"epoch": 0.8386746333514394,
"grad_norm": 0.4975496530532837,
"learning_rate": 1.2774202333648844e-05,
"loss": 7.8353,
"step": 1158
},
{
"epoch": 0.8393988774216911,
"grad_norm": 0.46706926822662354,
"learning_rate": 1.2662368456227602e-05,
"loss": 7.8567,
"step": 1159
},
{
"epoch": 0.8401231214919428,
"grad_norm": 0.46040868759155273,
"learning_rate": 1.25509931703648e-05,
"loss": 7.8983,
"step": 1160
},
|
{
"epoch": 0.8408473655621944,
"grad_norm": 0.4722301661968231,
"learning_rate": 1.2440077060868638e-05,
"loss": 7.8863,
"step": 1161
},
{
"epoch": 0.8415716096324461,
"grad_norm": 0.47437435388565063,
"learning_rate": 1.2329620710136358e-05,
"loss": 7.8298,
"step": 1162
},
{
"epoch": 0.8422958537026978,
"grad_norm": 0.4701695144176483,
"learning_rate": 1.2219624698151033e-05,
"loss": 7.8605,
"step": 1163
},
{
"epoch": 0.8430200977729495,
"grad_norm": 0.4844394326210022,
"learning_rate": 1.2110089602478624e-05,
"loss": 7.8583,
"step": 1164
},
{
"epoch": 0.8437443418432011,
"grad_norm": 0.4796792268753052,
"learning_rate": 1.2001015998264886e-05,
"loss": 7.8471,
"step": 1165
},
{
"epoch": 0.8444685859134529,
"grad_norm": 0.4662293493747711,
"learning_rate": 1.1892404458232454e-05,
"loss": 7.9045,
"step": 1166
},
{
"epoch": 0.8451928299837045,
"grad_norm": 0.49967148900032043,
"learning_rate": 1.178425555267768e-05,
"loss": 7.847,
"step": 1167
},
{
"epoch": 0.8459170740539562,
"grad_norm": 0.45847171545028687,
"learning_rate": 1.1676569849467733e-05,
"loss": 7.8271,
"step": 1168
},
{
"epoch": 0.8466413181242078,
"grad_norm": 0.4743190407752991,
"learning_rate": 1.1569347914037664e-05,
"loss": 7.8559,
"step": 1169
},
{
"epoch": 0.8473655621944596,
"grad_norm": 0.48577260971069336,
"learning_rate": 1.1462590309387289e-05,
"loss": 7.8242,
"step": 1170
},
{
"epoch": 0.8480898062647112,
"grad_norm": 0.4353601932525635,
"learning_rate": 1.1356297596078425e-05,
"loss": 7.8549,
"step": 1171
},
{
"epoch": 0.8488140503349629,
"grad_norm": 0.46052125096321106,
"learning_rate": 1.125047033223171e-05,
"loss": 7.8489,
"step": 1172
},
{
"epoch": 0.8495382944052146,
"grad_norm": 0.4780481159687042,
"learning_rate": 1.1145109073523951e-05,
"loss": 7.8461,
"step": 1173
},
{
"epoch": 0.8502625384754662,
"grad_norm": 0.48932063579559326,
"learning_rate": 1.1040214373184954e-05,
"loss": 7.8926,
"step": 1174
},
{
"epoch": 0.8509867825457179,
"grad_norm": 0.5182289481163025,
"learning_rate": 1.0935786781994738e-05,
"loss": 7.8103,
"step": 1175
},
{
"epoch": 0.8517110266159695,
"grad_norm": 0.4565328359603882,
"learning_rate": 1.0831826848280679e-05,
"loss": 7.853,
"step": 1176
},
{
"epoch": 0.8524352706862213,
"grad_norm": 0.4468229115009308,
"learning_rate": 1.0728335117914534e-05,
"loss": 7.8157,
"step": 1177
},
{
"epoch": 0.8531595147564729,
"grad_norm": 0.4953078329563141,
"learning_rate": 1.0625312134309662e-05,
"loss": 7.8499,
"step": 1178
},
{
"epoch": 0.8538837588267246,
"grad_norm": 0.4719950258731842,
"learning_rate": 1.052275843841809e-05,
"loss": 7.8703,
"step": 1179
},
{
"epoch": 0.8546080028969762,
"grad_norm": 0.506100594997406,
"learning_rate": 1.042067456872775e-05,
"loss": 7.7924,
"step": 1180
},
{
"epoch": 0.855332246967228,
"grad_norm": 0.46659165620803833,
"learning_rate": 1.0319061061259606e-05,
"loss": 7.8599,
"step": 1181
},
{
"epoch": 0.8560564910374796,
"grad_norm": 0.4609448313713074,
"learning_rate": 1.0217918449564812e-05,
"loss": 7.8043,
"step": 1182
},
{
"epoch": 0.8567807351077313,
"grad_norm": 0.4527445435523987,
"learning_rate": 1.0117247264722008e-05,
"loss": 7.8937,
"step": 1183
},
{
"epoch": 0.857504979177983,
"grad_norm": 0.478190153837204,
"learning_rate": 1.0017048035334408e-05,
"loss": 7.8079,
"step": 1184
},
{
"epoch": 0.8582292232482347,
"grad_norm": 0.46092936396598816,
"learning_rate": 9.917321287527148e-06,
"loss": 7.8575,
"step": 1185
},
{
"epoch": 0.8589534673184863,
"grad_norm": 0.4953816533088684,
"learning_rate": 9.81806754494441e-06,
"loss": 7.8281,
"step": 1186
},
{
"epoch": 0.859677711388738,
"grad_norm": 0.4743238687515259,
"learning_rate": 9.719287328746773e-06,
"loss": 7.8017,
"step": 1187
},
{
"epoch": 0.8604019554589897,
"grad_norm": 0.48036321997642517,
"learning_rate": 9.620981157608389e-06,
"loss": 7.8096,
"step": 1188
},
{
"epoch": 0.8611261995292414,
"grad_norm": 0.4978514611721039,
"learning_rate": 9.523149547714327e-06,
"loss": 7.7873,
"step": 1189
},
{
"epoch": 0.861850443599493,
"grad_norm": 0.5149461627006531,
"learning_rate": 9.425793012757812e-06,
"loss": 7.8773,
"step": 1190
},
{
"epoch": 0.8625746876697447,
"grad_norm": 0.4775876998901367,
"learning_rate": 9.328912063937544e-06,
"loss": 7.8557,
"step": 1191
},
{
"epoch": 0.8632989317399964,
"grad_norm": 0.4656752943992615,
"learning_rate": 9.232507209955077e-06,
"loss": 7.8102,
"step": 1192
},
{
"epoch": 0.864023175810248,
"grad_norm": 0.49488770961761475,
"learning_rate": 9.136578957011998e-06,
"loss": 7.8236,
"step": 1193
},
{
"epoch": 0.8647474198804997,
"grad_norm": 0.4837650656700134,
"learning_rate": 9.04112780880747e-06,
"loss": 7.7681,
"step": 1194
},
{
"epoch": 0.8654716639507514,
"grad_norm": 0.4603443443775177,
"learning_rate": 8.946154266535366e-06,
"loss": 7.8176,
"step": 1195
},
{
"epoch": 0.8661959080210031,
"grad_norm": 0.4513426423072815,
"learning_rate": 8.851658828881858e-06,
"loss": 7.8717,
"step": 1196
},
{
"epoch": 0.8669201520912547,
"grad_norm": 0.48408597707748413,
"learning_rate": 8.757641992022614e-06,
"loss": 7.758,
"step": 1197
},
{
"epoch": 0.8676443961615065,
"grad_norm": 0.5038301348686218,
"learning_rate": 8.664104249620298e-06,
"loss": 7.8486,
"step": 1198
},
{
"epoch": 0.8683686402317581,
"grad_norm": 0.5601458549499512,
"learning_rate": 8.571046092821955e-06,
"loss": 7.713,
"step": 1199
},
{
"epoch": 0.8690928843020098,
"grad_norm": 0.5590780377388,
"learning_rate": 8.478468010256425e-06,
"loss": 7.7364,
"step": 1200
},
{
"epoch": 0.8698171283722614,
"grad_norm": 0.4802273213863373,
"learning_rate": 8.386370488031759e-06,
"loss": 7.8386,
"step": 1201
},
{
"epoch": 0.8705413724425132,
"grad_norm": 0.5128775238990784,
"learning_rate": 8.294754009732696e-06,
"loss": 7.7979,
"step": 1202
},
{
"epoch": 0.8712656165127648,
"grad_norm": 0.49584537744522095,
"learning_rate": 8.203619056418155e-06,
"loss": 7.7934,
"step": 1203
},
{
"epoch": 0.8719898605830165,
"grad_norm": 0.47992539405822754,
"learning_rate": 8.112966106618602e-06,
"loss": 7.8282,
"step": 1204
},
{
"epoch": 0.8727141046532682,
"grad_norm": 0.4876195788383484,
"learning_rate": 8.022795636333636e-06,
"loss": 7.8194,
"step": 1205
},
{
"epoch": 0.8734383487235198,
"grad_norm": 0.49370482563972473,
"learning_rate": 7.933108119029475e-06,
"loss": 7.8294,
"step": 1206
},
{
"epoch": 0.8741625927937715,
"grad_norm": 0.47466379404067993,
"learning_rate": 7.843904025636417e-06,
"loss": 7.8737,
"step": 1207
},
{
"epoch": 0.8748868368640231,
"grad_norm": 0.5090891122817993,
"learning_rate": 7.75518382454643e-06,
"loss": 7.8391,
"step": 1208
},
{
"epoch": 0.8756110809342749,
"grad_norm": 0.5014375448226929,
"learning_rate": 7.66694798161064e-06,
"loss": 7.8252,
"step": 1209
},
{
"epoch": 0.8763353250045265,
"grad_norm": 0.46806249022483826,
"learning_rate": 7.579196960136959e-06,
"loss": 7.8172,
"step": 1210
},
{
"epoch": 0.8770595690747782,
"grad_norm": 0.4725843071937561,
"learning_rate": 7.4919312208875385e-06,
"loss": 7.8868,
"step": 1211
},
{
"epoch": 0.8777838131450298,
"grad_norm": 0.5052827000617981,
"learning_rate": 7.405151222076434e-06,
"loss": 7.8121,
"step": 1212
},
{
"epoch": 0.8785080572152816,
"grad_norm": 0.4352813959121704,
"learning_rate": 7.318857419367242e-06,
"loss": 7.8594,
"step": 1213
},
{
"epoch": 0.8792323012855332,
"grad_norm": 0.4714949429035187,
"learning_rate": 7.233050265870534e-06,
"loss": 7.8642,
"step": 1214
},
{
"epoch": 0.8799565453557849,
"grad_norm": 0.48388224840164185,
"learning_rate": 7.147730212141701e-06,
"loss": 7.8853,
"step": 1215
},
{
"epoch": 0.8806807894260366,
"grad_norm": 0.48352184891700745,
"learning_rate": 7.062897706178384e-06,
"loss": 7.8965,
"step": 1216
},
{
"epoch": 0.8814050334962883,
"grad_norm": 0.46383845806121826,
"learning_rate": 6.9785531934182915e-06,
"loss": 7.8627,
"step": 1217
},
{
"epoch": 0.8821292775665399,
"grad_norm": 0.48393815755844116,
"learning_rate": 6.894697116736715e-06,
"loss": 7.874,
"step": 1218
},
{
"epoch": 0.8828535216367916,
"grad_norm": 0.4774603843688965,
"learning_rate": 6.8113299164443205e-06,
"loss": 7.8833,
"step": 1219
},
{
"epoch": 0.8835777657070433,
"grad_norm": 0.47935643792152405,
"learning_rate": 6.728452030284738e-06,
"loss": 7.8931,
"step": 1220
},
|
{
"epoch": 0.8843020097772949,
"grad_norm": 0.45838284492492676,
"learning_rate": 6.646063893432364e-06,
"loss": 7.8718,
"step": 1221
},
{
"epoch": 0.8850262538475466,
"grad_norm": 0.4871087074279785,
"learning_rate": 6.564165938489996e-06,
"loss": 7.8648,
"step": 1222
},
{
"epoch": 0.8857504979177983,
"grad_norm": 0.4909784495830536,
"learning_rate": 6.482758595486571e-06,
"loss": 7.793,
"step": 1223
},
{
"epoch": 0.88647474198805,
"grad_norm": 0.4485389292240143,
"learning_rate": 6.401842291874982e-06,
"loss": 7.8831,
"step": 1224
},
{
"epoch": 0.8871989860583016,
"grad_norm": 0.479890376329422,
"learning_rate": 6.32141745252971e-06,
"loss": 7.8632,
"step": 1225
},
{
"epoch": 0.8879232301285533,
"grad_norm": 0.456013023853302,
"learning_rate": 6.241484499744732e-06,
"loss": 7.8373,
"step": 1226
},
{
"epoch": 0.888647474198805,
"grad_norm": 0.4660279452800751,
"learning_rate": 6.16204385323117e-06,
"loss": 7.8827,
"step": 1227
},
{
"epoch": 0.8893717182690567,
"grad_norm": 0.4719277620315552,
"learning_rate": 6.083095930115157e-06,
"loss": 7.7943,
"step": 1228
},
{
"epoch": 0.8900959623393083,
"grad_norm": 0.5206422209739685,
"learning_rate": 6.004641144935696e-06,
"loss": 7.7849,
"step": 1229
},
{
"epoch": 0.8908202064095601,
"grad_norm": 0.4772622585296631,
"learning_rate": 5.926679909642341e-06,
"loss": 7.8206,
"step": 1230
},
{
"epoch": 0.8915444504798117,
"grad_norm": 0.4761473536491394,
"learning_rate": 5.849212633593193e-06,
"loss": 7.83,
"step": 1231
},
{
"epoch": 0.8922686945500634,
"grad_norm": 0.46055835485458374,
"learning_rate": 5.77223972355263e-06,
"loss": 7.8043,
"step": 1232
},
{
"epoch": 0.892992938620315,
"grad_norm": 0.5095073580741882,
"learning_rate": 5.695761583689263e-06,
"loss": 7.7667,
"step": 1233
},
{
"epoch": 0.8937171826905668,
"grad_norm": 0.466761976480484,
"learning_rate": 5.619778615573712e-06,
"loss": 7.8028,
"step": 1234
},
{
"epoch": 0.8944414267608184,
"grad_norm": 0.5017898082733154,
"learning_rate": 5.544291218176578e-06,
"loss": 7.79,
"step": 1235
},
{
"epoch": 0.8951656708310701,
"grad_norm": 0.464933305978775,
"learning_rate": 5.469299787866355e-06,
"loss": 7.8344,
"step": 1236
},
{
"epoch": 0.8958899149013217,
"grad_norm": 0.47450631856918335,
"learning_rate": 5.394804718407276e-06,
"loss": 7.8237,
"step": 1237
},
{
"epoch": 0.8966141589715734,
"grad_norm": 0.4888070821762085,
"learning_rate": 5.320806400957312e-06,
"loss": 7.7909,
"step": 1238
},
{
"epoch": 0.8973384030418251,
"grad_norm": 0.4835636019706726,
"learning_rate": 5.247305224066079e-06,
"loss": 7.8204,
"step": 1239
},
{
"epoch": 0.8980626471120767,
"grad_norm": 0.5021434426307678,
"learning_rate": 5.174301573672813e-06,
"loss": 7.824,
"step": 1240
},
{
"epoch": 0.8987868911823285,
"grad_norm": 0.47264546155929565,
"learning_rate": 5.101795833104362e-06,
"loss": 7.8474,
"step": 1241
},
{
"epoch": 0.8995111352525801,
"grad_norm": 0.514746367931366,
"learning_rate": 5.02978838307312e-06,
"loss": 7.818,
"step": 1242
},
{
"epoch": 0.9002353793228318,
"grad_norm": 0.45614975690841675,
"learning_rate": 4.958279601675109e-06,
"loss": 7.7936,
"step": 1243
},
{
"epoch": 0.9009596233930834,
"grad_norm": 0.477163165807724,
"learning_rate": 4.887269864387889e-06,
"loss": 7.8263,
"step": 1244
},
{
"epoch": 0.9016838674633352,
"grad_norm": 0.4801469147205353,
"learning_rate": 4.816759544068705e-06,
"loss": 7.7873,
"step": 1245
},
{
"epoch": 0.9024081115335868,
"grad_norm": 0.5064570307731628,
"learning_rate": 4.746749010952412e-06,
"loss": 7.7897,
"step": 1246
},
{
"epoch": 0.9031323556038385,
"grad_norm": 0.4912499785423279,
"learning_rate": 4.677238632649639e-06,
"loss": 7.7679,
"step": 1247
},
{
"epoch": 0.9038565996740902,
"grad_norm": 0.5194550156593323,
"learning_rate": 4.608228774144785e-06,
"loss": 7.7607,
"step": 1248
},
{
"epoch": 0.9045808437443419,
"grad_norm": 0.5179385542869568,
"learning_rate": 4.5397197977940845e-06,
"loss": 7.7615,
"step": 1249
},
{
"epoch": 0.9053050878145935,
"grad_norm": 0.5159029364585876,
"learning_rate": 4.471712063323818e-06,
"loss": 7.7898,
"step": 1250
},
{
"epoch": 0.9060293318848452,
"grad_norm": 0.4930214285850525,
"learning_rate": 4.4042059278282865e-06,
"loss": 7.8184,
"step": 1251
},
{
"epoch": 0.9067535759550969,
"grad_norm": 0.5010021328926086,
"learning_rate": 4.33720174576806e-06,
"loss": 7.8191,
"step": 1252
},
{
"epoch": 0.9074778200253485,
"grad_norm": 0.47560325264930725,
"learning_rate": 4.270699868967998e-06,
"loss": 7.8237,
"step": 1253
},
{
"epoch": 0.9082020640956002,
"grad_norm": 0.4990387260913849,
"learning_rate": 4.2047006466155115e-06,
"loss": 7.841,
"step": 1254
},
{
"epoch": 0.9089263081658518,
"grad_norm": 0.5125808715820312,
"learning_rate": 4.139204425258625e-06,
"loss": 7.8264,
"step": 1255
},
{
"epoch": 0.9096505522361036,
"grad_norm": 0.47210493683815,
"learning_rate": 4.0742115488042636e-06,
"loss": 7.8051,
"step": 1256
},
{
"epoch": 0.9103747963063552,
"grad_norm": 0.4914381206035614,
"learning_rate": 4.009722358516366e-06,
"loss": 7.8372,
"step": 1257
},
{
"epoch": 0.9110990403766069,
"grad_norm": 0.48577770590782166,
"learning_rate": 3.945737193014121e-06,
"loss": 7.848,
"step": 1258
},
{
"epoch": 0.9118232844468586,
"grad_norm": 0.49301815032958984,
"learning_rate": 3.8822563882702e-06,
"loss": 7.8581,
"step": 1259
},
{
"epoch": 0.9125475285171103,
"grad_norm": 0.4755497872829437,
"learning_rate": 3.819280277608939e-06,
"loss": 7.8607,
"step": 1260
},
{
"epoch": 0.9132717725873619,
"grad_norm": 0.483230322599411,
"learning_rate": 3.7568091917047244e-06,
"loss": 7.8094,
"step": 1261
},
{
"epoch": 0.9139960166576137,
"grad_norm": 0.4857102632522583,
"learning_rate": 3.694843458580055e-06,
"loss": 7.8308,
"step": 1262
},
{
"epoch": 0.9147202607278653,
"grad_norm": 0.4947606921195984,
"learning_rate": 3.633383403604018e-06,
"loss": 7.8509,
"step": 1263
},
{
"epoch": 0.915444504798117,
"grad_norm": 0.489041268825531,
"learning_rate": 3.572429349490436e-06,
"loss": 7.8645,
"step": 1264
},
{
"epoch": 0.9161687488683686,
"grad_norm": 0.5049325227737427,
"learning_rate": 3.511981616296245e-06,
"loss": 7.8248,
"step": 1265
},
{
"epoch": 0.9168929929386204,
"grad_norm": 0.4949936866760254,
"learning_rate": 3.4520405214197972e-06,
"loss": 7.8719,
"step": 1266
},
{
"epoch": 0.917617237008872,
"grad_norm": 0.49333250522613525,
"learning_rate": 3.3926063795991723e-06,
"loss": 7.8477,
"step": 1267
},
{
"epoch": 0.9183414810791236,
"grad_norm": 0.4706898331642151,
"learning_rate": 3.3336795029106027e-06,
"loss": 7.831,
"step": 1268
},
{
"epoch": 0.9190657251493753,
"grad_norm": 0.5014546513557434,
"learning_rate": 3.2752602007667167e-06,
"loss": 7.8772,
"step": 1269
},
{
"epoch": 0.919789969219627,
"grad_norm": 0.50562584400177,
"learning_rate": 3.2173487799150083e-06,
"loss": 7.8077,
"step": 1270
},
{
"epoch": 0.9205142132898787,
"grad_norm": 0.4861434996128082,
"learning_rate": 3.159945544436171e-06,
"loss": 7.8048,
"step": 1271
},
{
"epoch": 0.9212384573601303,
"grad_norm": 0.4890470802783966,
"learning_rate": 3.103050795742546e-06,
"loss": 7.8218,
"step": 1272
},
{
"epoch": 0.9219627014303821,
"grad_norm": 0.46198517084121704,
"learning_rate": 3.046664832576518e-06,
"loss": 7.8283,
"step": 1273
},
{
"epoch": 0.9226869455006337,
"grad_norm": 0.4833485186100006,
"learning_rate": 2.990787951008911e-06,
"loss": 7.8617,
"step": 1274
},
{
"epoch": 0.9234111895708854,
"grad_norm": 0.47587546706199646,
"learning_rate": 2.93542044443752e-06,
"loss": 7.8225,
"step": 1275
},
{
"epoch": 0.924135433641137,
"grad_norm": 0.4739225506782532,
"learning_rate": 2.8805626035854793e-06,
"loss": 7.8603,
"step": 1276
},
{
"epoch": 0.9248596777113888,
"grad_norm": 0.48468372225761414,
"learning_rate": 2.8262147164997975e-06,
"loss": 7.8818,
"step": 1277
},
{
"epoch": 0.9255839217816404,
"grad_norm": 0.47420841455459595,
"learning_rate": 2.772377068549792e-06,
"loss": 7.8859,
"step": 1278
},
{
"epoch": 0.9263081658518921,
"grad_norm": 0.4387821555137634,
"learning_rate": 2.7190499424256665e-06,
"loss": 7.8423,
"step": 1279
},
{
"epoch": 0.9270324099221438,
"grad_norm": 0.4843416213989258,
"learning_rate": 2.6662336181369485e-06,
"loss": 7.8529,
"step": 1280
},
|
{
"epoch": 0.9277566539923955,
"grad_norm": 0.45708930492401123,
"learning_rate": 2.613928373011065e-06,
"loss": 7.7918,
"step": 1281
},
{
"epoch": 0.9284808980626471,
"grad_norm": 0.44832369685173035,
"learning_rate": 2.5621344816918803e-06,
"loss": 7.9067,
"step": 1282
},
{
"epoch": 0.9292051421328987,
"grad_norm": 0.485809862613678,
"learning_rate": 2.5108522161382153e-06,
"loss": 7.8254,
"step": 1283
},
{
"epoch": 0.9299293862031505,
"grad_norm": 0.4524907171726227,
"learning_rate": 2.4600818456225083e-06,
"loss": 7.8376,
"step": 1284
},
{
"epoch": 0.9306536302734021,
"grad_norm": 0.4835200309753418,
"learning_rate": 2.4098236367292805e-06,
"loss": 7.7911,
"step": 1285
},
{
"epoch": 0.9313778743436538,
"grad_norm": 0.45698094367980957,
"learning_rate": 2.360077853353848e-06,
"loss": 7.9004,
"step": 1286
},
{
"epoch": 0.9321021184139054,
"grad_norm": 0.47620901465415955,
"learning_rate": 2.3108447567008695e-06,
"loss": 7.7822,
"step": 1287
},
{
"epoch": 0.9328263624841572,
"grad_norm": 0.502024233341217,
"learning_rate": 2.262124605282978e-06,
"loss": 7.8447,
"step": 1288
},
{
"epoch": 0.9335506065544088,
"grad_norm": 0.5169337391853333,
"learning_rate": 2.213917654919473e-06,
"loss": 7.8207,
"step": 1289
},
{
"epoch": 0.9342748506246605,
"grad_norm": 0.5109481811523438,
"learning_rate": 2.1662241587349195e-06,
"loss": 7.8592,
"step": 1290
},
{
"epoch": 0.9349990946949122,
"grad_norm": 0.47905823588371277,
"learning_rate": 2.119044367157852e-06,
"loss": 7.9079,
"step": 1291
},
{
"epoch": 0.9357233387651639,
"grad_norm": 0.48844465613365173,
"learning_rate": 2.0723785279194386e-06,
"loss": 7.7795,
"step": 1292
},
{
"epoch": 0.9364475828354155,
"grad_norm": 0.49026617407798767,
"learning_rate": 2.026226886052207e-06,
"loss": 7.8043,
"step": 1293
},
{
"epoch": 0.9371718269056672,
"grad_norm": 0.505928635597229,
"learning_rate": 1.9805896838887337e-06,
"loss": 7.7842,
"step": 1294
},
{
"epoch": 0.9378960709759189,
"grad_norm": 0.4998481869697571,
"learning_rate": 1.935467161060378e-06,
"loss": 7.903,
"step": 1295
},
{
"epoch": 0.9386203150461706,
"grad_norm": 0.5379026532173157,
"learning_rate": 1.8908595544960272e-06,
"loss": 7.8672,
"step": 1296
},
{
"epoch": 0.9393445591164222,
"grad_norm": 0.4858308732509613,
"learning_rate": 1.8467670984208652e-06,
"loss": 7.7751,
"step": 1297
},
{
"epoch": 0.940068803186674,
"grad_norm": 0.51478111743927,
"learning_rate": 1.8031900243550948e-06,
"loss": 7.8418,
"step": 1298
},
{
"epoch": 0.9407930472569256,
"grad_norm": 0.4762341380119324,
"learning_rate": 1.760128561112795e-06,
"loss": 7.8597,
"step": 1299
},
{
"epoch": 0.9415172913271772,
"grad_norm": 0.5281409025192261,
"learning_rate": 1.7175829348006767e-06,
"loss": 7.7901,
"step": 1300
},
{
"epoch": 0.9422415353974289,
"grad_norm": 0.4912181496620178,
"learning_rate": 1.6755533688168624e-06,
"loss": 7.8392,
"step": 1301
},
{
"epoch": 0.9429657794676806,
"grad_norm": 0.47923725843429565,
"learning_rate": 1.634040083849786e-06,
"loss": 7.8797,
"step": 1302
},
{
"epoch": 0.9436900235379323,
"grad_norm": 0.48932698369026184,
"learning_rate": 1.593043297876984e-06,
"loss": 7.8072,
"step": 1303
},
{
"epoch": 0.9444142676081839,
"grad_norm": 0.4930644631385803,
"learning_rate": 1.5525632261639722e-06,
"loss": 7.8609,
"step": 1304
},
{
"epoch": 0.9451385116784357,
"grad_norm": 0.46192994713783264,
"learning_rate": 1.5126000812631159e-06,
"loss": 7.8686,
"step": 1305
},
{
"epoch": 0.9458627557486873,
"grad_norm": 0.4716580808162689,
"learning_rate": 1.4731540730124616e-06,
"loss": 7.8314,
"step": 1306
},
{
"epoch": 0.946586999818939,
"grad_norm": 0.45859482884407043,
"learning_rate": 1.4342254085347506e-06,
"loss": 7.8843,
"step": 1307
},
{
"epoch": 0.9473112438891906,
"grad_norm": 0.4815140664577484,
"learning_rate": 1.3958142922362083e-06,
"loss": 7.8706,
"step": 1308
},
{
"epoch": 0.9480354879594424,
"grad_norm": 0.46593400835990906,
"learning_rate": 1.3579209258055226e-06,
"loss": 7.8941,
"step": 1309
},
{
"epoch": 0.948759732029694,
"grad_norm": 0.47369858622550964,
"learning_rate": 1.3205455082128225e-06,
"loss": 7.8729,
"step": 1310
},
{
"epoch": 0.9494839760999457,
"grad_norm": 0.46756669878959656,
"learning_rate": 1.283688235708569e-06,
"loss": 7.8424,
"step": 1311
},
{
"epoch": 0.9502082201701973,
"grad_norm": 0.5048179626464844,
"learning_rate": 1.2473493018225646e-06,
"loss": 7.854,
"step": 1312
},
{
"epoch": 0.9509324642404491,
"grad_norm": 0.45838841795921326,
"learning_rate": 1.2115288973629014e-06,
"loss": 7.8695,
"step": 1313
},
{
"epoch": 0.9516567083107007,
"grad_norm": 0.5052929520606995,
"learning_rate": 1.176227210415015e-06,
"loss": 7.788,
"step": 1314
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.4793596863746643,
"learning_rate": 1.1414444263406432e-06,
"loss": 7.8184,
"step": 1315
},
{
"epoch": 0.9531051964512041,
"grad_norm": 0.47426638007164,
"learning_rate": 1.1071807277768798e-06,
"loss": 7.8186,
"step": 1316
},
{
"epoch": 0.9538294405214557,
"grad_norm": 0.48723068833351135,
"learning_rate": 1.0734362946352107e-06,
"loss": 7.8259,
"step": 1317
},
{
"epoch": 0.9545536845917074,
"grad_norm": 0.4758625626564026,
"learning_rate": 1.0402113041005468e-06,
"loss": 7.8321,
"step": 1318
},
{
"epoch": 0.955277928661959,
"grad_norm": 0.4779294431209564,
"learning_rate": 1.0075059306303702e-06,
"loss": 7.8489,
"step": 1319
},
{
"epoch": 0.9560021727322108,
"grad_norm": 0.48141980171203613,
"learning_rate": 9.753203459537009e-07,
"loss": 7.8922,
"step": 1320
},
{
"epoch": 0.9567264168024624,
"grad_norm": 0.4511309862136841,
"learning_rate": 9.436547190702971e-07,
"loss": 7.8431,
"step": 1321
},
{
"epoch": 0.9574506608727141,
"grad_norm": 0.4854510724544525,
"learning_rate": 9.125092162497129e-07,
"loss": 7.8434,
"step": 1322
},
{
"epoch": 0.9581749049429658,
"grad_norm": 0.48352810740470886,
"learning_rate": 8.818840010304308e-07,
"loss": 7.864,
"step": 1323
},
{
"epoch": 0.9588991490132175,
"grad_norm": 0.4762645661830902,
"learning_rate": 8.5177923421903e-07,
"loss": 7.8196,
"step": 1324
},
{
"epoch": 0.9596233930834691,
"grad_norm": 0.4877321720123291,
"learning_rate": 8.221950738893203e-07,
"loss": 7.7954,
"step": 1325
},
{
"epoch": 0.9603476371537208,
"grad_norm": 0.4776611924171448,
"learning_rate": 7.931316753815088e-07,
"loss": 7.8505,
"step": 1326
},
{
"epoch": 0.9610718812239725,
"grad_norm": 0.479509562253952,
"learning_rate": 7.645891913014013e-07,
"loss": 7.8779,
"step": 1327
},
{
"epoch": 0.9617961252942242,
"grad_norm": 0.46871259808540344,
"learning_rate": 7.365677715195918e-07,
"loss": 7.8008,
"step": 1328
},
{
"epoch": 0.9625203693644758,
"grad_norm": 0.4688069820404053,
"learning_rate": 7.090675631706512e-07,
"loss": 7.8098,
"step": 1329
},
{
"epoch": 0.9632446134347274,
"grad_norm": 0.4722239077091217,
"learning_rate": 6.820887106524065e-07,
"loss": 7.8363,
"step": 1330
},
{
"epoch": 0.9639688575049792,
"grad_norm": 0.4427089989185333,
"learning_rate": 6.556313556251636e-07,
"loss": 7.8216,
"step": 1331
},
{
"epoch": 0.9646931015752308,
"grad_norm": 0.475266695022583,
"learning_rate": 6.296956370109075e-07,
"loss": 7.8203,
"step": 1332
},
{
"epoch": 0.9654173456454825,
"grad_norm": 0.47641658782958984,
"learning_rate": 6.042816909926585e-07,
"loss": 7.85,
"step": 1333
},
{
"epoch": 0.9661415897157342,
"grad_norm": 0.47987788915634155,
"learning_rate": 5.793896510137287e-07,
"loss": 7.7702,
"step": 1334
},
{
"epoch": 0.9668658337859859,
"grad_norm": 0.5124553442001343,
"learning_rate": 5.550196477769665e-07,
"loss": 7.8409,
"step": 1335
},
{
"epoch": 0.9675900778562375,
"grad_norm": 0.4751659333705902,
"learning_rate": 5.311718092441465e-07,
"loss": 7.8114,
"step": 1336
},
{
"epoch": 0.9683143219264893,
"grad_norm": 0.45295917987823486,
"learning_rate": 5.078462606352585e-07,
"loss": 7.9019,
"step": 1337
},
{
"epoch": 0.9690385659967409,
"grad_norm": 0.49618884921073914,
"learning_rate": 4.850431244278753e-07,
"loss": 7.7931,
"step": 1338
},
{
"epoch": 0.9697628100669926,
"grad_norm": 0.48623406887054443,
"learning_rate": 4.627625203564523e-07,
"loss": 7.8004,
"step": 1339
},
{
"epoch": 0.9704870541372442,
"grad_norm": 0.43766334652900696,
"learning_rate": 4.4100456541177335e-07,
"loss": 7.8297,
"step": 1340
},
|
{
"epoch": 0.971211298207496,
"grad_norm": 0.46884864568710327,
"learning_rate": 4.1976937384028417e-07,
"loss": 7.8893,
"step": 1341
},
{
"epoch": 0.9719355422777476,
"grad_norm": 0.48704731464385986,
"learning_rate": 3.990570571435259e-07,
"loss": 7.7991,
"step": 1342
},
{
"epoch": 0.9726597863479993,
"grad_norm": 0.4837970733642578,
"learning_rate": 3.7886772407751406e-07,
"loss": 7.8021,
"step": 1343
},
{
"epoch": 0.973384030418251,
"grad_norm": 0.5404148697853088,
"learning_rate": 3.5920148065220484e-07,
"loss": 7.7664,
"step": 1344
},
{
"epoch": 0.9741082744885027,
"grad_norm": 0.544996976852417,
"learning_rate": 3.4005843013089625e-07,
"loss": 7.8745,
"step": 1345
},
{
"epoch": 0.9748325185587543,
"grad_norm": 0.5073729753494263,
"learning_rate": 3.2143867302973917e-07,
"loss": 7.774,
"step": 1346
},
{
"epoch": 0.9755567626290059,
"grad_norm": 0.5275909304618835,
"learning_rate": 3.033423071171604e-07,
"loss": 7.7614,
"step": 1347
},
{
"epoch": 0.9762810066992577,
"grad_norm": 0.5046758651733398,
"learning_rate": 2.857694274133849e-07,
"loss": 7.7907,
"step": 1348
},
{
"epoch": 0.9770052507695093,
"grad_norm": 0.5203927159309387,
"learning_rate": 2.6872012618990306e-07,
"loss": 7.852,
"step": 1349
},
{
"epoch": 0.977729494839761,
"grad_norm": 0.6230973601341248,
"learning_rate": 2.5219449296900455e-07,
"loss": 7.8475,
"step": 1350
},
{
"epoch": 0.9784537389100126,
"grad_norm": 0.4698044955730438,
"learning_rate": 2.3619261452335617e-07,
"loss": 7.83,
"step": 1351
},
{
"epoch": 0.9791779829802644,
"grad_norm": 0.48747655749320984,
"learning_rate": 2.207145748754247e-07,
"loss": 7.8553,
"step": 1352
},
{
"epoch": 0.979902227050516,
"grad_norm": 0.5164722800254822,
"learning_rate": 2.0576045529715482e-07,
"loss": 7.8262,
"step": 1353
},
{
"epoch": 0.9806264711207677,
"grad_norm": 0.4546986222267151,
"learning_rate": 1.9133033430949186e-07,
"loss": 7.9124,
"step": 1354
},
{
"epoch": 0.9813507151910194,
"grad_norm": 0.4600694179534912,
"learning_rate": 1.7742428768195985e-07,
"loss": 7.8223,
"step": 1355
},
{
"epoch": 0.9820749592612711,
"grad_norm": 0.4773062765598297,
"learning_rate": 1.6404238843230612e-07,
"loss": 7.8256,
"step": 1356
},
{
"epoch": 0.9827992033315227,
"grad_norm": 0.46029308438301086,
"learning_rate": 1.5118470682605745e-07,
"loss": 7.8241,
"step": 1357
},
{
"epoch": 0.9835234474017744,
"grad_norm": 0.45993316173553467,
"learning_rate": 1.38851310376209e-07,
"loss": 7.8153,
"step": 1358
},
{
"epoch": 0.9842476914720261,
"grad_norm": 0.45841482281684875,
"learning_rate": 1.2704226384282482e-07,
"loss": 7.8534,
"step": 1359
},
{
"epoch": 0.9849719355422778,
"grad_norm": 0.4997026026248932,
"learning_rate": 1.157576292327378e-07,
"loss": 7.8132,
"step": 1360
},
{
"epoch": 0.9856961796125294,
"grad_norm": 0.4676493704319,
"learning_rate": 1.0499746579919478e-07,
"loss": 7.8852,
"step": 1361
},
{
"epoch": 0.986420423682781,
"grad_norm": 0.497711718082428,
"learning_rate": 9.476183004154537e-08,
"loss": 7.8372,
"step": 1362
},
{
"epoch": 0.9871446677530328,
"grad_norm": 0.46188995242118835,
"learning_rate": 8.505077570496456e-08,
"loss": 7.8758,
"step": 1363
},
{
"epoch": 0.9878689118232844,
"grad_norm": 0.46226826310157776,
"learning_rate": 7.586435378016399e-08,
"loss": 7.8781,
"step": 1364
},
{
"epoch": 0.9885931558935361,
"grad_norm": 0.47206389904022217,
"learning_rate": 6.720261250311444e-08,
"loss": 7.8517,
"step": 1365
},
{
"epoch": 0.9893173999637878,
"grad_norm": 0.4836471378803253,
"learning_rate": 5.9065597354790445e-08,
"loss": 7.8837,
"step": 1366
},
{
"epoch": 0.9900416440340395,
"grad_norm": 0.4508313834667206,
"learning_rate": 5.1453351060959387e-08,
"loss": 7.8247,
"step": 1367
},
{
"epoch": 0.9907658881042911,
"grad_norm": 0.46068304777145386,
"learning_rate": 4.43659135919372e-08,
"loss": 7.8923,
"step": 1368
},
{
"epoch": 0.9914901321745428,
"grad_norm": 0.4875946342945099,
"learning_rate": 3.780332216234417e-08,
"loss": 7.866,
"step": 1369
},
{
"epoch": 0.9922143762447945,
"grad_norm": 0.48047712445259094,
"learning_rate": 3.1765611230993865e-08,
"loss": 7.7948,
"step": 1370
},
{
"epoch": 0.9929386203150462,
"grad_norm": 0.4504997134208679,
"learning_rate": 2.625281250061562e-08,
"loss": 7.808,
"step": 1371
},
{
"epoch": 0.9936628643852978,
"grad_norm": 0.44184571504592896,
"learning_rate": 2.1264954917776802e-08,
"loss": 7.8365,
"step": 1372
},
{
"epoch": 0.9943871084555496,
"grad_norm": 0.46836909651756287,
"learning_rate": 1.6802064672660767e-08,
"loss": 7.8799,
"step": 1373
},
{
"epoch": 0.9951113525258012,
"grad_norm": 0.5343976616859436,
"learning_rate": 1.286416519897804e-08,
"loss": 7.7852,
"step": 1374
},
{
"epoch": 0.9958355965960529,
"grad_norm": 0.5096919536590576,
"learning_rate": 9.451277173788687e-09,
"loss": 7.7644,
"step": 1375
},
{
"epoch": 0.9965598406663045,
"grad_norm": 0.538051426410675,
"learning_rate": 6.563418517469e-09,
"loss": 7.8173,
"step": 1376
},
{
"epoch": 0.9972840847365562,
"grad_norm": 0.4815247356891632,
"learning_rate": 4.200604393556073e-09,
"loss": 7.8366,
"step": 1377
},
{
"epoch": 0.9980083288068079,
"grad_norm": 0.49304264783859253,
"learning_rate": 2.3628472086811847e-09,
"loss": 7.8003,
"step": 1378
},
{
"epoch": 0.9987325728770595,
"grad_norm": 0.5107442736625671,
"learning_rate": 1.0501566125364904e-09,
"loss": 7.7771,
"step": 1379
},
{
"epoch": 0.9994568169473113,
"grad_norm": 0.5584591031074524,
"learning_rate": 2.6253949776400275e-10,
"loss": 7.8327,
"step": 1380
},
{
"epoch": 1.0003621220351258,
"grad_norm": 0.8555117845535278,
"learning_rate": 0.0,
"loss": 11.8993,
"step": 1381
}
],
"logging_steps": 1,
"max_steps": 1381,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 346,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 564726482337792.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}