| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.4332965821389196, |
| "eval_steps": 500, |
| "global_step": 2600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005512679162072767, |
| "grad_norm": 12.100004196166992, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 1.4741, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0011025358324145535, |
| "grad_norm": 11.976073265075684, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 1.4512, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0016538037486218302, |
| "grad_norm": 4.930200576782227, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 1.3853, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002205071664829107, |
| "grad_norm": 1.8625606298446655, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 1.12, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0027563395810363835, |
| "grad_norm": 1.4577418565750122, |
| "learning_rate": 0.00015, |
| "loss": 1.005, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0033076074972436605, |
| "grad_norm": 1.1385219097137451, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.8992, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003858875413450937, |
| "grad_norm": 1.5815627574920654, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.815, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004410143329658214, |
| "grad_norm": 0.6205328702926636, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.7967, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004961411245865491, |
| "grad_norm": 1.6408820152282715, |
| "learning_rate": 0.00027, |
| "loss": 0.7702, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005512679162072767, |
| "grad_norm": 0.8569570183753967, |
| "learning_rate": 0.0003, |
| "loss": 0.7845, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006063947078280044, |
| "grad_norm": 0.67384272813797, |
| "learning_rate": 0.0002999170812603648, |
| "loss": 0.7192, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006615214994487321, |
| "grad_norm": 2.0132830142974854, |
| "learning_rate": 0.00029983416252072964, |
| "loss": 0.7354, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007166482910694598, |
| "grad_norm": 0.6772907972335815, |
| "learning_rate": 0.0002997512437810945, |
| "loss": 0.715, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007717750826901874, |
| "grad_norm": 0.5798671245574951, |
| "learning_rate": 0.00029966832504145936, |
| "loss": 0.7477, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.008269018743109152, |
| "grad_norm": 0.49168965220451355, |
| "learning_rate": 0.00029958540630182416, |
| "loss": 0.713, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008820286659316428, |
| "grad_norm": 0.478697806596756, |
| "learning_rate": 0.000299502487562189, |
| "loss": 0.6915, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.009371554575523704, |
| "grad_norm": 0.4884359538555145, |
| "learning_rate": 0.0002994195688225539, |
| "loss": 0.7305, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009922822491730982, |
| "grad_norm": 0.4691940248012543, |
| "learning_rate": 0.00029933665008291874, |
| "loss": 0.6646, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.010474090407938258, |
| "grad_norm": 0.4946594834327698, |
| "learning_rate": 0.00029925373134328354, |
| "loss": 0.7137, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.011025358324145534, |
| "grad_norm": 0.4412364363670349, |
| "learning_rate": 0.0002991708126036484, |
| "loss": 0.7063, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011576626240352812, |
| "grad_norm": 0.5092226266860962, |
| "learning_rate": 0.0002990878938640132, |
| "loss": 0.684, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.012127894156560088, |
| "grad_norm": 0.45330244302749634, |
| "learning_rate": 0.00029900497512437807, |
| "loss": 0.6677, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.012679162072767364, |
| "grad_norm": 0.4717816710472107, |
| "learning_rate": 0.0002989220563847429, |
| "loss": 0.6898, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.013230429988974642, |
| "grad_norm": 0.41348159313201904, |
| "learning_rate": 0.0002988391376451078, |
| "loss": 0.6735, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.013781697905181918, |
| "grad_norm": 0.44471853971481323, |
| "learning_rate": 0.0002987562189054726, |
| "loss": 0.6732, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.014332965821389196, |
| "grad_norm": 0.44660595059394836, |
| "learning_rate": 0.00029867330016583745, |
| "loss": 0.7058, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.014884233737596472, |
| "grad_norm": 0.3917936086654663, |
| "learning_rate": 0.0002985903814262023, |
| "loss": 0.6486, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.015435501653803748, |
| "grad_norm": 0.3844316899776459, |
| "learning_rate": 0.00029850746268656717, |
| "loss": 0.6726, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.015986769570011026, |
| "grad_norm": 0.38220199942588806, |
| "learning_rate": 0.00029842454394693197, |
| "loss": 0.6835, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.016538037486218304, |
| "grad_norm": 0.3823130428791046, |
| "learning_rate": 0.00029834162520729683, |
| "loss": 0.6818, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.017089305402425578, |
| "grad_norm": 0.3354315161705017, |
| "learning_rate": 0.00029825870646766164, |
| "loss": 0.6421, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.017640573318632856, |
| "grad_norm": 0.3261851966381073, |
| "learning_rate": 0.0002981757877280265, |
| "loss": 0.6254, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.018191841234840134, |
| "grad_norm": 0.3275938928127289, |
| "learning_rate": 0.00029809286898839135, |
| "loss": 0.6529, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.018743109151047408, |
| "grad_norm": 0.3375149667263031, |
| "learning_rate": 0.0002980099502487562, |
| "loss": 0.664, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.019294377067254686, |
| "grad_norm": 0.33320432901382446, |
| "learning_rate": 0.000297927031509121, |
| "loss": 0.6157, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.019845644983461964, |
| "grad_norm": 0.30827271938323975, |
| "learning_rate": 0.0002978441127694859, |
| "loss": 0.6418, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.020396912899669238, |
| "grad_norm": 0.3377619683742523, |
| "learning_rate": 0.00029776119402985074, |
| "loss": 0.6454, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.020948180815876516, |
| "grad_norm": 0.32735955715179443, |
| "learning_rate": 0.0002976782752902156, |
| "loss": 0.632, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.021499448732083794, |
| "grad_norm": 0.37884464859962463, |
| "learning_rate": 0.0002975953565505804, |
| "loss": 0.6223, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.022050716648291068, |
| "grad_norm": 0.3301836848258972, |
| "learning_rate": 0.00029751243781094526, |
| "loss": 0.6654, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.022601984564498346, |
| "grad_norm": 0.3196747303009033, |
| "learning_rate": 0.00029742951907131006, |
| "loss": 0.6445, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.023153252480705624, |
| "grad_norm": 0.3292658030986786, |
| "learning_rate": 0.0002973466003316749, |
| "loss": 0.6271, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.023704520396912898, |
| "grad_norm": 0.32541969418525696, |
| "learning_rate": 0.0002972636815920398, |
| "loss": 0.6217, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.024255788313120176, |
| "grad_norm": 0.3059806823730469, |
| "learning_rate": 0.00029718076285240464, |
| "loss": 0.6029, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.024807056229327454, |
| "grad_norm": 0.3427717983722687, |
| "learning_rate": 0.00029709784411276945, |
| "loss": 0.6523, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.025358324145534728, |
| "grad_norm": 0.33184289932250977, |
| "learning_rate": 0.0002970149253731343, |
| "loss": 0.6475, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.025909592061742006, |
| "grad_norm": 0.32376739382743835, |
| "learning_rate": 0.00029693200663349917, |
| "loss": 0.6588, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.026460859977949284, |
| "grad_norm": 0.30022457242012024, |
| "learning_rate": 0.000296849087893864, |
| "loss": 0.6316, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02701212789415656, |
| "grad_norm": 0.3170008957386017, |
| "learning_rate": 0.00029676616915422883, |
| "loss": 0.5847, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.027563395810363836, |
| "grad_norm": 0.3455023765563965, |
| "learning_rate": 0.0002966832504145937, |
| "loss": 0.6668, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.028114663726571114, |
| "grad_norm": 0.3004387617111206, |
| "learning_rate": 0.0002966003316749585, |
| "loss": 0.6599, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.02866593164277839, |
| "grad_norm": 0.33361348509788513, |
| "learning_rate": 0.00029651741293532335, |
| "loss": 0.6502, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.029217199558985666, |
| "grad_norm": 0.34541115164756775, |
| "learning_rate": 0.0002964344941956882, |
| "loss": 0.6764, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.029768467475192944, |
| "grad_norm": 0.32801833748817444, |
| "learning_rate": 0.00029635157545605307, |
| "loss": 0.6347, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03031973539140022, |
| "grad_norm": 0.30410563945770264, |
| "learning_rate": 0.0002962686567164179, |
| "loss": 0.6117, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.030871003307607496, |
| "grad_norm": 0.31390225887298584, |
| "learning_rate": 0.00029618573797678274, |
| "loss": 0.5973, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.031422271223814774, |
| "grad_norm": 0.34744319319725037, |
| "learning_rate": 0.0002961028192371476, |
| "loss": 0.6544, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03197353914002205, |
| "grad_norm": 0.3452775180339813, |
| "learning_rate": 0.0002960199004975124, |
| "loss": 0.6234, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03252480705622933, |
| "grad_norm": 0.34327036142349243, |
| "learning_rate": 0.00029593698175787726, |
| "loss": 0.6485, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03307607497243661, |
| "grad_norm": 0.317579448223114, |
| "learning_rate": 0.00029585406301824206, |
| "loss": 0.6182, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03362734288864388, |
| "grad_norm": 0.3586544692516327, |
| "learning_rate": 0.0002957711442786069, |
| "loss": 0.6149, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.034178610804851156, |
| "grad_norm": 0.3077372908592224, |
| "learning_rate": 0.0002956882255389718, |
| "loss": 0.5806, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.034729878721058434, |
| "grad_norm": 0.33191806077957153, |
| "learning_rate": 0.00029560530679933664, |
| "loss": 0.631, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03528114663726571, |
| "grad_norm": 0.32726630568504333, |
| "learning_rate": 0.00029552238805970145, |
| "loss": 0.6364, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03583241455347299, |
| "grad_norm": 0.3058015704154968, |
| "learning_rate": 0.0002954394693200663, |
| "loss": 0.6193, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03638368246968027, |
| "grad_norm": 0.30789121985435486, |
| "learning_rate": 0.00029535655058043116, |
| "loss": 0.6322, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03693495038588754, |
| "grad_norm": 0.33515268564224243, |
| "learning_rate": 0.000295273631840796, |
| "loss": 0.6581, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.037486218302094816, |
| "grad_norm": 0.3196898400783539, |
| "learning_rate": 0.00029519071310116083, |
| "loss": 0.6134, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.038037486218302094, |
| "grad_norm": 0.3255867660045624, |
| "learning_rate": 0.0002951077943615257, |
| "loss": 0.6176, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03858875413450937, |
| "grad_norm": 0.3257988691329956, |
| "learning_rate": 0.0002950248756218905, |
| "loss": 0.6214, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03914002205071665, |
| "grad_norm": 0.29037123918533325, |
| "learning_rate": 0.00029494195688225535, |
| "loss": 0.6098, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03969128996692393, |
| "grad_norm": 0.3127928674221039, |
| "learning_rate": 0.0002948590381426202, |
| "loss": 0.6532, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0402425578831312, |
| "grad_norm": 0.2821784019470215, |
| "learning_rate": 0.00029477611940298507, |
| "loss": 0.6101, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.040793825799338476, |
| "grad_norm": 0.2889716923236847, |
| "learning_rate": 0.0002946932006633499, |
| "loss": 0.6097, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.041345093715545754, |
| "grad_norm": 0.3002908527851105, |
| "learning_rate": 0.00029461028192371473, |
| "loss": 0.626, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04189636163175303, |
| "grad_norm": 0.2943056523799896, |
| "learning_rate": 0.0002945273631840796, |
| "loss": 0.6061, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04244762954796031, |
| "grad_norm": 0.31590160727500916, |
| "learning_rate": 0.00029444444444444445, |
| "loss": 0.6279, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04299889746416759, |
| "grad_norm": 0.31002211570739746, |
| "learning_rate": 0.00029436152570480926, |
| "loss": 0.6066, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.043550165380374865, |
| "grad_norm": 0.27883172035217285, |
| "learning_rate": 0.0002942786069651741, |
| "loss": 0.6053, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.044101433296582136, |
| "grad_norm": 0.3098636567592621, |
| "learning_rate": 0.0002941956882255389, |
| "loss": 0.6041, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.044652701212789414, |
| "grad_norm": 0.31574317812919617, |
| "learning_rate": 0.0002941127694859038, |
| "loss": 0.6132, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04520396912899669, |
| "grad_norm": 0.2871106266975403, |
| "learning_rate": 0.00029402985074626864, |
| "loss": 0.5759, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04575523704520397, |
| "grad_norm": 0.2808583676815033, |
| "learning_rate": 0.0002939469320066335, |
| "loss": 0.583, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04630650496141125, |
| "grad_norm": 0.29489415884017944, |
| "learning_rate": 0.0002938640132669983, |
| "loss": 0.6018, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.046857772877618525, |
| "grad_norm": 0.28468286991119385, |
| "learning_rate": 0.00029378109452736316, |
| "loss": 0.602, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.047409040793825796, |
| "grad_norm": 0.28690364956855774, |
| "learning_rate": 0.000293698175787728, |
| "loss": 0.5802, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.047960308710033074, |
| "grad_norm": 0.30015993118286133, |
| "learning_rate": 0.0002936152570480929, |
| "loss": 0.5889, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04851157662624035, |
| "grad_norm": 0.3080478310585022, |
| "learning_rate": 0.0002935323383084577, |
| "loss": 0.6106, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04906284454244763, |
| "grad_norm": 0.2852279245853424, |
| "learning_rate": 0.00029344941956882254, |
| "loss": 0.5902, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04961411245865491, |
| "grad_norm": 0.2944631278514862, |
| "learning_rate": 0.00029336650082918735, |
| "loss": 0.6222, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.050165380374862185, |
| "grad_norm": 0.29476436972618103, |
| "learning_rate": 0.0002932835820895522, |
| "loss": 0.6151, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.050716648291069456, |
| "grad_norm": 0.2786809802055359, |
| "learning_rate": 0.00029320066334991707, |
| "loss": 0.5801, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.051267916207276734, |
| "grad_norm": 0.27844133973121643, |
| "learning_rate": 0.0002931177446102819, |
| "loss": 0.5708, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.05181918412348401, |
| "grad_norm": 0.2947113811969757, |
| "learning_rate": 0.00029303482587064673, |
| "loss": 0.5951, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.05237045203969129, |
| "grad_norm": 0.2926524877548218, |
| "learning_rate": 0.0002929519071310116, |
| "loss": 0.6281, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.05292171995589857, |
| "grad_norm": 0.27508488297462463, |
| "learning_rate": 0.00029286898839137645, |
| "loss": 0.5769, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.053472987872105845, |
| "grad_norm": 0.2983228862285614, |
| "learning_rate": 0.0002927860696517413, |
| "loss": 0.5808, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05402425578831312, |
| "grad_norm": 0.28955212235450745, |
| "learning_rate": 0.0002927031509121061, |
| "loss": 0.6009, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.054575523704520394, |
| "grad_norm": 0.30267390608787537, |
| "learning_rate": 0.0002926202321724709, |
| "loss": 0.5938, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05512679162072767, |
| "grad_norm": 0.2869952917098999, |
| "learning_rate": 0.0002925373134328358, |
| "loss": 0.5695, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05567805953693495, |
| "grad_norm": 0.28908076882362366, |
| "learning_rate": 0.00029245439469320064, |
| "loss": 0.5904, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05622932745314223, |
| "grad_norm": 0.2866143584251404, |
| "learning_rate": 0.0002923714759535655, |
| "loss": 0.5945, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.056780595369349506, |
| "grad_norm": 0.2788505554199219, |
| "learning_rate": 0.0002922885572139303, |
| "loss": 0.5861, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05733186328555678, |
| "grad_norm": 0.2852947413921356, |
| "learning_rate": 0.00029220563847429516, |
| "loss": 0.6012, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.057883131201764054, |
| "grad_norm": 0.27692896127700806, |
| "learning_rate": 0.00029212271973466, |
| "loss": 0.5797, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05843439911797133, |
| "grad_norm": 0.27395880222320557, |
| "learning_rate": 0.0002920398009950249, |
| "loss": 0.5854, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.05898566703417861, |
| "grad_norm": 0.2730069160461426, |
| "learning_rate": 0.0002919568822553897, |
| "loss": 0.5882, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05953693495038589, |
| "grad_norm": 0.2808207869529724, |
| "learning_rate": 0.00029187396351575454, |
| "loss": 0.5868, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.060088202866593166, |
| "grad_norm": 0.26693934202194214, |
| "learning_rate": 0.00029179104477611935, |
| "loss": 0.5656, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.06063947078280044, |
| "grad_norm": 0.29277607798576355, |
| "learning_rate": 0.0002917081260364842, |
| "loss": 0.608, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.061190738699007714, |
| "grad_norm": 0.29922837018966675, |
| "learning_rate": 0.00029162520729684907, |
| "loss": 0.5952, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06174200661521499, |
| "grad_norm": 0.26753753423690796, |
| "learning_rate": 0.0002915422885572139, |
| "loss": 0.5964, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.06229327453142227, |
| "grad_norm": 0.2910638451576233, |
| "learning_rate": 0.00029145936981757873, |
| "loss": 0.5822, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06284454244762955, |
| "grad_norm": 0.3202199339866638, |
| "learning_rate": 0.0002913764510779436, |
| "loss": 0.5927, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06339581036383682, |
| "grad_norm": 0.26713207364082336, |
| "learning_rate": 0.00029129353233830845, |
| "loss": 0.5698, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0639470782800441, |
| "grad_norm": 0.3109968304634094, |
| "learning_rate": 0.0002912106135986733, |
| "loss": 0.5954, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.06449834619625137, |
| "grad_norm": 0.30233150720596313, |
| "learning_rate": 0.0002911276948590381, |
| "loss": 0.5941, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.06504961411245866, |
| "grad_norm": 0.28545138239860535, |
| "learning_rate": 0.00029104477611940297, |
| "loss": 0.5773, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06560088202866593, |
| "grad_norm": 0.29633569717407227, |
| "learning_rate": 0.0002909618573797678, |
| "loss": 0.6014, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06615214994487321, |
| "grad_norm": 0.29278406500816345, |
| "learning_rate": 0.00029087893864013264, |
| "loss": 0.6001, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06670341786108049, |
| "grad_norm": 0.29871347546577454, |
| "learning_rate": 0.0002907960199004975, |
| "loss": 0.629, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06725468577728776, |
| "grad_norm": 0.27272510528564453, |
| "learning_rate": 0.00029071310116086235, |
| "loss": 0.5502, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06780595369349504, |
| "grad_norm": 0.2796414792537689, |
| "learning_rate": 0.00029063018242122716, |
| "loss": 0.5712, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.06835722160970231, |
| "grad_norm": 0.277700811624527, |
| "learning_rate": 0.000290547263681592, |
| "loss": 0.5654, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0689084895259096, |
| "grad_norm": 0.2710396647453308, |
| "learning_rate": 0.0002904643449419569, |
| "loss": 0.5866, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06945975744211687, |
| "grad_norm": 0.28910425305366516, |
| "learning_rate": 0.00029038142620232174, |
| "loss": 0.5679, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07001102535832414, |
| "grad_norm": 0.2892954647541046, |
| "learning_rate": 0.00029029850746268654, |
| "loss": 0.5915, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07056229327453142, |
| "grad_norm": 0.3241787552833557, |
| "learning_rate": 0.0002902155887230514, |
| "loss": 0.5818, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0711135611907387, |
| "grad_norm": 0.29878735542297363, |
| "learning_rate": 0.0002901326699834162, |
| "loss": 0.5813, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.07166482910694598, |
| "grad_norm": 0.27833399176597595, |
| "learning_rate": 0.00029004975124378106, |
| "loss": 0.5865, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07221609702315325, |
| "grad_norm": 0.3239665627479553, |
| "learning_rate": 0.0002899668325041459, |
| "loss": 0.5898, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07276736493936053, |
| "grad_norm": 0.31001126766204834, |
| "learning_rate": 0.0002898839137645108, |
| "loss": 0.577, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0733186328555678, |
| "grad_norm": 0.2673737704753876, |
| "learning_rate": 0.0002898009950248756, |
| "loss": 0.5684, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07386990077177508, |
| "grad_norm": 0.3218002915382385, |
| "learning_rate": 0.00028971807628524045, |
| "loss": 0.5826, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07442116868798236, |
| "grad_norm": 0.2867553234100342, |
| "learning_rate": 0.00028963515754560525, |
| "loss": 0.5679, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.07497243660418963, |
| "grad_norm": 0.2790491282939911, |
| "learning_rate": 0.00028955223880597017, |
| "loss": 0.5532, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.07552370452039692, |
| "grad_norm": 0.3101596534252167, |
| "learning_rate": 0.00028946932006633497, |
| "loss": 0.616, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07607497243660419, |
| "grad_norm": 0.2670627534389496, |
| "learning_rate": 0.00028938640132669983, |
| "loss": 0.5147, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.07662624035281147, |
| "grad_norm": 0.28873148560523987, |
| "learning_rate": 0.00028930348258706463, |
| "loss": 0.5723, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07717750826901874, |
| "grad_norm": 0.3042322099208832, |
| "learning_rate": 0.0002892205638474295, |
| "loss": 0.5483, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07772877618522601, |
| "grad_norm": 0.30197396874427795, |
| "learning_rate": 0.00028913764510779435, |
| "loss": 0.5731, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0782800441014333, |
| "grad_norm": 0.2676428258419037, |
| "learning_rate": 0.0002890547263681592, |
| "loss": 0.5384, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07883131201764057, |
| "grad_norm": 0.2983885705471039, |
| "learning_rate": 0.000288971807628524, |
| "loss": 0.5777, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07938257993384785, |
| "grad_norm": 0.3119770586490631, |
| "learning_rate": 0.0002888888888888888, |
| "loss": 0.5682, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07993384785005513, |
| "grad_norm": 0.28664880990982056, |
| "learning_rate": 0.0002888059701492537, |
| "loss": 0.5875, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0804851157662624, |
| "grad_norm": 0.2691631615161896, |
| "learning_rate": 0.00028872305140961854, |
| "loss": 0.5841, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08103638368246968, |
| "grad_norm": 0.29469335079193115, |
| "learning_rate": 0.0002886401326699834, |
| "loss": 0.6111, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08158765159867695, |
| "grad_norm": 0.27499398589134216, |
| "learning_rate": 0.0002885572139303482, |
| "loss": 0.5984, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.08213891951488424, |
| "grad_norm": 0.2869040369987488, |
| "learning_rate": 0.00028847429519071306, |
| "loss": 0.5862, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.08269018743109151, |
| "grad_norm": 0.25979968905448914, |
| "learning_rate": 0.0002883913764510779, |
| "loss": 0.5948, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08324145534729879, |
| "grad_norm": 0.2581140398979187, |
| "learning_rate": 0.0002883084577114428, |
| "loss": 0.543, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.08379272326350606, |
| "grad_norm": 0.3241422474384308, |
| "learning_rate": 0.0002882255389718076, |
| "loss": 0.5584, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08434399117971333, |
| "grad_norm": 0.3122616112232208, |
| "learning_rate": 0.00028814262023217245, |
| "loss": 0.6101, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08489525909592062, |
| "grad_norm": 0.28104907274246216, |
| "learning_rate": 0.00028805970149253725, |
| "loss": 0.5721, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08544652701212789, |
| "grad_norm": 0.32965442538261414, |
| "learning_rate": 0.0002879767827529021, |
| "loss": 0.5396, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08599779492833518, |
| "grad_norm": 0.32811254262924194, |
| "learning_rate": 0.00028789386401326697, |
| "loss": 0.5819, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.08654906284454245, |
| "grad_norm": 0.3046472668647766, |
| "learning_rate": 0.00028781094527363183, |
| "loss": 0.5756, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08710033076074973, |
| "grad_norm": 0.308413028717041, |
| "learning_rate": 0.00028772802653399663, |
| "loss": 0.611, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.087651598676957, |
| "grad_norm": 0.2636229693889618, |
| "learning_rate": 0.0002876451077943615, |
| "loss": 0.5608, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08820286659316427, |
| "grad_norm": 0.29085874557495117, |
| "learning_rate": 0.00028756218905472635, |
| "loss": 0.553, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08875413450937156, |
| "grad_norm": 0.2887280285358429, |
| "learning_rate": 0.0002874792703150912, |
| "loss": 0.5958, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.08930540242557883, |
| "grad_norm": 0.26728978753089905, |
| "learning_rate": 0.000287396351575456, |
| "loss": 0.5487, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.08985667034178611, |
| "grad_norm": 0.25967663526535034, |
| "learning_rate": 0.0002873134328358209, |
| "loss": 0.5657, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09040793825799338, |
| "grad_norm": 0.2513408064842224, |
| "learning_rate": 0.0002872305140961857, |
| "loss": 0.5358, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.09095920617420065, |
| "grad_norm": 0.28536808490753174, |
| "learning_rate": 0.00028714759535655054, |
| "loss": 0.6057, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.09151047409040794, |
| "grad_norm": 0.28766608238220215, |
| "learning_rate": 0.0002870646766169154, |
| "loss": 0.6108, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.09206174200661521, |
| "grad_norm": 0.25628137588500977, |
| "learning_rate": 0.00028698175787728026, |
| "loss": 0.53, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.0926130099228225, |
| "grad_norm": 0.2983819246292114, |
| "learning_rate": 0.00028689883913764506, |
| "loss": 0.5997, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.09316427783902977, |
| "grad_norm": 0.27762502431869507, |
| "learning_rate": 0.0002868159203980099, |
| "loss": 0.5833, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.09371554575523705, |
| "grad_norm": 0.28496429324150085, |
| "learning_rate": 0.0002867330016583748, |
| "loss": 0.5863, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09426681367144432, |
| "grad_norm": 0.26081910729408264, |
| "learning_rate": 0.00028665008291873964, |
| "loss": 0.5943, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09481808158765159, |
| "grad_norm": 0.27544835209846497, |
| "learning_rate": 0.00028656716417910444, |
| "loss": 0.6175, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09536934950385888, |
| "grad_norm": 0.2690446972846985, |
| "learning_rate": 0.0002864842454394693, |
| "loss": 0.5473, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09592061742006615, |
| "grad_norm": 0.2816300690174103, |
| "learning_rate": 0.0002864013266998341, |
| "loss": 0.5908, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.09647188533627343, |
| "grad_norm": 0.26558321714401245, |
| "learning_rate": 0.00028631840796019897, |
| "loss": 0.5711, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0970231532524807, |
| "grad_norm": 0.2692832946777344, |
| "learning_rate": 0.0002862354892205638, |
| "loss": 0.5731, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.09757442116868799, |
| "grad_norm": 0.2814270555973053, |
| "learning_rate": 0.0002861525704809287, |
| "loss": 0.5353, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.09812568908489526, |
| "grad_norm": 0.26562657952308655, |
| "learning_rate": 0.0002860696517412935, |
| "loss": 0.5955, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.09867695700110253, |
| "grad_norm": 0.2592059075832367, |
| "learning_rate": 0.00028598673300165835, |
| "loss": 0.5617, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09922822491730982, |
| "grad_norm": 0.26579222083091736, |
| "learning_rate": 0.0002859038142620232, |
| "loss": 0.5725, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.09977949283351709, |
| "grad_norm": 0.2731139063835144, |
| "learning_rate": 0.00028582089552238807, |
| "loss": 0.5614, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.10033076074972437, |
| "grad_norm": 0.2470698207616806, |
| "learning_rate": 0.00028573797678275287, |
| "loss": 0.5347, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.10088202866593164, |
| "grad_norm": 0.24656972289085388, |
| "learning_rate": 0.00028565505804311773, |
| "loss": 0.5481, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.10143329658213891, |
| "grad_norm": 0.2857254445552826, |
| "learning_rate": 0.00028557213930348254, |
| "loss": 0.602, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1019845644983462, |
| "grad_norm": 0.27286651730537415, |
| "learning_rate": 0.0002854892205638474, |
| "loss": 0.5585, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.10253583241455347, |
| "grad_norm": 0.2675493359565735, |
| "learning_rate": 0.00028540630182421225, |
| "loss": 0.567, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.10308710033076075, |
| "grad_norm": 0.26535746455192566, |
| "learning_rate": 0.00028532338308457706, |
| "loss": 0.5696, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.10363836824696802, |
| "grad_norm": 0.2633534371852875, |
| "learning_rate": 0.0002852404643449419, |
| "loss": 0.5326, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.10418963616317531, |
| "grad_norm": 0.2724531292915344, |
| "learning_rate": 0.0002851575456053068, |
| "loss": 0.5905, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.10474090407938258, |
| "grad_norm": 0.2680416405200958, |
| "learning_rate": 0.00028507462686567164, |
| "loss": 0.5924, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.10529217199558985, |
| "grad_norm": 0.28108882904052734, |
| "learning_rate": 0.00028499170812603644, |
| "loss": 0.5926, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.10584343991179714, |
| "grad_norm": 0.2787463366985321, |
| "learning_rate": 0.0002849087893864013, |
| "loss": 0.5699, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.1063947078280044, |
| "grad_norm": 0.2674010396003723, |
| "learning_rate": 0.0002848258706467661, |
| "loss": 0.587, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.10694597574421169, |
| "grad_norm": 0.27142807841300964, |
| "learning_rate": 0.00028474295190713097, |
| "loss": 0.5762, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.10749724366041896, |
| "grad_norm": 0.2817786633968353, |
| "learning_rate": 0.0002846600331674958, |
| "loss": 0.5672, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.10804851157662625, |
| "grad_norm": 0.250627338886261, |
| "learning_rate": 0.0002845771144278607, |
| "loss": 0.5425, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.10859977949283352, |
| "grad_norm": 0.2636951506137848, |
| "learning_rate": 0.0002844941956882255, |
| "loss": 0.579, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.10915104740904079, |
| "grad_norm": 0.2613438665866852, |
| "learning_rate": 0.00028441127694859035, |
| "loss": 0.5531, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.10970231532524807, |
| "grad_norm": 0.28677162528038025, |
| "learning_rate": 0.0002843283582089552, |
| "loss": 0.5875, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.11025358324145534, |
| "grad_norm": 0.2670292258262634, |
| "learning_rate": 0.00028424543946932007, |
| "loss": 0.5625, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11080485115766263, |
| "grad_norm": 0.23815321922302246, |
| "learning_rate": 0.00028416252072968487, |
| "loss": 0.5484, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.1113561190738699, |
| "grad_norm": 0.2709272503852844, |
| "learning_rate": 0.00028407960199004973, |
| "loss": 0.5387, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.11190738699007717, |
| "grad_norm": 0.25918126106262207, |
| "learning_rate": 0.00028399668325041453, |
| "loss": 0.5686, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.11245865490628446, |
| "grad_norm": 0.27118560671806335, |
| "learning_rate": 0.0002839137645107794, |
| "loss": 0.5637, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.11300992282249173, |
| "grad_norm": 0.26395100355148315, |
| "learning_rate": 0.00028383084577114425, |
| "loss": 0.5499, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.11356119073869901, |
| "grad_norm": 0.272989422082901, |
| "learning_rate": 0.0002837479270315091, |
| "loss": 0.5606, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.11411245865490628, |
| "grad_norm": 0.2708880603313446, |
| "learning_rate": 0.0002836650082918739, |
| "loss": 0.534, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.11466372657111357, |
| "grad_norm": 0.28653857111930847, |
| "learning_rate": 0.0002835820895522388, |
| "loss": 0.5727, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.11521499448732084, |
| "grad_norm": 0.2767845392227173, |
| "learning_rate": 0.00028349917081260364, |
| "loss": 0.5664, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.11576626240352811, |
| "grad_norm": 0.27690836787223816, |
| "learning_rate": 0.0002834162520729685, |
| "loss": 0.5656, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1163175303197354, |
| "grad_norm": 0.2831721007823944, |
| "learning_rate": 0.0002833333333333333, |
| "loss": 0.596, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.11686879823594266, |
| "grad_norm": 0.3024809658527374, |
| "learning_rate": 0.00028325041459369816, |
| "loss": 0.5849, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.11742006615214995, |
| "grad_norm": 0.2787605822086334, |
| "learning_rate": 0.00028316749585406296, |
| "loss": 0.5606, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.11797133406835722, |
| "grad_norm": 0.2734401226043701, |
| "learning_rate": 0.0002830845771144278, |
| "loss": 0.5524, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1185226019845645, |
| "grad_norm": 0.2717944085597992, |
| "learning_rate": 0.0002830016583747927, |
| "loss": 0.5533, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.11907386990077178, |
| "grad_norm": 0.2634055018424988, |
| "learning_rate": 0.00028291873963515754, |
| "loss": 0.5552, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.11962513781697905, |
| "grad_norm": 0.27231520414352417, |
| "learning_rate": 0.00028283582089552235, |
| "loss": 0.5608, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.12017640573318633, |
| "grad_norm": 0.2709995210170746, |
| "learning_rate": 0.0002827529021558872, |
| "loss": 0.5608, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.1207276736493936, |
| "grad_norm": 0.24507290124893188, |
| "learning_rate": 0.00028266998341625206, |
| "loss": 0.5324, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.12127894156560089, |
| "grad_norm": 0.26341697573661804, |
| "learning_rate": 0.0002825870646766169, |
| "loss": 0.5686, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12183020948180816, |
| "grad_norm": 0.2655317783355713, |
| "learning_rate": 0.00028250414593698173, |
| "loss": 0.5792, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.12238147739801543, |
| "grad_norm": 0.263235867023468, |
| "learning_rate": 0.0002824212271973466, |
| "loss": 0.5633, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.12293274531422271, |
| "grad_norm": 0.28087055683135986, |
| "learning_rate": 0.0002823383084577114, |
| "loss": 0.559, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.12348401323042998, |
| "grad_norm": 0.2734236717224121, |
| "learning_rate": 0.00028225538971807625, |
| "loss": 0.5772, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.12403528114663727, |
| "grad_norm": 0.2594766318798065, |
| "learning_rate": 0.0002821724709784411, |
| "loss": 0.5698, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.12458654906284454, |
| "grad_norm": 0.2490595132112503, |
| "learning_rate": 0.00028208955223880597, |
| "loss": 0.5419, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.12513781697905182, |
| "grad_norm": 0.25069767236709595, |
| "learning_rate": 0.0002820066334991708, |
| "loss": 0.531, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1256890848952591, |
| "grad_norm": 0.2518230080604553, |
| "learning_rate": 0.00028192371475953563, |
| "loss": 0.5509, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.12624035281146637, |
| "grad_norm": 0.2488110512495041, |
| "learning_rate": 0.0002818407960199005, |
| "loss": 0.5341, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.12679162072767364, |
| "grad_norm": 0.26115381717681885, |
| "learning_rate": 0.00028175787728026535, |
| "loss": 0.5433, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.12734288864388094, |
| "grad_norm": 0.24792101979255676, |
| "learning_rate": 0.00028167495854063016, |
| "loss": 0.5672, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1278941565600882, |
| "grad_norm": 0.2637925148010254, |
| "learning_rate": 0.00028159203980099496, |
| "loss": 0.5868, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.12844542447629548, |
| "grad_norm": 0.2799462676048279, |
| "learning_rate": 0.0002815091210613598, |
| "loss": 0.5514, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.12899669239250275, |
| "grad_norm": 0.2809968590736389, |
| "learning_rate": 0.0002814262023217247, |
| "loss": 0.5847, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.12954796030871002, |
| "grad_norm": 0.27108708024024963, |
| "learning_rate": 0.00028134328358208954, |
| "loss": 0.5718, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.13009922822491732, |
| "grad_norm": 0.2557702660560608, |
| "learning_rate": 0.00028126036484245434, |
| "loss": 0.575, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1306504961411246, |
| "grad_norm": 0.2593226134777069, |
| "learning_rate": 0.0002811774461028192, |
| "loss": 0.5534, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.13120176405733186, |
| "grad_norm": 0.2657114565372467, |
| "learning_rate": 0.00028109452736318406, |
| "loss": 0.5605, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.13175303197353913, |
| "grad_norm": 0.25616228580474854, |
| "learning_rate": 0.0002810116086235489, |
| "loss": 0.5227, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.13230429988974643, |
| "grad_norm": 0.2749009430408478, |
| "learning_rate": 0.0002809286898839137, |
| "loss": 0.536, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1328555678059537, |
| "grad_norm": 0.2617826759815216, |
| "learning_rate": 0.0002808457711442786, |
| "loss": 0.5602, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.13340683572216097, |
| "grad_norm": 0.2576202154159546, |
| "learning_rate": 0.0002807628524046434, |
| "loss": 0.5205, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.13395810363836824, |
| "grad_norm": 0.2764850854873657, |
| "learning_rate": 0.00028067993366500825, |
| "loss": 0.5752, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.1345093715545755, |
| "grad_norm": 0.2652502954006195, |
| "learning_rate": 0.0002805970149253731, |
| "loss": 0.5495, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.1350606394707828, |
| "grad_norm": 0.24600890278816223, |
| "learning_rate": 0.00028051409618573797, |
| "loss": 0.5146, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.13561190738699008, |
| "grad_norm": 0.253635048866272, |
| "learning_rate": 0.0002804311774461028, |
| "loss": 0.5483, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.13616317530319735, |
| "grad_norm": 0.24037104845046997, |
| "learning_rate": 0.00028034825870646763, |
| "loss": 0.5624, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.13671444321940462, |
| "grad_norm": 0.24676042795181274, |
| "learning_rate": 0.0002802653399668325, |
| "loss": 0.537, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1372657111356119, |
| "grad_norm": 0.25283971428871155, |
| "learning_rate": 0.00028018242122719735, |
| "loss": 0.5705, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1378169790518192, |
| "grad_norm": 0.2672947347164154, |
| "learning_rate": 0.00028009950248756216, |
| "loss": 0.5699, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.13836824696802646, |
| "grad_norm": 0.25930237770080566, |
| "learning_rate": 0.000280016583747927, |
| "loss": 0.5581, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.13891951488423374, |
| "grad_norm": 0.24674735963344574, |
| "learning_rate": 0.0002799336650082918, |
| "loss": 0.5282, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.139470782800441, |
| "grad_norm": 0.2826119065284729, |
| "learning_rate": 0.0002798507462686567, |
| "loss": 0.5261, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.14002205071664828, |
| "grad_norm": 0.290584534406662, |
| "learning_rate": 0.00027976782752902154, |
| "loss": 0.5245, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.14057331863285558, |
| "grad_norm": 0.25072574615478516, |
| "learning_rate": 0.0002796849087893864, |
| "loss": 0.5264, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.14112458654906285, |
| "grad_norm": 0.24929046630859375, |
| "learning_rate": 0.0002796019900497512, |
| "loss": 0.5698, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.14167585446527012, |
| "grad_norm": 0.24978522956371307, |
| "learning_rate": 0.00027951907131011606, |
| "loss": 0.5269, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1422271223814774, |
| "grad_norm": 0.26195666193962097, |
| "learning_rate": 0.0002794361525704809, |
| "loss": 0.5801, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1427783902976847, |
| "grad_norm": 0.27321335673332214, |
| "learning_rate": 0.0002793532338308458, |
| "loss": 0.5556, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.14332965821389196, |
| "grad_norm": 0.2694965898990631, |
| "learning_rate": 0.0002792703150912106, |
| "loss": 0.5715, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.14388092613009923, |
| "grad_norm": 0.2757553160190582, |
| "learning_rate": 0.00027918739635157544, |
| "loss": 0.5645, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1444321940463065, |
| "grad_norm": 0.2602946162223816, |
| "learning_rate": 0.00027910447761194025, |
| "loss": 0.5703, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.14498346196251377, |
| "grad_norm": 0.24068838357925415, |
| "learning_rate": 0.0002790215588723051, |
| "loss": 0.5168, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.14553472987872107, |
| "grad_norm": 0.26140162348747253, |
| "learning_rate": 0.00027893864013266997, |
| "loss": 0.5271, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.14608599779492834, |
| "grad_norm": 0.26940983533859253, |
| "learning_rate": 0.0002788557213930348, |
| "loss": 0.5571, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1466372657111356, |
| "grad_norm": 0.24524417519569397, |
| "learning_rate": 0.00027877280265339963, |
| "loss": 0.5227, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.14718853362734288, |
| "grad_norm": 0.2636984884738922, |
| "learning_rate": 0.0002786898839137645, |
| "loss": 0.5335, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.14773980154355015, |
| "grad_norm": 0.24600271880626678, |
| "learning_rate": 0.00027860696517412935, |
| "loss": 0.5601, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.14829106945975745, |
| "grad_norm": 0.24977444112300873, |
| "learning_rate": 0.0002785240464344942, |
| "loss": 0.5437, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.14884233737596472, |
| "grad_norm": 0.27960002422332764, |
| "learning_rate": 0.000278441127694859, |
| "loss": 0.548, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.149393605292172, |
| "grad_norm": 0.2514914870262146, |
| "learning_rate": 0.00027835820895522387, |
| "loss": 0.5335, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.14994487320837926, |
| "grad_norm": 0.2503030300140381, |
| "learning_rate": 0.0002782752902155887, |
| "loss": 0.5538, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.15049614112458654, |
| "grad_norm": 0.28311678767204285, |
| "learning_rate": 0.00027819237147595354, |
| "loss": 0.5649, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.15104740904079383, |
| "grad_norm": 0.27529653906822205, |
| "learning_rate": 0.0002781094527363184, |
| "loss": 0.5432, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1515986769570011, |
| "grad_norm": 0.266111820936203, |
| "learning_rate": 0.0002780265339966832, |
| "loss": 0.5475, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.15214994487320838, |
| "grad_norm": 0.2525365352630615, |
| "learning_rate": 0.00027794361525704806, |
| "loss": 0.5252, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.15270121278941565, |
| "grad_norm": 0.2655681371688843, |
| "learning_rate": 0.0002778606965174129, |
| "loss": 0.5406, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.15325248070562295, |
| "grad_norm": 0.29118314385414124, |
| "learning_rate": 0.0002777777777777778, |
| "loss": 0.5324, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.15380374862183022, |
| "grad_norm": 0.2875930070877075, |
| "learning_rate": 0.0002776948590381426, |
| "loss": 0.5804, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1543550165380375, |
| "grad_norm": 0.26764920353889465, |
| "learning_rate": 0.00027761194029850744, |
| "loss": 0.5391, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15490628445424476, |
| "grad_norm": 0.2753891348838806, |
| "learning_rate": 0.00027752902155887225, |
| "loss": 0.5573, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.15545755237045203, |
| "grad_norm": 0.26174411177635193, |
| "learning_rate": 0.0002774461028192371, |
| "loss": 0.5543, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.15600882028665933, |
| "grad_norm": 0.25004303455352783, |
| "learning_rate": 0.00027736318407960196, |
| "loss": 0.5546, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1565600882028666, |
| "grad_norm": 0.2634401023387909, |
| "learning_rate": 0.0002772802653399668, |
| "loss": 0.524, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.15711135611907387, |
| "grad_norm": 0.26751798391342163, |
| "learning_rate": 0.00027719734660033163, |
| "loss": 0.574, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.15766262403528114, |
| "grad_norm": 0.2556850016117096, |
| "learning_rate": 0.0002771144278606965, |
| "loss": 0.5533, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1582138919514884, |
| "grad_norm": 0.2557762563228607, |
| "learning_rate": 0.00027703150912106135, |
| "loss": 0.546, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1587651598676957, |
| "grad_norm": 0.25817009806632996, |
| "learning_rate": 0.0002769485903814262, |
| "loss": 0.5519, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.15931642778390298, |
| "grad_norm": 0.26580142974853516, |
| "learning_rate": 0.000276865671641791, |
| "loss": 0.5438, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.15986769570011025, |
| "grad_norm": 0.25780072808265686, |
| "learning_rate": 0.00027678275290215587, |
| "loss": 0.549, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.16041896361631752, |
| "grad_norm": 0.2627890706062317, |
| "learning_rate": 0.0002766998341625207, |
| "loss": 0.5565, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1609702315325248, |
| "grad_norm": 0.26781341433525085, |
| "learning_rate": 0.00027661691542288553, |
| "loss": 0.542, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1615214994487321, |
| "grad_norm": 0.253888338804245, |
| "learning_rate": 0.0002765339966832504, |
| "loss": 0.5424, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.16207276736493936, |
| "grad_norm": 0.2835153043270111, |
| "learning_rate": 0.00027645107794361525, |
| "loss": 0.5354, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.16262403528114663, |
| "grad_norm": 0.286640465259552, |
| "learning_rate": 0.00027636815920398006, |
| "loss": 0.5209, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1631753031973539, |
| "grad_norm": 0.25742077827453613, |
| "learning_rate": 0.0002762852404643449, |
| "loss": 0.5198, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1637265711135612, |
| "grad_norm": 0.24710626900196075, |
| "learning_rate": 0.0002762023217247098, |
| "loss": 0.5189, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.16427783902976847, |
| "grad_norm": 0.28113001585006714, |
| "learning_rate": 0.00027611940298507464, |
| "loss": 0.5519, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.16482910694597575, |
| "grad_norm": 0.2573966085910797, |
| "learning_rate": 0.00027603648424543944, |
| "loss": 0.5307, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.16538037486218302, |
| "grad_norm": 0.24416916072368622, |
| "learning_rate": 0.0002759535655058043, |
| "loss": 0.5519, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1659316427783903, |
| "grad_norm": 0.25596654415130615, |
| "learning_rate": 0.0002758706467661691, |
| "loss": 0.5344, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.16648291069459759, |
| "grad_norm": 0.25158900022506714, |
| "learning_rate": 0.00027578772802653396, |
| "loss": 0.5399, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.16703417861080486, |
| "grad_norm": 0.24854016304016113, |
| "learning_rate": 0.0002757048092868988, |
| "loss": 0.5389, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.16758544652701213, |
| "grad_norm": 0.2592412233352661, |
| "learning_rate": 0.0002756218905472637, |
| "loss": 0.5584, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1681367144432194, |
| "grad_norm": 0.2527318298816681, |
| "learning_rate": 0.0002755389718076285, |
| "loss": 0.5604, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.16868798235942667, |
| "grad_norm": 0.26560983061790466, |
| "learning_rate": 0.00027545605306799335, |
| "loss": 0.5561, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.16923925027563397, |
| "grad_norm": 0.2634880542755127, |
| "learning_rate": 0.0002753731343283582, |
| "loss": 0.5281, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.16979051819184124, |
| "grad_norm": 0.2732850909233093, |
| "learning_rate": 0.00027529021558872306, |
| "loss": 0.5398, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1703417861080485, |
| "grad_norm": 0.23158006370067596, |
| "learning_rate": 0.00027520729684908787, |
| "loss": 0.5325, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.17089305402425578, |
| "grad_norm": 0.24649128317832947, |
| "learning_rate": 0.00027512437810945273, |
| "loss": 0.5381, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17144432194046305, |
| "grad_norm": 0.2770949602127075, |
| "learning_rate": 0.00027504145936981753, |
| "loss": 0.5498, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.17199558985667035, |
| "grad_norm": 0.25388598442077637, |
| "learning_rate": 0.0002749585406301824, |
| "loss": 0.5389, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.17254685777287762, |
| "grad_norm": 0.2431599199771881, |
| "learning_rate": 0.00027487562189054725, |
| "loss": 0.5343, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.1730981256890849, |
| "grad_norm": 0.24289795756340027, |
| "learning_rate": 0.0002747927031509121, |
| "loss": 0.5073, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.17364939360529216, |
| "grad_norm": 0.2458408623933792, |
| "learning_rate": 0.0002747097844112769, |
| "loss": 0.5278, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.17420066152149946, |
| "grad_norm": 0.24127742648124695, |
| "learning_rate": 0.0002746268656716418, |
| "loss": 0.5345, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.17475192943770673, |
| "grad_norm": 0.26737701892852783, |
| "learning_rate": 0.00027454394693200663, |
| "loss": 0.5395, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.175303197353914, |
| "grad_norm": 0.26361507177352905, |
| "learning_rate": 0.0002744610281923715, |
| "loss": 0.5405, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.17585446527012127, |
| "grad_norm": 0.24210020899772644, |
| "learning_rate": 0.0002743781094527363, |
| "loss": 0.5268, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.17640573318632854, |
| "grad_norm": 0.2510232627391815, |
| "learning_rate": 0.0002742951907131011, |
| "loss": 0.5373, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.17695700110253584, |
| "grad_norm": 0.23939576745033264, |
| "learning_rate": 0.00027421227197346596, |
| "loss": 0.5561, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.17750826901874311, |
| "grad_norm": 0.273258239030838, |
| "learning_rate": 0.0002741293532338308, |
| "loss": 0.5507, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.17805953693495039, |
| "grad_norm": 0.23547501862049103, |
| "learning_rate": 0.0002740464344941957, |
| "loss": 0.5293, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.17861080485115766, |
| "grad_norm": 0.24796201288700104, |
| "learning_rate": 0.0002739635157545605, |
| "loss": 0.5378, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.17916207276736493, |
| "grad_norm": 0.23436011373996735, |
| "learning_rate": 0.00027388059701492534, |
| "loss": 0.5432, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.17971334068357223, |
| "grad_norm": 0.22892701625823975, |
| "learning_rate": 0.0002737976782752902, |
| "loss": 0.5221, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1802646085997795, |
| "grad_norm": 0.23817826807498932, |
| "learning_rate": 0.00027371475953565506, |
| "loss": 0.5284, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.18081587651598677, |
| "grad_norm": 0.23703162372112274, |
| "learning_rate": 0.00027363184079601987, |
| "loss": 0.5223, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.18136714443219404, |
| "grad_norm": 0.24087084829807281, |
| "learning_rate": 0.0002735489220563847, |
| "loss": 0.5489, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1819184123484013, |
| "grad_norm": 0.2529735267162323, |
| "learning_rate": 0.00027346600331674953, |
| "loss": 0.5485, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1824696802646086, |
| "grad_norm": 0.23450088500976562, |
| "learning_rate": 0.0002733830845771144, |
| "loss": 0.4971, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.18302094818081588, |
| "grad_norm": 0.23895451426506042, |
| "learning_rate": 0.00027330016583747925, |
| "loss": 0.5165, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.18357221609702315, |
| "grad_norm": 0.24417142570018768, |
| "learning_rate": 0.0002732172470978441, |
| "loss": 0.5491, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.18412348401323042, |
| "grad_norm": 0.2527695596218109, |
| "learning_rate": 0.0002731343283582089, |
| "loss": 0.5255, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.18467475192943772, |
| "grad_norm": 0.24978198111057281, |
| "learning_rate": 0.00027305140961857377, |
| "loss": 0.5389, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.185226019845645, |
| "grad_norm": 0.2539977431297302, |
| "learning_rate": 0.00027296849087893863, |
| "loss": 0.5392, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.18577728776185226, |
| "grad_norm": 0.24033623933792114, |
| "learning_rate": 0.0002728855721393035, |
| "loss": 0.5356, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.18632855567805953, |
| "grad_norm": 0.24697022140026093, |
| "learning_rate": 0.0002728026533996683, |
| "loss": 0.5159, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1868798235942668, |
| "grad_norm": 0.25741416215896606, |
| "learning_rate": 0.00027271973466003315, |
| "loss": 0.56, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1874310915104741, |
| "grad_norm": 0.2324167639017105, |
| "learning_rate": 0.00027263681592039796, |
| "loss": 0.5379, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.18798235942668137, |
| "grad_norm": 0.24800144135951996, |
| "learning_rate": 0.0002725538971807628, |
| "loss": 0.5129, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.18853362734288864, |
| "grad_norm": 0.26905378699302673, |
| "learning_rate": 0.0002724709784411277, |
| "loss": 0.5226, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.18908489525909591, |
| "grad_norm": 0.25401249527931213, |
| "learning_rate": 0.00027238805970149254, |
| "loss": 0.5313, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.18963616317530319, |
| "grad_norm": 0.24307483434677124, |
| "learning_rate": 0.00027230514096185734, |
| "loss": 0.5427, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.19018743109151048, |
| "grad_norm": 0.25807374715805054, |
| "learning_rate": 0.0002722222222222222, |
| "loss": 0.524, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.19073869900771775, |
| "grad_norm": 0.2321993112564087, |
| "learning_rate": 0.00027213930348258706, |
| "loss": 0.5314, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.19128996692392503, |
| "grad_norm": 0.23558932542800903, |
| "learning_rate": 0.0002720563847429519, |
| "loss": 0.5223, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1918412348401323, |
| "grad_norm": 0.25960054993629456, |
| "learning_rate": 0.0002719734660033167, |
| "loss": 0.5436, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.19239250275633957, |
| "grad_norm": 0.2273932248353958, |
| "learning_rate": 0.0002718905472636816, |
| "loss": 0.5048, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.19294377067254687, |
| "grad_norm": 0.2279786467552185, |
| "learning_rate": 0.0002718076285240464, |
| "loss": 0.5164, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19349503858875414, |
| "grad_norm": 0.23833182454109192, |
| "learning_rate": 0.00027172470978441125, |
| "loss": 0.5378, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1940463065049614, |
| "grad_norm": 0.2499193549156189, |
| "learning_rate": 0.0002716417910447761, |
| "loss": 0.5494, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.19459757442116868, |
| "grad_norm": 0.2734036147594452, |
| "learning_rate": 0.00027155887230514097, |
| "loss": 0.5391, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.19514884233737598, |
| "grad_norm": 0.25754764676094055, |
| "learning_rate": 0.00027147595356550577, |
| "loss": 0.5212, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.19570011025358325, |
| "grad_norm": 0.22964167594909668, |
| "learning_rate": 0.00027139303482587063, |
| "loss": 0.5301, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.19625137816979052, |
| "grad_norm": 0.24985463917255402, |
| "learning_rate": 0.0002713101160862355, |
| "loss": 0.5177, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1968026460859978, |
| "grad_norm": 0.27296510338783264, |
| "learning_rate": 0.00027122719734660035, |
| "loss": 0.5443, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.19735391400220506, |
| "grad_norm": 0.2506982982158661, |
| "learning_rate": 0.00027114427860696515, |
| "loss": 0.5419, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.19790518191841236, |
| "grad_norm": 0.2600388526916504, |
| "learning_rate": 0.00027106135986733, |
| "loss": 0.5402, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.19845644983461963, |
| "grad_norm": 0.25040823221206665, |
| "learning_rate": 0.0002709784411276948, |
| "loss": 0.5463, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1990077177508269, |
| "grad_norm": 0.25567591190338135, |
| "learning_rate": 0.0002708955223880597, |
| "loss": 0.5189, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.19955898566703417, |
| "grad_norm": 0.24336600303649902, |
| "learning_rate": 0.00027081260364842454, |
| "loss": 0.5393, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.20011025358324144, |
| "grad_norm": 0.23660831153392792, |
| "learning_rate": 0.00027072968490878934, |
| "loss": 0.5121, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.20066152149944874, |
| "grad_norm": 0.23589812219142914, |
| "learning_rate": 0.0002706467661691542, |
| "loss": 0.5016, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.201212789415656, |
| "grad_norm": 0.2517778277397156, |
| "learning_rate": 0.000270563847429519, |
| "loss": 0.5127, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.20176405733186328, |
| "grad_norm": 0.263662189245224, |
| "learning_rate": 0.0002704809286898839, |
| "loss": 0.5518, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.20231532524807055, |
| "grad_norm": 0.25211676955223083, |
| "learning_rate": 0.0002703980099502487, |
| "loss": 0.5362, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.20286659316427783, |
| "grad_norm": 0.22718675434589386, |
| "learning_rate": 0.0002703150912106136, |
| "loss": 0.5127, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.20341786108048512, |
| "grad_norm": 0.24481582641601562, |
| "learning_rate": 0.0002702321724709784, |
| "loss": 0.5084, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2039691289966924, |
| "grad_norm": 0.2656586766242981, |
| "learning_rate": 0.00027014925373134325, |
| "loss": 0.5454, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.20452039691289967, |
| "grad_norm": 0.2491103559732437, |
| "learning_rate": 0.0002700663349917081, |
| "loss": 0.5412, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.20507166482910694, |
| "grad_norm": 0.252030611038208, |
| "learning_rate": 0.00026998341625207296, |
| "loss": 0.5761, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.20562293274531424, |
| "grad_norm": 0.24894152581691742, |
| "learning_rate": 0.00026990049751243777, |
| "loss": 0.5264, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2061742006615215, |
| "grad_norm": 0.25231489539146423, |
| "learning_rate": 0.00026981757877280263, |
| "loss": 0.5295, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.20672546857772878, |
| "grad_norm": 0.25147655606269836, |
| "learning_rate": 0.00026973466003316743, |
| "loss": 0.5126, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.20727673649393605, |
| "grad_norm": 0.2379835844039917, |
| "learning_rate": 0.0002696517412935323, |
| "loss": 0.4937, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.20782800441014332, |
| "grad_norm": 0.24038439989089966, |
| "learning_rate": 0.00026956882255389715, |
| "loss": 0.5426, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.20837927232635062, |
| "grad_norm": 0.24591150879859924, |
| "learning_rate": 0.000269485903814262, |
| "loss": 0.5191, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2089305402425579, |
| "grad_norm": 0.23723675310611725, |
| "learning_rate": 0.0002694029850746268, |
| "loss": 0.5247, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.20948180815876516, |
| "grad_norm": 0.2618078887462616, |
| "learning_rate": 0.0002693200663349917, |
| "loss": 0.5559, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21003307607497243, |
| "grad_norm": 0.2556595504283905, |
| "learning_rate": 0.00026923714759535653, |
| "loss": 0.544, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2105843439911797, |
| "grad_norm": 0.24010786414146423, |
| "learning_rate": 0.0002691542288557214, |
| "loss": 0.4958, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.211135611907387, |
| "grad_norm": 0.253151535987854, |
| "learning_rate": 0.0002690713101160862, |
| "loss": 0.5371, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.21168687982359427, |
| "grad_norm": 0.2715364694595337, |
| "learning_rate": 0.00026898839137645106, |
| "loss": 0.5788, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.21223814773980154, |
| "grad_norm": 0.2472977191209793, |
| "learning_rate": 0.00026890547263681586, |
| "loss": 0.5359, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2127894156560088, |
| "grad_norm": 0.2925645411014557, |
| "learning_rate": 0.0002688225538971807, |
| "loss": 0.5373, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.21334068357221608, |
| "grad_norm": 0.23534104228019714, |
| "learning_rate": 0.0002687396351575456, |
| "loss": 0.5421, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.21389195148842338, |
| "grad_norm": 0.25397318601608276, |
| "learning_rate": 0.00026865671641791044, |
| "loss": 0.5538, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.21444321940463065, |
| "grad_norm": 0.26708152890205383, |
| "learning_rate": 0.00026857379767827524, |
| "loss": 0.5088, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.21499448732083792, |
| "grad_norm": 0.24131494760513306, |
| "learning_rate": 0.0002684908789386401, |
| "loss": 0.5215, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2155457552370452, |
| "grad_norm": 0.25981369614601135, |
| "learning_rate": 0.00026840796019900496, |
| "loss": 0.5481, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2160970231532525, |
| "grad_norm": 0.25831639766693115, |
| "learning_rate": 0.0002683250414593698, |
| "loss": 0.5352, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.21664829106945976, |
| "grad_norm": 0.24388836324214935, |
| "learning_rate": 0.0002682421227197346, |
| "loss": 0.5047, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.21719955898566704, |
| "grad_norm": 0.25614237785339355, |
| "learning_rate": 0.0002681592039800995, |
| "loss": 0.5236, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2177508269018743, |
| "grad_norm": 0.23628944158554077, |
| "learning_rate": 0.0002680762852404643, |
| "loss": 0.5118, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.21830209481808158, |
| "grad_norm": 0.25390875339508057, |
| "learning_rate": 0.00026799336650082915, |
| "loss": 0.5231, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.21885336273428888, |
| "grad_norm": 0.27364251017570496, |
| "learning_rate": 0.000267910447761194, |
| "loss": 0.5573, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.21940463065049615, |
| "grad_norm": 0.25110650062561035, |
| "learning_rate": 0.00026782752902155887, |
| "loss": 0.5078, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.21995589856670342, |
| "grad_norm": 0.24438323080539703, |
| "learning_rate": 0.0002677446102819237, |
| "loss": 0.5026, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2205071664829107, |
| "grad_norm": 0.23745465278625488, |
| "learning_rate": 0.00026766169154228853, |
| "loss": 0.5568, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22105843439911796, |
| "grad_norm": 0.25559869408607483, |
| "learning_rate": 0.0002675787728026534, |
| "loss": 0.5286, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.22160970231532526, |
| "grad_norm": 0.24587516486644745, |
| "learning_rate": 0.00026749585406301825, |
| "loss": 0.5258, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.22216097023153253, |
| "grad_norm": 0.26151949167251587, |
| "learning_rate": 0.00026741293532338306, |
| "loss": 0.5426, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.2227122381477398, |
| "grad_norm": 0.2910129427909851, |
| "learning_rate": 0.0002673300165837479, |
| "loss": 0.5376, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.22326350606394707, |
| "grad_norm": 0.28276947140693665, |
| "learning_rate": 0.0002672470978441127, |
| "loss": 0.5271, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.22381477398015434, |
| "grad_norm": 0.25096046924591064, |
| "learning_rate": 0.0002671641791044776, |
| "loss": 0.5439, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.22436604189636164, |
| "grad_norm": 0.2461530715227127, |
| "learning_rate": 0.00026708126036484244, |
| "loss": 0.5239, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2249173098125689, |
| "grad_norm": 0.2833070456981659, |
| "learning_rate": 0.00026699834162520724, |
| "loss": 0.531, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.22546857772877618, |
| "grad_norm": 0.24600760638713837, |
| "learning_rate": 0.0002669154228855721, |
| "loss": 0.5419, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.22601984564498345, |
| "grad_norm": 0.2620793581008911, |
| "learning_rate": 0.00026683250414593696, |
| "loss": 0.5033, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.22657111356119075, |
| "grad_norm": 0.27523407340049744, |
| "learning_rate": 0.0002667495854063018, |
| "loss": 0.5257, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.22712238147739802, |
| "grad_norm": 0.2630368769168854, |
| "learning_rate": 0.0002666666666666666, |
| "loss": 0.5156, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2276736493936053, |
| "grad_norm": 0.24897338449954987, |
| "learning_rate": 0.0002665837479270315, |
| "loss": 0.5301, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.22822491730981256, |
| "grad_norm": 0.26213693618774414, |
| "learning_rate": 0.0002665008291873963, |
| "loss": 0.5563, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.22877618522601983, |
| "grad_norm": 0.23822888731956482, |
| "learning_rate": 0.00026641791044776115, |
| "loss": 0.5273, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.22932745314222713, |
| "grad_norm": 0.22970083355903625, |
| "learning_rate": 0.000266334991708126, |
| "loss": 0.5321, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2298787210584344, |
| "grad_norm": 0.26430296897888184, |
| "learning_rate": 0.00026625207296849087, |
| "loss": 0.5539, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.23042998897464168, |
| "grad_norm": 0.25960785150527954, |
| "learning_rate": 0.00026616915422885567, |
| "loss": 0.5357, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.23098125689084895, |
| "grad_norm": 0.23449423909187317, |
| "learning_rate": 0.00026608623548922053, |
| "loss": 0.5143, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.23153252480705622, |
| "grad_norm": 0.2795349061489105, |
| "learning_rate": 0.0002660033167495854, |
| "loss": 0.5363, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.23208379272326352, |
| "grad_norm": 0.2637255787849426, |
| "learning_rate": 0.00026592039800995025, |
| "loss": 0.5607, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2326350606394708, |
| "grad_norm": 0.23269203305244446, |
| "learning_rate": 0.00026583747927031505, |
| "loss": 0.5239, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.23318632855567806, |
| "grad_norm": 0.2501350939273834, |
| "learning_rate": 0.0002657545605306799, |
| "loss": 0.5303, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.23373759647188533, |
| "grad_norm": 0.25998207926750183, |
| "learning_rate": 0.0002656716417910447, |
| "loss": 0.5258, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2342888643880926, |
| "grad_norm": 0.25762224197387695, |
| "learning_rate": 0.0002655887230514096, |
| "loss": 0.5427, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2348401323042999, |
| "grad_norm": 0.2542650103569031, |
| "learning_rate": 0.00026550580431177444, |
| "loss": 0.5363, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.23539140022050717, |
| "grad_norm": 0.24817922711372375, |
| "learning_rate": 0.0002654228855721393, |
| "loss": 0.5294, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.23594266813671444, |
| "grad_norm": 0.23553630709648132, |
| "learning_rate": 0.0002653399668325041, |
| "loss": 0.5401, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.2364939360529217, |
| "grad_norm": 0.2774706184864044, |
| "learning_rate": 0.00026525704809286896, |
| "loss": 0.5352, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.237045203969129, |
| "grad_norm": 0.2383023351430893, |
| "learning_rate": 0.0002651741293532338, |
| "loss": 0.5243, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.23759647188533628, |
| "grad_norm": 0.23838096857070923, |
| "learning_rate": 0.0002650912106135987, |
| "loss": 0.5336, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.23814773980154355, |
| "grad_norm": 0.2416170984506607, |
| "learning_rate": 0.0002650082918739635, |
| "loss": 0.5044, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.23869900771775082, |
| "grad_norm": 0.24407121539115906, |
| "learning_rate": 0.00026492537313432834, |
| "loss": 0.5383, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2392502756339581, |
| "grad_norm": 0.26349690556526184, |
| "learning_rate": 0.00026484245439469315, |
| "loss": 0.5553, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2398015435501654, |
| "grad_norm": 0.27343693375587463, |
| "learning_rate": 0.000264759535655058, |
| "loss": 0.5593, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.24035281146637266, |
| "grad_norm": 0.22751976549625397, |
| "learning_rate": 0.00026467661691542287, |
| "loss": 0.5254, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.24090407938257993, |
| "grad_norm": 0.2342759519815445, |
| "learning_rate": 0.0002645936981757877, |
| "loss": 0.5076, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2414553472987872, |
| "grad_norm": 0.25039923191070557, |
| "learning_rate": 0.00026451077943615253, |
| "loss": 0.4816, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.24200661521499447, |
| "grad_norm": 0.24585099518299103, |
| "learning_rate": 0.0002644278606965174, |
| "loss": 0.5132, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.24255788313120177, |
| "grad_norm": 0.24062813818454742, |
| "learning_rate": 0.00026434494195688225, |
| "loss": 0.5152, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.24310915104740904, |
| "grad_norm": 0.23549048602581024, |
| "learning_rate": 0.0002642620232172471, |
| "loss": 0.5201, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.24366041896361632, |
| "grad_norm": 0.24712547659873962, |
| "learning_rate": 0.0002641791044776119, |
| "loss": 0.5252, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2442116868798236, |
| "grad_norm": 0.25113359093666077, |
| "learning_rate": 0.00026409618573797677, |
| "loss": 0.5593, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.24476295479603086, |
| "grad_norm": 0.24021007120609283, |
| "learning_rate": 0.0002640132669983416, |
| "loss": 0.5338, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.24531422271223816, |
| "grad_norm": 0.23334236443042755, |
| "learning_rate": 0.00026393034825870643, |
| "loss": 0.4842, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.24586549062844543, |
| "grad_norm": 0.25075432658195496, |
| "learning_rate": 0.0002638474295190713, |
| "loss": 0.5498, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2464167585446527, |
| "grad_norm": 0.23466569185256958, |
| "learning_rate": 0.00026376451077943615, |
| "loss": 0.5125, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.24696802646085997, |
| "grad_norm": 0.23975308239459991, |
| "learning_rate": 0.00026368159203980096, |
| "loss": 0.5315, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.24751929437706727, |
| "grad_norm": 0.227213054895401, |
| "learning_rate": 0.0002635986733001658, |
| "loss": 0.4826, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.24807056229327454, |
| "grad_norm": 0.23588328063488007, |
| "learning_rate": 0.0002635157545605307, |
| "loss": 0.4902, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2486218302094818, |
| "grad_norm": 0.24110263586044312, |
| "learning_rate": 0.00026343283582089554, |
| "loss": 0.5152, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.24917309812568908, |
| "grad_norm": 0.24417544901371002, |
| "learning_rate": 0.00026334991708126034, |
| "loss": 0.5326, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.24972436604189635, |
| "grad_norm": 0.24150699377059937, |
| "learning_rate": 0.00026326699834162515, |
| "loss": 0.547, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.25027563395810365, |
| "grad_norm": 0.26009777188301086, |
| "learning_rate": 0.00026318407960199, |
| "loss": 0.5315, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2508269018743109, |
| "grad_norm": 0.2537683844566345, |
| "learning_rate": 0.00026310116086235486, |
| "loss": 0.5304, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2513781697905182, |
| "grad_norm": 0.2526278495788574, |
| "learning_rate": 0.0002630182421227197, |
| "loss": 0.5194, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2519294377067255, |
| "grad_norm": 0.24355928599834442, |
| "learning_rate": 0.00026293532338308453, |
| "loss": 0.5096, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.25248070562293273, |
| "grad_norm": 0.243259459733963, |
| "learning_rate": 0.0002628524046434494, |
| "loss": 0.4971, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.25303197353914003, |
| "grad_norm": 0.2597525417804718, |
| "learning_rate": 0.00026276948590381425, |
| "loss": 0.5224, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2535832414553473, |
| "grad_norm": 0.2498249113559723, |
| "learning_rate": 0.0002626865671641791, |
| "loss": 0.506, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2541345093715546, |
| "grad_norm": 0.21408714354038239, |
| "learning_rate": 0.0002626036484245439, |
| "loss": 0.5076, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.25468577728776187, |
| "grad_norm": 0.25370824337005615, |
| "learning_rate": 0.00026252072968490877, |
| "loss": 0.5065, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2552370452039691, |
| "grad_norm": 0.25148823857307434, |
| "learning_rate": 0.0002624378109452736, |
| "loss": 0.4932, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2557883131201764, |
| "grad_norm": 0.24903985857963562, |
| "learning_rate": 0.00026235489220563843, |
| "loss": 0.5366, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.25633958103638366, |
| "grad_norm": 0.2521916329860687, |
| "learning_rate": 0.0002622719734660033, |
| "loss": 0.5392, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.25689084895259096, |
| "grad_norm": 0.24553993344306946, |
| "learning_rate": 0.00026218905472636815, |
| "loss": 0.5382, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.25744211686879825, |
| "grad_norm": 0.23382090032100677, |
| "learning_rate": 0.00026210613598673296, |
| "loss": 0.523, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2579933847850055, |
| "grad_norm": 0.25337761640548706, |
| "learning_rate": 0.0002620232172470978, |
| "loss": 0.5147, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2585446527012128, |
| "grad_norm": 0.25433778762817383, |
| "learning_rate": 0.0002619402985074627, |
| "loss": 0.5012, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.25909592061742004, |
| "grad_norm": 0.2362672984600067, |
| "learning_rate": 0.00026185737976782753, |
| "loss": 0.5328, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.25964718853362734, |
| "grad_norm": 0.241427481174469, |
| "learning_rate": 0.00026177446102819234, |
| "loss": 0.5207, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.26019845644983464, |
| "grad_norm": 0.24943798780441284, |
| "learning_rate": 0.0002616915422885572, |
| "loss": 0.5607, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2607497243660419, |
| "grad_norm": 0.21813860535621643, |
| "learning_rate": 0.000261608623548922, |
| "loss": 0.5036, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2613009922822492, |
| "grad_norm": 0.22680509090423584, |
| "learning_rate": 0.00026152570480928686, |
| "loss": 0.4765, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2618522601984565, |
| "grad_norm": 0.23577630519866943, |
| "learning_rate": 0.0002614427860696517, |
| "loss": 0.5267, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2624035281146637, |
| "grad_norm": 0.22560511529445648, |
| "learning_rate": 0.0002613598673300166, |
| "loss": 0.5089, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.262954796030871, |
| "grad_norm": 0.2485722452402115, |
| "learning_rate": 0.0002612769485903814, |
| "loss": 0.5231, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.26350606394707826, |
| "grad_norm": 0.2396019846200943, |
| "learning_rate": 0.00026119402985074624, |
| "loss": 0.515, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.26405733186328556, |
| "grad_norm": 0.24977676570415497, |
| "learning_rate": 0.0002611111111111111, |
| "loss": 0.5303, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.26460859977949286, |
| "grad_norm": 0.2788902521133423, |
| "learning_rate": 0.00026102819237147596, |
| "loss": 0.5324, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2651598676957001, |
| "grad_norm": 0.2515452802181244, |
| "learning_rate": 0.00026094527363184077, |
| "loss": 0.5373, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2657111356119074, |
| "grad_norm": 0.2408224493265152, |
| "learning_rate": 0.0002608623548922056, |
| "loss": 0.5021, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.26626240352811464, |
| "grad_norm": 0.25597700476646423, |
| "learning_rate": 0.00026077943615257043, |
| "loss": 0.5292, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.26681367144432194, |
| "grad_norm": 0.24885378777980804, |
| "learning_rate": 0.0002606965174129353, |
| "loss": 0.5047, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.26736493936052924, |
| "grad_norm": 0.24355795979499817, |
| "learning_rate": 0.00026061359867330015, |
| "loss": 0.5258, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2679162072767365, |
| "grad_norm": 0.2580486238002777, |
| "learning_rate": 0.000260530679933665, |
| "loss": 0.5533, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2684674751929438, |
| "grad_norm": 0.27081531286239624, |
| "learning_rate": 0.0002604477611940298, |
| "loss": 0.525, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.269018743109151, |
| "grad_norm": 0.2559351325035095, |
| "learning_rate": 0.0002603648424543947, |
| "loss": 0.5074, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2695700110253583, |
| "grad_norm": 0.2617773711681366, |
| "learning_rate": 0.00026028192371475953, |
| "loss": 0.5244, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2701212789415656, |
| "grad_norm": 0.23218858242034912, |
| "learning_rate": 0.0002601990049751244, |
| "loss": 0.5048, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.27067254685777287, |
| "grad_norm": 0.24924521148204803, |
| "learning_rate": 0.0002601160862354892, |
| "loss": 0.521, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.27122381477398017, |
| "grad_norm": 0.26815906167030334, |
| "learning_rate": 0.00026003316749585406, |
| "loss": 0.5574, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2717750826901874, |
| "grad_norm": 0.240220308303833, |
| "learning_rate": 0.00025995024875621886, |
| "loss": 0.483, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.2723263506063947, |
| "grad_norm": 0.24979090690612793, |
| "learning_rate": 0.0002598673300165837, |
| "loss": 0.5262, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.272877618522602, |
| "grad_norm": 0.24111522734165192, |
| "learning_rate": 0.0002597844112769486, |
| "loss": 0.5068, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.27342888643880925, |
| "grad_norm": 0.2612921893596649, |
| "learning_rate": 0.0002597014925373134, |
| "loss": 0.519, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.27398015435501655, |
| "grad_norm": 0.24324454367160797, |
| "learning_rate": 0.00025961857379767824, |
| "loss": 0.4826, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.2745314222712238, |
| "grad_norm": 0.2406265288591385, |
| "learning_rate": 0.0002595356550580431, |
| "loss": 0.5223, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2750826901874311, |
| "grad_norm": 0.2597537934780121, |
| "learning_rate": 0.00025945273631840796, |
| "loss": 0.535, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2756339581036384, |
| "grad_norm": 0.2446909099817276, |
| "learning_rate": 0.00025936981757877277, |
| "loss": 0.5108, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2756339581036384, |
| "eval_loss": 0.5157487988471985, |
| "eval_runtime": 312.0533, |
| "eval_samples_per_second": 3.733, |
| "eval_steps_per_second": 0.468, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.27618522601984563, |
| "grad_norm": 0.2623630166053772, |
| "learning_rate": 0.0002592868988391376, |
| "loss": 0.5414, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.27673649393605293, |
| "grad_norm": 0.2578775882720947, |
| "learning_rate": 0.00025920398009950243, |
| "loss": 0.5121, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2772877618522602, |
| "grad_norm": 0.23712347447872162, |
| "learning_rate": 0.0002591210613598673, |
| "loss": 0.5085, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.27783902976846747, |
| "grad_norm": 0.22108785808086395, |
| "learning_rate": 0.00025903814262023215, |
| "loss": 0.5202, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.27839029768467477, |
| "grad_norm": 0.25034549832344055, |
| "learning_rate": 0.000258955223880597, |
| "loss": 0.5389, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.278941565600882, |
| "grad_norm": 0.21812468767166138, |
| "learning_rate": 0.0002588723051409618, |
| "loss": 0.4994, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2794928335170893, |
| "grad_norm": 0.22681641578674316, |
| "learning_rate": 0.00025878938640132667, |
| "loss": 0.5219, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.28004410143329656, |
| "grad_norm": 0.25568950176239014, |
| "learning_rate": 0.00025870646766169153, |
| "loss": 0.5188, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.28059536934950385, |
| "grad_norm": 0.24642765522003174, |
| "learning_rate": 0.0002586235489220564, |
| "loss": 0.4978, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.28114663726571115, |
| "grad_norm": 0.22820910811424255, |
| "learning_rate": 0.0002585406301824212, |
| "loss": 0.5168, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2816979051819184, |
| "grad_norm": 0.23360006511211395, |
| "learning_rate": 0.00025845771144278605, |
| "loss": 0.5059, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2822491730981257, |
| "grad_norm": 0.24599935114383698, |
| "learning_rate": 0.00025837479270315086, |
| "loss": 0.5293, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.282800441014333, |
| "grad_norm": 0.23006513714790344, |
| "learning_rate": 0.0002582918739635157, |
| "loss": 0.5028, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.28335170893054024, |
| "grad_norm": 0.22950898110866547, |
| "learning_rate": 0.0002582089552238806, |
| "loss": 0.5064, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.28390297684674753, |
| "grad_norm": 0.23649993538856506, |
| "learning_rate": 0.00025812603648424544, |
| "loss": 0.515, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2844542447629548, |
| "grad_norm": 0.23335647583007812, |
| "learning_rate": 0.00025804311774461024, |
| "loss": 0.4977, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2850055126791621, |
| "grad_norm": 0.21914584934711456, |
| "learning_rate": 0.0002579601990049751, |
| "loss": 0.5018, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2855567805953694, |
| "grad_norm": 0.2474760264158249, |
| "learning_rate": 0.00025787728026533996, |
| "loss": 0.542, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2861080485115766, |
| "grad_norm": 0.24011823534965515, |
| "learning_rate": 0.0002577943615257048, |
| "loss": 0.5243, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2866593164277839, |
| "grad_norm": 0.2619330883026123, |
| "learning_rate": 0.0002577114427860696, |
| "loss": 0.5657, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.28721058434399116, |
| "grad_norm": 0.2715679407119751, |
| "learning_rate": 0.0002576285240464345, |
| "loss": 0.5506, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.28776185226019846, |
| "grad_norm": 0.26569628715515137, |
| "learning_rate": 0.0002575456053067993, |
| "loss": 0.5525, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.28831312017640576, |
| "grad_norm": 0.23253163695335388, |
| "learning_rate": 0.00025746268656716415, |
| "loss": 0.5184, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.288864388092613, |
| "grad_norm": 0.2698347866535187, |
| "learning_rate": 0.000257379767827529, |
| "loss": 0.5274, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2894156560088203, |
| "grad_norm": 0.2556426227092743, |
| "learning_rate": 0.00025729684908789386, |
| "loss": 0.5032, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.28996692392502754, |
| "grad_norm": 0.252575546503067, |
| "learning_rate": 0.00025721393034825867, |
| "loss": 0.525, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.29051819184123484, |
| "grad_norm": 0.26160725951194763, |
| "learning_rate": 0.00025713101160862353, |
| "loss": 0.552, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.29106945975744214, |
| "grad_norm": 0.250885546207428, |
| "learning_rate": 0.0002570480928689884, |
| "loss": 0.5159, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.2916207276736494, |
| "grad_norm": 0.24888747930526733, |
| "learning_rate": 0.00025696517412935325, |
| "loss": 0.5104, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2921719955898567, |
| "grad_norm": 0.2554168105125427, |
| "learning_rate": 0.00025688225538971805, |
| "loss": 0.4867, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2927232635060639, |
| "grad_norm": 0.24712808430194855, |
| "learning_rate": 0.0002567993366500829, |
| "loss": 0.5087, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2932745314222712, |
| "grad_norm": 0.26169416308403015, |
| "learning_rate": 0.0002567164179104477, |
| "loss": 0.5094, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2938257993384785, |
| "grad_norm": 0.25625213980674744, |
| "learning_rate": 0.0002566334991708126, |
| "loss": 0.5264, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.29437706725468576, |
| "grad_norm": 0.22383877635002136, |
| "learning_rate": 0.00025655058043117743, |
| "loss": 0.4719, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.29492833517089306, |
| "grad_norm": 0.2579217851161957, |
| "learning_rate": 0.0002564676616915423, |
| "loss": 0.5254, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2954796030871003, |
| "grad_norm": 0.25349318981170654, |
| "learning_rate": 0.0002563847429519071, |
| "loss": 0.4932, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2960308710033076, |
| "grad_norm": 0.25384828448295593, |
| "learning_rate": 0.00025630182421227196, |
| "loss": 0.51, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2965821389195149, |
| "grad_norm": 0.22186040878295898, |
| "learning_rate": 0.0002562189054726368, |
| "loss": 0.5074, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.29713340683572215, |
| "grad_norm": 0.2735055685043335, |
| "learning_rate": 0.0002561359867330017, |
| "loss": 0.5151, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.29768467475192945, |
| "grad_norm": 0.24992069602012634, |
| "learning_rate": 0.0002560530679933665, |
| "loss": 0.4987, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2982359426681367, |
| "grad_norm": 0.24067966639995575, |
| "learning_rate": 0.0002559701492537313, |
| "loss": 0.5434, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.298787210584344, |
| "grad_norm": 0.22907654941082, |
| "learning_rate": 0.00025588723051409614, |
| "loss": 0.5091, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2993384785005513, |
| "grad_norm": 0.21983608603477478, |
| "learning_rate": 0.000255804311774461, |
| "loss": 0.5234, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.29988974641675853, |
| "grad_norm": 0.2439606636762619, |
| "learning_rate": 0.00025572139303482586, |
| "loss": 0.5271, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.30044101433296583, |
| "grad_norm": 0.25168585777282715, |
| "learning_rate": 0.00025563847429519067, |
| "loss": 0.4998, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.30099228224917307, |
| "grad_norm": 0.22324073314666748, |
| "learning_rate": 0.00025555555555555553, |
| "loss": 0.5086, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.30154355016538037, |
| "grad_norm": 0.22652758657932281, |
| "learning_rate": 0.0002554726368159204, |
| "loss": 0.5044, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.30209481808158767, |
| "grad_norm": 0.2422345131635666, |
| "learning_rate": 0.00025538971807628525, |
| "loss": 0.4968, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3026460859977949, |
| "grad_norm": 0.24840863049030304, |
| "learning_rate": 0.00025530679933665005, |
| "loss": 0.5267, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3031973539140022, |
| "grad_norm": 0.26198020577430725, |
| "learning_rate": 0.0002552238805970149, |
| "loss": 0.528, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3037486218302095, |
| "grad_norm": 0.24763406813144684, |
| "learning_rate": 0.0002551409618573797, |
| "loss": 0.5387, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.30429988974641675, |
| "grad_norm": 0.22976034879684448, |
| "learning_rate": 0.0002550580431177446, |
| "loss": 0.5171, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.30485115766262405, |
| "grad_norm": 0.26161912083625793, |
| "learning_rate": 0.00025497512437810943, |
| "loss": 0.4956, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3054024255788313, |
| "grad_norm": 0.2695063650608063, |
| "learning_rate": 0.0002548922056384743, |
| "loss": 0.5339, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3059536934950386, |
| "grad_norm": 0.22745662927627563, |
| "learning_rate": 0.0002548092868988391, |
| "loss": 0.4769, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.3065049614112459, |
| "grad_norm": 0.2539026439189911, |
| "learning_rate": 0.00025472636815920396, |
| "loss": 0.5085, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.30705622932745313, |
| "grad_norm": 0.25683802366256714, |
| "learning_rate": 0.0002546434494195688, |
| "loss": 0.4828, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.30760749724366043, |
| "grad_norm": 0.24806293845176697, |
| "learning_rate": 0.0002545605306799337, |
| "loss": 0.534, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3081587651598677, |
| "grad_norm": 0.24956698715686798, |
| "learning_rate": 0.0002544776119402985, |
| "loss": 0.4988, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.308710033076075, |
| "grad_norm": 0.2466159611940384, |
| "learning_rate": 0.00025439469320066334, |
| "loss": 0.525, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3092613009922823, |
| "grad_norm": 0.2732326090335846, |
| "learning_rate": 0.00025431177446102814, |
| "loss": 0.5096, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3098125689084895, |
| "grad_norm": 0.257656067609787, |
| "learning_rate": 0.000254228855721393, |
| "loss": 0.5241, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3103638368246968, |
| "grad_norm": 0.2280483990907669, |
| "learning_rate": 0.00025414593698175786, |
| "loss": 0.5051, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.31091510474090406, |
| "grad_norm": 0.24017442762851715, |
| "learning_rate": 0.0002540630182421227, |
| "loss": 0.4923, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.31146637265711136, |
| "grad_norm": 0.27770093083381653, |
| "learning_rate": 0.0002539800995024875, |
| "loss": 0.5068, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.31201764057331866, |
| "grad_norm": 0.2428130954504013, |
| "learning_rate": 0.0002538971807628524, |
| "loss": 0.5223, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3125689084895259, |
| "grad_norm": 0.24798986315727234, |
| "learning_rate": 0.00025381426202321724, |
| "loss": 0.5269, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3131201764057332, |
| "grad_norm": 0.2388242930173874, |
| "learning_rate": 0.0002537313432835821, |
| "loss": 0.5328, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.31367144432194044, |
| "grad_norm": 0.24993616342544556, |
| "learning_rate": 0.0002536484245439469, |
| "loss": 0.523, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.31422271223814774, |
| "grad_norm": 0.22417233884334564, |
| "learning_rate": 0.00025356550580431177, |
| "loss": 0.5162, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.31477398015435504, |
| "grad_norm": 0.25001853704452515, |
| "learning_rate": 0.00025348258706467657, |
| "loss": 0.5172, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.3153252480705623, |
| "grad_norm": 0.24982157349586487, |
| "learning_rate": 0.00025339966832504143, |
| "loss": 0.516, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3158765159867696, |
| "grad_norm": 0.23938202857971191, |
| "learning_rate": 0.0002533167495854063, |
| "loss": 0.4984, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3164277839029768, |
| "grad_norm": 0.23941190540790558, |
| "learning_rate": 0.00025323383084577115, |
| "loss": 0.5285, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.3169790518191841, |
| "grad_norm": 0.26152345538139343, |
| "learning_rate": 0.00025315091210613595, |
| "loss": 0.5354, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3175303197353914, |
| "grad_norm": 0.2364695519208908, |
| "learning_rate": 0.0002530679933665008, |
| "loss": 0.4926, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.31808158765159866, |
| "grad_norm": 0.2498009353876114, |
| "learning_rate": 0.00025298507462686567, |
| "loss": 0.4879, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.31863285556780596, |
| "grad_norm": 0.2434455007314682, |
| "learning_rate": 0.00025290215588723053, |
| "loss": 0.4941, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3191841234840132, |
| "grad_norm": 0.2500743269920349, |
| "learning_rate": 0.00025281923714759534, |
| "loss": 0.5224, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3197353914002205, |
| "grad_norm": 0.24151727557182312, |
| "learning_rate": 0.0002527363184079602, |
| "loss": 0.5056, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3202866593164278, |
| "grad_norm": 0.23307417333126068, |
| "learning_rate": 0.000252653399668325, |
| "loss": 0.4944, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.32083792723263505, |
| "grad_norm": 0.25184640288352966, |
| "learning_rate": 0.00025257048092868986, |
| "loss": 0.5471, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.32138919514884234, |
| "grad_norm": 0.21968768537044525, |
| "learning_rate": 0.0002524875621890547, |
| "loss": 0.4773, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3219404630650496, |
| "grad_norm": 0.22851119935512543, |
| "learning_rate": 0.0002524046434494195, |
| "loss": 0.4964, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3224917309812569, |
| "grad_norm": 0.2595960795879364, |
| "learning_rate": 0.0002523217247097844, |
| "loss": 0.5109, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.3230429988974642, |
| "grad_norm": 0.25090447068214417, |
| "learning_rate": 0.00025223880597014924, |
| "loss": 0.4932, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3235942668136714, |
| "grad_norm": 0.24583864212036133, |
| "learning_rate": 0.0002521558872305141, |
| "loss": 0.4779, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3241455347298787, |
| "grad_norm": 0.23779521882534027, |
| "learning_rate": 0.0002520729684908789, |
| "loss": 0.4925, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.324696802646086, |
| "grad_norm": 0.2614596486091614, |
| "learning_rate": 0.00025199004975124377, |
| "loss": 0.5064, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.32524807056229327, |
| "grad_norm": 0.2449434995651245, |
| "learning_rate": 0.00025190713101160857, |
| "loss": 0.4768, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.32579933847850057, |
| "grad_norm": 0.24249720573425293, |
| "learning_rate": 0.00025182421227197343, |
| "loss": 0.5183, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3263506063947078, |
| "grad_norm": 0.2366262972354889, |
| "learning_rate": 0.0002517412935323383, |
| "loss": 0.5119, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.3269018743109151, |
| "grad_norm": 0.2465352565050125, |
| "learning_rate": 0.00025165837479270315, |
| "loss": 0.5133, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3274531422271224, |
| "grad_norm": 0.24108771979808807, |
| "learning_rate": 0.00025157545605306795, |
| "loss": 0.5139, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.32800441014332965, |
| "grad_norm": 0.25272470712661743, |
| "learning_rate": 0.0002514925373134328, |
| "loss": 0.5161, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.32855567805953695, |
| "grad_norm": 0.23254331946372986, |
| "learning_rate": 0.00025140961857379767, |
| "loss": 0.5048, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3291069459757442, |
| "grad_norm": 0.24523723125457764, |
| "learning_rate": 0.00025132669983416253, |
| "loss": 0.5234, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3296582138919515, |
| "grad_norm": 0.2396179735660553, |
| "learning_rate": 0.00025124378109452733, |
| "loss": 0.4865, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3302094818081588, |
| "grad_norm": 0.24812306463718414, |
| "learning_rate": 0.0002511608623548922, |
| "loss": 0.5262, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.33076074972436603, |
| "grad_norm": 0.21982058882713318, |
| "learning_rate": 0.000251077943615257, |
| "loss": 0.5067, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.33131201764057333, |
| "grad_norm": 0.23328660428524017, |
| "learning_rate": 0.00025099502487562186, |
| "loss": 0.5166, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3318632855567806, |
| "grad_norm": 0.23042722046375275, |
| "learning_rate": 0.0002509121061359867, |
| "loss": 0.4754, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3324145534729879, |
| "grad_norm": 0.2361726462841034, |
| "learning_rate": 0.0002508291873963516, |
| "loss": 0.5048, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.33296582138919517, |
| "grad_norm": 0.22569622099399567, |
| "learning_rate": 0.0002507462686567164, |
| "loss": 0.5272, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3335170893054024, |
| "grad_norm": 0.28286513686180115, |
| "learning_rate": 0.00025066334991708124, |
| "loss": 0.5316, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3340683572216097, |
| "grad_norm": 0.2402937114238739, |
| "learning_rate": 0.0002505804311774461, |
| "loss": 0.5213, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.33461962513781696, |
| "grad_norm": 0.23157329857349396, |
| "learning_rate": 0.00025049751243781096, |
| "loss": 0.5259, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.33517089305402425, |
| "grad_norm": 0.24995861947536469, |
| "learning_rate": 0.00025041459369817576, |
| "loss": 0.4986, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.33572216097023155, |
| "grad_norm": 0.2656213939189911, |
| "learning_rate": 0.0002503316749585406, |
| "loss": 0.4951, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3362734288864388, |
| "grad_norm": 0.2361687421798706, |
| "learning_rate": 0.00025024875621890543, |
| "loss": 0.4897, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3368246968026461, |
| "grad_norm": 0.23117870092391968, |
| "learning_rate": 0.0002501658374792703, |
| "loss": 0.5115, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.33737596471885334, |
| "grad_norm": 0.2605067491531372, |
| "learning_rate": 0.00025008291873963515, |
| "loss": 0.4969, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.33792723263506064, |
| "grad_norm": 0.2486005276441574, |
| "learning_rate": 0.00025, |
| "loss": 0.4853, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.33847850055126794, |
| "grad_norm": 0.2559118866920471, |
| "learning_rate": 0.0002499170812603648, |
| "loss": 0.5279, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3390297684674752, |
| "grad_norm": 0.2579089403152466, |
| "learning_rate": 0.00024983416252072967, |
| "loss": 0.4942, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3395810363836825, |
| "grad_norm": 0.24982236325740814, |
| "learning_rate": 0.0002497512437810945, |
| "loss": 0.5061, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3401323042998897, |
| "grad_norm": 0.22861437499523163, |
| "learning_rate": 0.0002496683250414594, |
| "loss": 0.4935, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.340683572216097, |
| "grad_norm": 0.26352861523628235, |
| "learning_rate": 0.0002495854063018242, |
| "loss": 0.4989, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3412348401323043, |
| "grad_norm": 0.26364725828170776, |
| "learning_rate": 0.00024950248756218905, |
| "loss": 0.5178, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.34178610804851156, |
| "grad_norm": 0.2375265508890152, |
| "learning_rate": 0.00024941956882255386, |
| "loss": 0.5081, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.34233737596471886, |
| "grad_norm": 0.24559634923934937, |
| "learning_rate": 0.0002493366500829187, |
| "loss": 0.5231, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.3428886438809261, |
| "grad_norm": 0.25992295145988464, |
| "learning_rate": 0.0002492537313432836, |
| "loss": 0.4919, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3434399117971334, |
| "grad_norm": 0.2260003536939621, |
| "learning_rate": 0.00024917081260364843, |
| "loss": 0.4798, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.3439911797133407, |
| "grad_norm": 0.24474291503429413, |
| "learning_rate": 0.00024908789386401324, |
| "loss": 0.5063, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.34454244762954794, |
| "grad_norm": 0.27368757128715515, |
| "learning_rate": 0.0002490049751243781, |
| "loss": 0.5138, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.34509371554575524, |
| "grad_norm": 0.23762589693069458, |
| "learning_rate": 0.0002489220563847429, |
| "loss": 0.4739, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.34564498346196254, |
| "grad_norm": 0.26609158515930176, |
| "learning_rate": 0.00024883913764510776, |
| "loss": 0.5017, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3461962513781698, |
| "grad_norm": 0.26183345913887024, |
| "learning_rate": 0.0002487562189054726, |
| "loss": 0.5278, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3467475192943771, |
| "grad_norm": 0.254160076379776, |
| "learning_rate": 0.0002486733001658374, |
| "loss": 0.5178, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3472987872105843, |
| "grad_norm": 0.23745757341384888, |
| "learning_rate": 0.0002485903814262023, |
| "loss": 0.5152, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3478500551267916, |
| "grad_norm": 0.24215815961360931, |
| "learning_rate": 0.00024850746268656714, |
| "loss": 0.4821, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3484013230429989, |
| "grad_norm": 0.2696283459663391, |
| "learning_rate": 0.000248424543946932, |
| "loss": 0.4868, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.34895259095920617, |
| "grad_norm": 0.2615061402320862, |
| "learning_rate": 0.0002483416252072968, |
| "loss": 0.5066, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.34950385887541346, |
| "grad_norm": 0.2618487775325775, |
| "learning_rate": 0.00024825870646766167, |
| "loss": 0.5084, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3500551267916207, |
| "grad_norm": 0.2500843107700348, |
| "learning_rate": 0.00024817578772802647, |
| "loss": 0.5065, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.350606394707828, |
| "grad_norm": 0.2559143304824829, |
| "learning_rate": 0.00024809286898839133, |
| "loss": 0.5058, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3511576626240353, |
| "grad_norm": 0.2498316466808319, |
| "learning_rate": 0.0002480099502487562, |
| "loss": 0.5033, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.35170893054024255, |
| "grad_norm": 0.2778237760066986, |
| "learning_rate": 0.00024792703150912105, |
| "loss": 0.5319, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.35226019845644985, |
| "grad_norm": 0.22850993275642395, |
| "learning_rate": 0.00024784411276948585, |
| "loss": 0.4852, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3528114663726571, |
| "grad_norm": 0.22482328116893768, |
| "learning_rate": 0.0002477611940298507, |
| "loss": 0.5044, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3533627342888644, |
| "grad_norm": 0.2470054179430008, |
| "learning_rate": 0.0002476782752902156, |
| "loss": 0.5119, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3539140022050717, |
| "grad_norm": 0.26223158836364746, |
| "learning_rate": 0.00024759535655058043, |
| "loss": 0.5276, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.35446527012127893, |
| "grad_norm": 0.25175783038139343, |
| "learning_rate": 0.00024751243781094524, |
| "loss": 0.4963, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.35501653803748623, |
| "grad_norm": 0.26237010955810547, |
| "learning_rate": 0.0002474295190713101, |
| "loss": 0.4989, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.35556780595369347, |
| "grad_norm": 0.23380139470100403, |
| "learning_rate": 0.0002473466003316749, |
| "loss": 0.5143, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.35611907386990077, |
| "grad_norm": 0.23414726555347443, |
| "learning_rate": 0.00024726368159203976, |
| "loss": 0.4837, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.35667034178610807, |
| "grad_norm": 0.2426154464483261, |
| "learning_rate": 0.0002471807628524046, |
| "loss": 0.4953, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3572216097023153, |
| "grad_norm": 0.25034722685813904, |
| "learning_rate": 0.0002470978441127695, |
| "loss": 0.505, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3577728776185226, |
| "grad_norm": 0.21789918839931488, |
| "learning_rate": 0.0002470149253731343, |
| "loss": 0.5121, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.35832414553472985, |
| "grad_norm": 0.2339979112148285, |
| "learning_rate": 0.00024693200663349914, |
| "loss": 0.5065, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.35887541345093715, |
| "grad_norm": 0.22365735471248627, |
| "learning_rate": 0.000246849087893864, |
| "loss": 0.4952, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.35942668136714445, |
| "grad_norm": 0.2149263620376587, |
| "learning_rate": 0.00024676616915422886, |
| "loss": 0.4677, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3599779492833517, |
| "grad_norm": 0.2143101543188095, |
| "learning_rate": 0.00024668325041459367, |
| "loss": 0.4881, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.360529217199559, |
| "grad_norm": 0.23739519715309143, |
| "learning_rate": 0.0002466003316749585, |
| "loss": 0.5006, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.36108048511576624, |
| "grad_norm": 0.24234917759895325, |
| "learning_rate": 0.00024651741293532333, |
| "loss": 0.5206, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.36163175303197354, |
| "grad_norm": 0.2366551011800766, |
| "learning_rate": 0.0002464344941956882, |
| "loss": 0.5075, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.36218302094818083, |
| "grad_norm": 0.2543952465057373, |
| "learning_rate": 0.00024635157545605305, |
| "loss": 0.4985, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3627342888643881, |
| "grad_norm": 0.24470911920070648, |
| "learning_rate": 0.0002462686567164179, |
| "loss": 0.5128, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3632855567805954, |
| "grad_norm": 0.22214102745056152, |
| "learning_rate": 0.0002461857379767827, |
| "loss": 0.5125, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3638368246968026, |
| "grad_norm": 0.24312040209770203, |
| "learning_rate": 0.00024610281923714757, |
| "loss": 0.4936, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3643880926130099, |
| "grad_norm": 0.25986719131469727, |
| "learning_rate": 0.00024601990049751243, |
| "loss": 0.5347, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3649393605292172, |
| "grad_norm": 0.22576284408569336, |
| "learning_rate": 0.0002459369817578773, |
| "loss": 0.4747, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.36549062844542446, |
| "grad_norm": 0.257548451423645, |
| "learning_rate": 0.0002458540630182421, |
| "loss": 0.5083, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.36604189636163176, |
| "grad_norm": 0.26048266887664795, |
| "learning_rate": 0.00024577114427860695, |
| "loss": 0.539, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.36659316427783906, |
| "grad_norm": 0.2594940662384033, |
| "learning_rate": 0.00024568822553897176, |
| "loss": 0.5003, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3671444321940463, |
| "grad_norm": 0.2651066482067108, |
| "learning_rate": 0.0002456053067993366, |
| "loss": 0.4979, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3676957001102536, |
| "grad_norm": 0.2542423903942108, |
| "learning_rate": 0.0002455223880597015, |
| "loss": 0.5338, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.36824696802646084, |
| "grad_norm": 0.24032056331634521, |
| "learning_rate": 0.00024543946932006634, |
| "loss": 0.5101, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.36879823594266814, |
| "grad_norm": 0.26019784808158875, |
| "learning_rate": 0.00024535655058043114, |
| "loss": 0.5217, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.36934950385887544, |
| "grad_norm": 0.24449752271175385, |
| "learning_rate": 0.000245273631840796, |
| "loss": 0.5318, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3699007717750827, |
| "grad_norm": 0.22685208916664124, |
| "learning_rate": 0.00024519071310116086, |
| "loss": 0.5186, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.37045203969129, |
| "grad_norm": 0.2340528517961502, |
| "learning_rate": 0.00024510779436152566, |
| "loss": 0.4879, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3710033076074972, |
| "grad_norm": 0.2637344002723694, |
| "learning_rate": 0.0002450248756218905, |
| "loss": 0.5225, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3715545755237045, |
| "grad_norm": 0.2515370845794678, |
| "learning_rate": 0.00024494195688225533, |
| "loss": 0.4913, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3721058434399118, |
| "grad_norm": 0.22438743710517883, |
| "learning_rate": 0.0002448590381426202, |
| "loss": 0.4733, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.37265711135611906, |
| "grad_norm": 0.24447986483573914, |
| "learning_rate": 0.00024477611940298505, |
| "loss": 0.5138, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.37320837927232636, |
| "grad_norm": 0.2652420699596405, |
| "learning_rate": 0.0002446932006633499, |
| "loss": 0.4897, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.3737596471885336, |
| "grad_norm": 0.23273025453090668, |
| "learning_rate": 0.0002446102819237147, |
| "loss": 0.4823, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3743109151047409, |
| "grad_norm": 0.24014912545681, |
| "learning_rate": 0.00024452736318407957, |
| "loss": 0.4963, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3748621830209482, |
| "grad_norm": 0.2454654574394226, |
| "learning_rate": 0.00024444444444444443, |
| "loss": 0.5367, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.37541345093715545, |
| "grad_norm": 0.23897579312324524, |
| "learning_rate": 0.0002443615257048093, |
| "loss": 0.5038, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.37596471885336274, |
| "grad_norm": 0.25277066230773926, |
| "learning_rate": 0.0002442786069651741, |
| "loss": 0.506, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.37651598676957, |
| "grad_norm": 0.22470998764038086, |
| "learning_rate": 0.00024419568822553895, |
| "loss": 0.5038, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3770672546857773, |
| "grad_norm": 0.2490270882844925, |
| "learning_rate": 0.00024411276948590378, |
| "loss": 0.5073, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3776185226019846, |
| "grad_norm": 0.23964819312095642, |
| "learning_rate": 0.00024402985074626864, |
| "loss": 0.4932, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.37816979051819183, |
| "grad_norm": 0.2595767676830292, |
| "learning_rate": 0.00024394693200663348, |
| "loss": 0.5263, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3787210584343991, |
| "grad_norm": 0.23740339279174805, |
| "learning_rate": 0.00024386401326699833, |
| "loss": 0.5019, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.37927232635060637, |
| "grad_norm": 0.23046371340751648, |
| "learning_rate": 0.00024378109452736314, |
| "loss": 0.5071, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.37982359426681367, |
| "grad_norm": 0.24483554065227509, |
| "learning_rate": 0.000243698175787728, |
| "loss": 0.4978, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.38037486218302097, |
| "grad_norm": 0.23441949486732483, |
| "learning_rate": 0.00024361525704809283, |
| "loss": 0.5217, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3809261300992282, |
| "grad_norm": 0.23334890604019165, |
| "learning_rate": 0.0002435323383084577, |
| "loss": 0.4826, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3814773980154355, |
| "grad_norm": 0.2869088053703308, |
| "learning_rate": 0.00024344941956882252, |
| "loss": 0.5199, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.38202866593164275, |
| "grad_norm": 0.22842839360237122, |
| "learning_rate": 0.00024336650082918738, |
| "loss": 0.4586, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.38257993384785005, |
| "grad_norm": 0.23558756709098816, |
| "learning_rate": 0.0002432835820895522, |
| "loss": 0.4775, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.38313120176405735, |
| "grad_norm": 0.2528475821018219, |
| "learning_rate": 0.00024320066334991707, |
| "loss": 0.5068, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3836824696802646, |
| "grad_norm": 0.2580317258834839, |
| "learning_rate": 0.0002431177446102819, |
| "loss": 0.52, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3842337375964719, |
| "grad_norm": 0.23449361324310303, |
| "learning_rate": 0.00024303482587064676, |
| "loss": 0.4776, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.38478500551267913, |
| "grad_norm": 0.2365398108959198, |
| "learning_rate": 0.00024295190713101157, |
| "loss": 0.5063, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.38533627342888643, |
| "grad_norm": 0.24017611145973206, |
| "learning_rate": 0.00024286898839137643, |
| "loss": 0.4989, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.38588754134509373, |
| "grad_norm": 0.237211212515831, |
| "learning_rate": 0.00024278606965174126, |
| "loss": 0.4942, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.386438809261301, |
| "grad_norm": 0.24133196473121643, |
| "learning_rate": 0.00024270315091210612, |
| "loss": 0.4991, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3869900771775083, |
| "grad_norm": 0.23730522394180298, |
| "learning_rate": 0.00024262023217247095, |
| "loss": 0.4847, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3875413450937156, |
| "grad_norm": 0.23267106711864471, |
| "learning_rate": 0.0002425373134328358, |
| "loss": 0.5304, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3880926130099228, |
| "grad_norm": 0.22734446823596954, |
| "learning_rate": 0.00024245439469320064, |
| "loss": 0.4752, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3886438809261301, |
| "grad_norm": 0.24138008058071136, |
| "learning_rate": 0.0002423714759535655, |
| "loss": 0.4831, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.38919514884233736, |
| "grad_norm": 0.24015116691589355, |
| "learning_rate": 0.00024228855721393033, |
| "loss": 0.506, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.38974641675854466, |
| "grad_norm": 0.23817308247089386, |
| "learning_rate": 0.0002422056384742952, |
| "loss": 0.4868, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.39029768467475195, |
| "grad_norm": 0.21546156704425812, |
| "learning_rate": 0.00024212271973466, |
| "loss": 0.5102, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3908489525909592, |
| "grad_norm": 0.2489834874868393, |
| "learning_rate": 0.00024203980099502486, |
| "loss": 0.4985, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.3914002205071665, |
| "grad_norm": 0.23067452013492584, |
| "learning_rate": 0.0002419568822553897, |
| "loss": 0.4985, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.39195148842337374, |
| "grad_norm": 0.24763309955596924, |
| "learning_rate": 0.00024187396351575455, |
| "loss": 0.5124, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.39250275633958104, |
| "grad_norm": 0.2439269721508026, |
| "learning_rate": 0.00024179104477611938, |
| "loss": 0.4939, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.39305402425578834, |
| "grad_norm": 0.23163112998008728, |
| "learning_rate": 0.00024170812603648424, |
| "loss": 0.4954, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3936052921719956, |
| "grad_norm": 0.24170540273189545, |
| "learning_rate": 0.00024162520729684907, |
| "loss": 0.4947, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3941565600882029, |
| "grad_norm": 0.23549963533878326, |
| "learning_rate": 0.00024154228855721393, |
| "loss": 0.5132, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3947078280044101, |
| "grad_norm": 0.2394574135541916, |
| "learning_rate": 0.00024145936981757876, |
| "loss": 0.5153, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3952590959206174, |
| "grad_norm": 0.2615318298339844, |
| "learning_rate": 0.00024137645107794357, |
| "loss": 0.4971, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.3958103638368247, |
| "grad_norm": 0.2353423684835434, |
| "learning_rate": 0.00024129353233830843, |
| "loss": 0.4966, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.39636163175303196, |
| "grad_norm": 0.22130148112773895, |
| "learning_rate": 0.00024121061359867326, |
| "loss": 0.4487, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.39691289966923926, |
| "grad_norm": 0.234688401222229, |
| "learning_rate": 0.00024112769485903812, |
| "loss": 0.499, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3974641675854465, |
| "grad_norm": 0.23247137665748596, |
| "learning_rate": 0.00024104477611940295, |
| "loss": 0.4944, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3980154355016538, |
| "grad_norm": 0.2362777143716812, |
| "learning_rate": 0.0002409618573797678, |
| "loss": 0.481, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3985667034178611, |
| "grad_norm": 0.24181120097637177, |
| "learning_rate": 0.00024087893864013264, |
| "loss": 0.5211, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.39911797133406834, |
| "grad_norm": 0.22298705577850342, |
| "learning_rate": 0.0002407960199004975, |
| "loss": 0.4888, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.39966923925027564, |
| "grad_norm": 0.2304617017507553, |
| "learning_rate": 0.00024071310116086233, |
| "loss": 0.4811, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4002205071664829, |
| "grad_norm": 0.24691155552864075, |
| "learning_rate": 0.0002406301824212272, |
| "loss": 0.5189, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.4007717750826902, |
| "grad_norm": 0.25604429841041565, |
| "learning_rate": 0.000240547263681592, |
| "loss": 0.4927, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.4013230429988975, |
| "grad_norm": 0.2280474603176117, |
| "learning_rate": 0.00024046434494195685, |
| "loss": 0.4882, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.4018743109151047, |
| "grad_norm": 0.23425596952438354, |
| "learning_rate": 0.0002403814262023217, |
| "loss": 0.4875, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.402425578831312, |
| "grad_norm": 0.26156267523765564, |
| "learning_rate": 0.00024029850746268655, |
| "loss": 0.5087, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.40297684674751927, |
| "grad_norm": 0.23172809183597565, |
| "learning_rate": 0.00024021558872305138, |
| "loss": 0.5024, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.40352811466372657, |
| "grad_norm": 0.23358501493930817, |
| "learning_rate": 0.00024013266998341624, |
| "loss": 0.4972, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.40407938257993387, |
| "grad_norm": 0.23836782574653625, |
| "learning_rate": 0.00024004975124378107, |
| "loss": 0.5061, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4046306504961411, |
| "grad_norm": 0.23341165482997894, |
| "learning_rate": 0.00023996683250414593, |
| "loss": 0.4927, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4051819184123484, |
| "grad_norm": 0.2267657369375229, |
| "learning_rate": 0.00023988391376451076, |
| "loss": 0.4884, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.40573318632855565, |
| "grad_norm": 0.23333032429218292, |
| "learning_rate": 0.00023980099502487562, |
| "loss": 0.4764, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.40628445424476295, |
| "grad_norm": 0.24722862243652344, |
| "learning_rate": 0.00023971807628524042, |
| "loss": 0.5168, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.40683572216097025, |
| "grad_norm": 0.24919219315052032, |
| "learning_rate": 0.00023963515754560528, |
| "loss": 0.4953, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4073869900771775, |
| "grad_norm": 0.22673016786575317, |
| "learning_rate": 0.00023955223880597012, |
| "loss": 0.4883, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4079382579933848, |
| "grad_norm": 0.22796331346035004, |
| "learning_rate": 0.00023946932006633497, |
| "loss": 0.4683, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4084895259095921, |
| "grad_norm": 0.23972417414188385, |
| "learning_rate": 0.0002393864013266998, |
| "loss": 0.4919, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.40904079382579933, |
| "grad_norm": 0.23933400213718414, |
| "learning_rate": 0.00023930348258706467, |
| "loss": 0.5053, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.40959206174200663, |
| "grad_norm": 0.24868054687976837, |
| "learning_rate": 0.0002392205638474295, |
| "loss": 0.4854, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4101433296582139, |
| "grad_norm": 0.23096708953380585, |
| "learning_rate": 0.00023913764510779436, |
| "loss": 0.4739, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.41069459757442117, |
| "grad_norm": 0.2553226947784424, |
| "learning_rate": 0.0002390547263681592, |
| "loss": 0.4679, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.41124586549062847, |
| "grad_norm": 0.24697932600975037, |
| "learning_rate": 0.00023897180762852405, |
| "loss": 0.4858, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.4117971334068357, |
| "grad_norm": 0.2418091893196106, |
| "learning_rate": 0.00023888888888888885, |
| "loss": 0.5172, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.412348401323043, |
| "grad_norm": 0.24144020676612854, |
| "learning_rate": 0.0002388059701492537, |
| "loss": 0.4711, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.41289966923925026, |
| "grad_norm": 0.24137695133686066, |
| "learning_rate": 0.00023872305140961854, |
| "loss": 0.5106, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.41345093715545755, |
| "grad_norm": 0.220285102725029, |
| "learning_rate": 0.0002386401326699834, |
| "loss": 0.4704, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.41400220507166485, |
| "grad_norm": 0.24430547654628754, |
| "learning_rate": 0.00023855721393034824, |
| "loss": 0.5038, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.4145534729878721, |
| "grad_norm": 0.24019300937652588, |
| "learning_rate": 0.0002384742951907131, |
| "loss": 0.4949, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.4151047409040794, |
| "grad_norm": 0.22668643295764923, |
| "learning_rate": 0.00023839137645107793, |
| "loss": 0.4718, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.41565600882028664, |
| "grad_norm": 0.2277330756187439, |
| "learning_rate": 0.00023830845771144279, |
| "loss": 0.514, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.41620727673649394, |
| "grad_norm": 0.2215653359889984, |
| "learning_rate": 0.00023822553897180762, |
| "loss": 0.4873, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.41675854465270123, |
| "grad_norm": 0.22386564314365387, |
| "learning_rate": 0.00023814262023217248, |
| "loss": 0.4824, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.4173098125689085, |
| "grad_norm": 0.2562282681465149, |
| "learning_rate": 0.00023805970149253728, |
| "loss": 0.5177, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.4178610804851158, |
| "grad_norm": 0.25375691056251526, |
| "learning_rate": 0.00023797678275290214, |
| "loss": 0.51, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.418412348401323, |
| "grad_norm": 0.26564472913742065, |
| "learning_rate": 0.00023789386401326697, |
| "loss": 0.5048, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.4189636163175303, |
| "grad_norm": 0.24918165802955627, |
| "learning_rate": 0.00023781094527363183, |
| "loss": 0.4964, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4195148842337376, |
| "grad_norm": 0.26909199357032776, |
| "learning_rate": 0.00023772802653399666, |
| "loss": 0.4511, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.42006615214994486, |
| "grad_norm": 0.27723434567451477, |
| "learning_rate": 0.0002376451077943615, |
| "loss": 0.4994, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.42061742006615216, |
| "grad_norm": 0.23842424154281616, |
| "learning_rate": 0.00023756218905472636, |
| "loss": 0.5127, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4211686879823594, |
| "grad_norm": 0.2599777281284332, |
| "learning_rate": 0.0002374792703150912, |
| "loss": 0.5221, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4217199558985667, |
| "grad_norm": 0.2541678845882416, |
| "learning_rate": 0.00023739635157545605, |
| "loss": 0.5086, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.422271223814774, |
| "grad_norm": 0.24489666521549225, |
| "learning_rate": 0.00023731343283582085, |
| "loss": 0.5052, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.42282249173098124, |
| "grad_norm": 0.23364123702049255, |
| "learning_rate": 0.0002372305140961857, |
| "loss": 0.4815, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.42337375964718854, |
| "grad_norm": 0.24420395493507385, |
| "learning_rate": 0.00023714759535655054, |
| "loss": 0.4799, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4239250275633958, |
| "grad_norm": 0.2559242844581604, |
| "learning_rate": 0.0002370646766169154, |
| "loss": 0.5218, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4244762954796031, |
| "grad_norm": 0.24033527076244354, |
| "learning_rate": 0.00023698175787728023, |
| "loss": 0.4951, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4250275633958104, |
| "grad_norm": 0.2582804262638092, |
| "learning_rate": 0.0002368988391376451, |
| "loss": 0.4925, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4255788313120176, |
| "grad_norm": 0.21231015026569366, |
| "learning_rate": 0.00023681592039800992, |
| "loss": 0.4975, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4261300992282249, |
| "grad_norm": 0.23742909729480743, |
| "learning_rate": 0.00023673300165837478, |
| "loss": 0.5115, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.42668136714443217, |
| "grad_norm": 0.23761944472789764, |
| "learning_rate": 0.00023665008291873962, |
| "loss": 0.5117, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.42723263506063947, |
| "grad_norm": 0.25065210461616516, |
| "learning_rate": 0.00023656716417910448, |
| "loss": 0.5305, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.42778390297684676, |
| "grad_norm": 0.23839645087718964, |
| "learning_rate": 0.00023648424543946928, |
| "loss": 0.5245, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.428335170893054, |
| "grad_norm": 0.22241149842739105, |
| "learning_rate": 0.00023640132669983414, |
| "loss": 0.5041, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4288864388092613, |
| "grad_norm": 0.23228657245635986, |
| "learning_rate": 0.00023631840796019897, |
| "loss": 0.4955, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.4294377067254686, |
| "grad_norm": 0.24807095527648926, |
| "learning_rate": 0.00023623548922056383, |
| "loss": 0.5057, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.42998897464167585, |
| "grad_norm": 0.253288209438324, |
| "learning_rate": 0.00023615257048092866, |
| "loss": 0.5179, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.43054024255788315, |
| "grad_norm": 0.2280365228652954, |
| "learning_rate": 0.00023606965174129352, |
| "loss": 0.5104, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.4310915104740904, |
| "grad_norm": 0.21497339010238647, |
| "learning_rate": 0.00023598673300165835, |
| "loss": 0.479, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4316427783902977, |
| "grad_norm": 0.25969845056533813, |
| "learning_rate": 0.0002359038142620232, |
| "loss": 0.4952, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.432194046306505, |
| "grad_norm": 0.24241061508655548, |
| "learning_rate": 0.00023582089552238804, |
| "loss": 0.5147, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.43274531422271223, |
| "grad_norm": 0.23297248780727386, |
| "learning_rate": 0.0002357379767827529, |
| "loss": 0.4698, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.43329658213891953, |
| "grad_norm": 0.23766906559467316, |
| "learning_rate": 0.0002356550580431177, |
| "loss": 0.5127, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.43384785005512677, |
| "grad_norm": 0.225977823138237, |
| "learning_rate": 0.00023557213930348257, |
| "loss": 0.4698, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.43439911797133407, |
| "grad_norm": 0.25361236929893494, |
| "learning_rate": 0.0002354892205638474, |
| "loss": 0.4887, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.43495038588754137, |
| "grad_norm": 0.23103906214237213, |
| "learning_rate": 0.00023540630182421226, |
| "loss": 0.4831, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4355016538037486, |
| "grad_norm": 0.23840244114398956, |
| "learning_rate": 0.0002353233830845771, |
| "loss": 0.501, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4360529217199559, |
| "grad_norm": 0.2217642217874527, |
| "learning_rate": 0.00023524046434494195, |
| "loss": 0.4792, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.43660418963616315, |
| "grad_norm": 0.23963388800621033, |
| "learning_rate": 0.00023515754560530678, |
| "loss": 0.5043, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.43715545755237045, |
| "grad_norm": 0.2423614263534546, |
| "learning_rate": 0.00023507462686567164, |
| "loss": 0.4923, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.43770672546857775, |
| "grad_norm": 0.23817111551761627, |
| "learning_rate": 0.00023499170812603645, |
| "loss": 0.4836, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.438257993384785, |
| "grad_norm": 0.22162829339504242, |
| "learning_rate": 0.00023490878938640133, |
| "loss": 0.4919, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4388092613009923, |
| "grad_norm": 0.22646528482437134, |
| "learning_rate": 0.00023482587064676614, |
| "loss": 0.4727, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.43936052921719954, |
| "grad_norm": 0.2530063986778259, |
| "learning_rate": 0.000234742951907131, |
| "loss": 0.4896, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.43991179713340683, |
| "grad_norm": 0.24201619625091553, |
| "learning_rate": 0.00023466003316749583, |
| "loss": 0.4664, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.44046306504961413, |
| "grad_norm": 0.22222551703453064, |
| "learning_rate": 0.0002345771144278607, |
| "loss": 0.4914, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4410143329658214, |
| "grad_norm": 0.2384173721075058, |
| "learning_rate": 0.00023449419568822552, |
| "loss": 0.5029, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4415656008820287, |
| "grad_norm": 0.23053288459777832, |
| "learning_rate": 0.00023441127694859038, |
| "loss": 0.5011, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.4421168687982359, |
| "grad_norm": 0.2338135987520218, |
| "learning_rate": 0.0002343283582089552, |
| "loss": 0.5145, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4426681367144432, |
| "grad_norm": 0.2439098060131073, |
| "learning_rate": 0.00023424543946932007, |
| "loss": 0.5353, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.4432194046306505, |
| "grad_norm": 0.25395849347114563, |
| "learning_rate": 0.00023416252072968488, |
| "loss": 0.5287, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.44377067254685776, |
| "grad_norm": 0.24382875859737396, |
| "learning_rate": 0.0002340796019900497, |
| "loss": 0.4753, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.44432194046306506, |
| "grad_norm": 0.22943390905857086, |
| "learning_rate": 0.00023399668325041457, |
| "loss": 0.4899, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4448732083792723, |
| "grad_norm": 0.23026274144649506, |
| "learning_rate": 0.0002339137645107794, |
| "loss": 0.4776, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.4454244762954796, |
| "grad_norm": 0.263637512922287, |
| "learning_rate": 0.00023383084577114426, |
| "loss": 0.5036, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4459757442116869, |
| "grad_norm": 0.2239854782819748, |
| "learning_rate": 0.0002337479270315091, |
| "loss": 0.5074, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.44652701212789414, |
| "grad_norm": 0.24209174513816833, |
| "learning_rate": 0.00023366500829187395, |
| "loss": 0.4962, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.44707828004410144, |
| "grad_norm": 0.2574441730976105, |
| "learning_rate": 0.00023358208955223878, |
| "loss": 0.4833, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4476295479603087, |
| "grad_norm": 0.24309788644313812, |
| "learning_rate": 0.00023349917081260364, |
| "loss": 0.4971, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.448180815876516, |
| "grad_norm": 0.23553608357906342, |
| "learning_rate": 0.00023341625207296844, |
| "loss": 0.4951, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.4487320837927233, |
| "grad_norm": 0.23820781707763672, |
| "learning_rate": 0.0002333333333333333, |
| "loss": 0.4974, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.4492833517089305, |
| "grad_norm": 0.26907938718795776, |
| "learning_rate": 0.00023325041459369814, |
| "loss": 0.4904, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.4498346196251378, |
| "grad_norm": 0.2529081702232361, |
| "learning_rate": 0.000233167495854063, |
| "loss": 0.5047, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4503858875413451, |
| "grad_norm": 0.2080521285533905, |
| "learning_rate": 0.00023308457711442783, |
| "loss": 0.4676, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.45093715545755236, |
| "grad_norm": 0.25028982758522034, |
| "learning_rate": 0.00023300165837479269, |
| "loss": 0.5093, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.45148842337375966, |
| "grad_norm": 0.24182821810245514, |
| "learning_rate": 0.00023291873963515752, |
| "loss": 0.5082, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4520396912899669, |
| "grad_norm": 0.23918956518173218, |
| "learning_rate": 0.00023283582089552238, |
| "loss": 0.4887, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4525909592061742, |
| "grad_norm": 0.25016239285469055, |
| "learning_rate": 0.0002327529021558872, |
| "loss": 0.4887, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4531422271223815, |
| "grad_norm": 0.2489538937807083, |
| "learning_rate": 0.00023266998341625207, |
| "loss": 0.5089, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.45369349503858875, |
| "grad_norm": 0.2490735650062561, |
| "learning_rate": 0.00023258706467661687, |
| "loss": 0.4812, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.45424476295479604, |
| "grad_norm": 0.26727011799812317, |
| "learning_rate": 0.00023250414593698173, |
| "loss": 0.4943, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4547960308710033, |
| "grad_norm": 0.2334149330854416, |
| "learning_rate": 0.00023242122719734656, |
| "loss": 0.4743, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.4553472987872106, |
| "grad_norm": 0.24874447286128998, |
| "learning_rate": 0.00023233830845771142, |
| "loss": 0.5034, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4558985667034179, |
| "grad_norm": 0.26186123490333557, |
| "learning_rate": 0.00023225538971807626, |
| "loss": 0.4986, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4564498346196251, |
| "grad_norm": 0.22734478116035461, |
| "learning_rate": 0.00023217247097844111, |
| "loss": 0.479, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4570011025358324, |
| "grad_norm": 0.24908246099948883, |
| "learning_rate": 0.00023208955223880595, |
| "loss": 0.5176, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.45755237045203967, |
| "grad_norm": 0.2561740279197693, |
| "learning_rate": 0.0002320066334991708, |
| "loss": 0.5181, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.45810363836824697, |
| "grad_norm": 0.24820713698863983, |
| "learning_rate": 0.00023192371475953564, |
| "loss": 0.5168, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.45865490628445427, |
| "grad_norm": 0.22865842282772064, |
| "learning_rate": 0.0002318407960199005, |
| "loss": 0.5034, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4592061742006615, |
| "grad_norm": 0.2395135760307312, |
| "learning_rate": 0.0002317578772802653, |
| "loss": 0.4956, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4597574421168688, |
| "grad_norm": 0.2375570386648178, |
| "learning_rate": 0.00023167495854063016, |
| "loss": 0.4939, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.46030871003307605, |
| "grad_norm": 0.24207614362239838, |
| "learning_rate": 0.000231592039800995, |
| "loss": 0.4998, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.46085997794928335, |
| "grad_norm": 0.231749027967453, |
| "learning_rate": 0.00023150912106135985, |
| "loss": 0.5071, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.46141124586549065, |
| "grad_norm": 0.2529800236225128, |
| "learning_rate": 0.00023142620232172468, |
| "loss": 0.5152, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4619625137816979, |
| "grad_norm": 0.24748285114765167, |
| "learning_rate": 0.00023134328358208954, |
| "loss": 0.4929, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.4625137816979052, |
| "grad_norm": 0.2481345683336258, |
| "learning_rate": 0.00023126036484245438, |
| "loss": 0.5131, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.46306504961411243, |
| "grad_norm": 0.22557318210601807, |
| "learning_rate": 0.00023117744610281923, |
| "loss": 0.5111, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.46361631753031973, |
| "grad_norm": 0.24130286276340485, |
| "learning_rate": 0.00023109452736318407, |
| "loss": 0.486, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.46416758544652703, |
| "grad_norm": 0.2238035351037979, |
| "learning_rate": 0.00023101160862354893, |
| "loss": 0.4836, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.4647188533627343, |
| "grad_norm": 0.23449353873729706, |
| "learning_rate": 0.00023092868988391373, |
| "loss": 0.4714, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4652701212789416, |
| "grad_norm": 0.2284533679485321, |
| "learning_rate": 0.0002308457711442786, |
| "loss": 0.4739, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4658213891951488, |
| "grad_norm": 0.2420201152563095, |
| "learning_rate": 0.00023076285240464342, |
| "loss": 0.4797, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4663726571113561, |
| "grad_norm": 0.2669530212879181, |
| "learning_rate": 0.00023067993366500828, |
| "loss": 0.5017, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4669239250275634, |
| "grad_norm": 0.2415032982826233, |
| "learning_rate": 0.0002305970149253731, |
| "loss": 0.5023, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.46747519294377066, |
| "grad_norm": 0.2327703833580017, |
| "learning_rate": 0.00023051409618573797, |
| "loss": 0.5089, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.46802646085997796, |
| "grad_norm": 0.24102593958377838, |
| "learning_rate": 0.0002304311774461028, |
| "loss": 0.5092, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4685777287761852, |
| "grad_norm": 0.22270776331424713, |
| "learning_rate": 0.00023034825870646764, |
| "loss": 0.4677, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4691289966923925, |
| "grad_norm": 0.23423947393894196, |
| "learning_rate": 0.0002302653399668325, |
| "loss": 0.4909, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4696802646085998, |
| "grad_norm": 0.24698768556118011, |
| "learning_rate": 0.0002301824212271973, |
| "loss": 0.5, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.47023153252480704, |
| "grad_norm": 0.24313125014305115, |
| "learning_rate": 0.00023009950248756216, |
| "loss": 0.4908, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.47078280044101434, |
| "grad_norm": 0.2673037648200989, |
| "learning_rate": 0.000230016583747927, |
| "loss": 0.4971, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.47133406835722164, |
| "grad_norm": 0.23639419674873352, |
| "learning_rate": 0.00022993366500829185, |
| "loss": 0.486, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4718853362734289, |
| "grad_norm": 0.2316926270723343, |
| "learning_rate": 0.00022985074626865668, |
| "loss": 0.5045, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4724366041896362, |
| "grad_norm": 0.23044279217720032, |
| "learning_rate": 0.00022976782752902154, |
| "loss": 0.4752, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4729878721058434, |
| "grad_norm": 0.2599242329597473, |
| "learning_rate": 0.00022968490878938637, |
| "loss": 0.5058, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4735391400220507, |
| "grad_norm": 0.2420707494020462, |
| "learning_rate": 0.00022960199004975123, |
| "loss": 0.4689, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.474090407938258, |
| "grad_norm": 0.26549097895622253, |
| "learning_rate": 0.00022951907131011607, |
| "loss": 0.5161, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.47464167585446526, |
| "grad_norm": 0.24539636075496674, |
| "learning_rate": 0.00022943615257048092, |
| "loss": 0.4887, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.47519294377067256, |
| "grad_norm": 0.23257140815258026, |
| "learning_rate": 0.00022935323383084573, |
| "loss": 0.4841, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.4757442116868798, |
| "grad_norm": 0.27551430463790894, |
| "learning_rate": 0.0002292703150912106, |
| "loss": 0.5369, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4762954796030871, |
| "grad_norm": 0.2414499670267105, |
| "learning_rate": 0.00022918739635157542, |
| "loss": 0.5031, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4768467475192944, |
| "grad_norm": 0.24039071798324585, |
| "learning_rate": 0.00022910447761194028, |
| "loss": 0.4958, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.47739801543550164, |
| "grad_norm": 0.23044785857200623, |
| "learning_rate": 0.0002290215588723051, |
| "loss": 0.4884, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.47794928335170894, |
| "grad_norm": 0.2677319645881653, |
| "learning_rate": 0.00022893864013266997, |
| "loss": 0.5096, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4785005512679162, |
| "grad_norm": 0.22575704753398895, |
| "learning_rate": 0.0002288557213930348, |
| "loss": 0.4968, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4790518191841235, |
| "grad_norm": 0.24338865280151367, |
| "learning_rate": 0.00022877280265339966, |
| "loss": 0.4669, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.4796030871003308, |
| "grad_norm": 0.25083914399147034, |
| "learning_rate": 0.0002286898839137645, |
| "loss": 0.5035, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.480154355016538, |
| "grad_norm": 0.24006043374538422, |
| "learning_rate": 0.00022860696517412935, |
| "loss": 0.459, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.4807056229327453, |
| "grad_norm": 0.2326238453388214, |
| "learning_rate": 0.00022852404643449416, |
| "loss": 0.4599, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.48125689084895257, |
| "grad_norm": 0.24134741723537445, |
| "learning_rate": 0.00022844112769485902, |
| "loss": 0.4755, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.48180815876515987, |
| "grad_norm": 0.2148948460817337, |
| "learning_rate": 0.00022835820895522385, |
| "loss": 0.4759, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.48235942668136716, |
| "grad_norm": 0.2361116260290146, |
| "learning_rate": 0.0002282752902155887, |
| "loss": 0.4771, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.4829106945975744, |
| "grad_norm": 0.24435687065124512, |
| "learning_rate": 0.00022819237147595354, |
| "loss": 0.492, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4834619625137817, |
| "grad_norm": 0.23266686499118805, |
| "learning_rate": 0.0002281094527363184, |
| "loss": 0.5269, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.48401323042998895, |
| "grad_norm": 0.2184826284646988, |
| "learning_rate": 0.00022802653399668323, |
| "loss": 0.4741, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.48456449834619625, |
| "grad_norm": 0.24351243674755096, |
| "learning_rate": 0.0002279436152570481, |
| "loss": 0.5121, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.48511576626240355, |
| "grad_norm": 0.2366686463356018, |
| "learning_rate": 0.00022786069651741292, |
| "loss": 0.5002, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4856670341786108, |
| "grad_norm": 0.23044729232788086, |
| "learning_rate": 0.00022777777777777778, |
| "loss": 0.4742, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4862183020948181, |
| "grad_norm": 0.23718389868736267, |
| "learning_rate": 0.0002276948590381426, |
| "loss": 0.4864, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.48676957001102533, |
| "grad_norm": 0.25451889634132385, |
| "learning_rate": 0.00022761194029850745, |
| "loss": 0.4809, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.48732083792723263, |
| "grad_norm": 0.22073966264724731, |
| "learning_rate": 0.00022752902155887228, |
| "loss": 0.4853, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.48787210584343993, |
| "grad_norm": 0.24639108777046204, |
| "learning_rate": 0.00022744610281923714, |
| "loss": 0.4848, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4884233737596472, |
| "grad_norm": 0.2543313503265381, |
| "learning_rate": 0.00022736318407960197, |
| "loss": 0.5109, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.48897464167585447, |
| "grad_norm": 0.24580398201942444, |
| "learning_rate": 0.00022728026533996683, |
| "loss": 0.4919, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.4895259095920617, |
| "grad_norm": 0.23678098618984222, |
| "learning_rate": 0.00022719734660033166, |
| "loss": 0.48, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.490077177508269, |
| "grad_norm": 0.2219116985797882, |
| "learning_rate": 0.00022711442786069652, |
| "loss": 0.4647, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4906284454244763, |
| "grad_norm": 0.2577376067638397, |
| "learning_rate": 0.00022703150912106135, |
| "loss": 0.4729, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.49117971334068355, |
| "grad_norm": 0.2527279853820801, |
| "learning_rate": 0.0002269485903814262, |
| "loss": 0.4899, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.49173098125689085, |
| "grad_norm": 0.2718394100666046, |
| "learning_rate": 0.00022686567164179102, |
| "loss": 0.5247, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.49228224917309815, |
| "grad_norm": 0.23161333799362183, |
| "learning_rate": 0.00022678275290215585, |
| "loss": 0.4786, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4928335170893054, |
| "grad_norm": 0.22976607084274292, |
| "learning_rate": 0.0002266998341625207, |
| "loss": 0.4963, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4933847850055127, |
| "grad_norm": 0.26446732878685, |
| "learning_rate": 0.00022661691542288554, |
| "loss": 0.5076, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.49393605292171994, |
| "grad_norm": 0.2513757348060608, |
| "learning_rate": 0.0002265339966832504, |
| "loss": 0.4906, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.49448732083792724, |
| "grad_norm": 0.2355221062898636, |
| "learning_rate": 0.00022645107794361523, |
| "loss": 0.5083, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.49503858875413453, |
| "grad_norm": 0.24008940160274506, |
| "learning_rate": 0.0002263681592039801, |
| "loss": 0.5075, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.4955898566703418, |
| "grad_norm": 0.23088522255420685, |
| "learning_rate": 0.00022628524046434492, |
| "loss": 0.4975, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4961411245865491, |
| "grad_norm": 0.2754332721233368, |
| "learning_rate": 0.00022620232172470978, |
| "loss": 0.5144, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4966923925027563, |
| "grad_norm": 0.25219646096229553, |
| "learning_rate": 0.00022611940298507459, |
| "loss": 0.4854, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4972436604189636, |
| "grad_norm": 0.2489755004644394, |
| "learning_rate": 0.00022603648424543944, |
| "loss": 0.4708, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4977949283351709, |
| "grad_norm": 0.24141034483909607, |
| "learning_rate": 0.00022595356550580428, |
| "loss": 0.4917, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.49834619625137816, |
| "grad_norm": 0.23453152179718018, |
| "learning_rate": 0.00022587064676616914, |
| "loss": 0.4754, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.49889746416758546, |
| "grad_norm": 0.25601381063461304, |
| "learning_rate": 0.00022578772802653397, |
| "loss": 0.4909, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4994487320837927, |
| "grad_norm": 0.22102084755897522, |
| "learning_rate": 0.00022570480928689883, |
| "loss": 0.4673, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.2369261085987091, |
| "learning_rate": 0.00022562189054726366, |
| "loss": 0.4544, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5005512679162073, |
| "grad_norm": 0.25789421796798706, |
| "learning_rate": 0.00022553897180762852, |
| "loss": 0.5032, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5011025358324146, |
| "grad_norm": 0.2342817783355713, |
| "learning_rate": 0.00022545605306799335, |
| "loss": 0.4649, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5016538037486218, |
| "grad_norm": 0.25317567586898804, |
| "learning_rate": 0.0002253731343283582, |
| "loss": 0.4974, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5022050716648291, |
| "grad_norm": 0.23973771929740906, |
| "learning_rate": 0.00022529021558872301, |
| "loss": 0.5093, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5027563395810364, |
| "grad_norm": 0.24858252704143524, |
| "learning_rate": 0.00022520729684908787, |
| "loss": 0.4781, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5033076074972437, |
| "grad_norm": 0.25571468472480774, |
| "learning_rate": 0.0002251243781094527, |
| "loss": 0.4992, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.503858875413451, |
| "grad_norm": 0.2476612776517868, |
| "learning_rate": 0.00022504145936981756, |
| "loss": 0.4803, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5044101433296582, |
| "grad_norm": 0.24917398393154144, |
| "learning_rate": 0.0002249585406301824, |
| "loss": 0.5022, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5049614112458655, |
| "grad_norm": 0.24204300343990326, |
| "learning_rate": 0.00022487562189054726, |
| "loss": 0.4919, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5055126791620728, |
| "grad_norm": 0.23442697525024414, |
| "learning_rate": 0.0002247927031509121, |
| "loss": 0.4754, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5060639470782801, |
| "grad_norm": 0.26630768179893494, |
| "learning_rate": 0.00022470978441127695, |
| "loss": 0.5119, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5066152149944874, |
| "grad_norm": 0.2312323898077011, |
| "learning_rate": 0.00022462686567164175, |
| "loss": 0.4735, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5071664829106945, |
| "grad_norm": 0.23444309830665588, |
| "learning_rate": 0.0002245439469320066, |
| "loss": 0.4718, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5077177508269018, |
| "grad_norm": 0.2260974645614624, |
| "learning_rate": 0.00022446102819237144, |
| "loss": 0.48, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5082690187431091, |
| "grad_norm": 0.2403731793165207, |
| "learning_rate": 0.0002243781094527363, |
| "loss": 0.5014, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5088202866593164, |
| "grad_norm": 0.240118607878685, |
| "learning_rate": 0.00022429519071310113, |
| "loss": 0.4669, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5093715545755237, |
| "grad_norm": 0.2268829345703125, |
| "learning_rate": 0.000224212271973466, |
| "loss": 0.4924, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.5099228224917309, |
| "grad_norm": 0.23937518894672394, |
| "learning_rate": 0.00022412935323383083, |
| "loss": 0.4743, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5104740904079382, |
| "grad_norm": 0.25224533677101135, |
| "learning_rate": 0.00022404643449419568, |
| "loss": 0.502, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5110253583241455, |
| "grad_norm": 0.23434899747371674, |
| "learning_rate": 0.00022396351575456052, |
| "loss": 0.4825, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5115766262403528, |
| "grad_norm": 0.249129980802536, |
| "learning_rate": 0.00022388059701492538, |
| "loss": 0.4689, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5121278941565601, |
| "grad_norm": 0.2530542314052582, |
| "learning_rate": 0.00022379767827529018, |
| "loss": 0.4726, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5126791620727673, |
| "grad_norm": 0.2488546073436737, |
| "learning_rate": 0.00022371475953565504, |
| "loss": 0.5024, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5132304299889746, |
| "grad_norm": 0.23048900067806244, |
| "learning_rate": 0.00022363184079601987, |
| "loss": 0.4633, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5137816979051819, |
| "grad_norm": 0.2485697716474533, |
| "learning_rate": 0.00022354892205638473, |
| "loss": 0.4955, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5143329658213892, |
| "grad_norm": 0.23724399507045746, |
| "learning_rate": 0.00022346600331674956, |
| "loss": 0.4859, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5148842337375965, |
| "grad_norm": 0.2424692064523697, |
| "learning_rate": 0.00022338308457711442, |
| "loss": 0.5115, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5154355016538037, |
| "grad_norm": 0.24387586116790771, |
| "learning_rate": 0.00022330016583747925, |
| "loss": 0.4969, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.515986769570011, |
| "grad_norm": 0.22749263048171997, |
| "learning_rate": 0.0002232172470978441, |
| "loss": 0.5014, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5165380374862183, |
| "grad_norm": 0.22205640375614166, |
| "learning_rate": 0.00022313432835820894, |
| "loss": 0.4912, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5170893054024256, |
| "grad_norm": 0.23504669964313507, |
| "learning_rate": 0.00022305140961857375, |
| "loss": 0.4841, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5176405733186329, |
| "grad_norm": 0.2282828390598297, |
| "learning_rate": 0.0002229684908789386, |
| "loss": 0.463, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5181918412348401, |
| "grad_norm": 0.23592360317707062, |
| "learning_rate": 0.00022288557213930344, |
| "loss": 0.48, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5187431091510474, |
| "grad_norm": 0.2408529818058014, |
| "learning_rate": 0.0002228026533996683, |
| "loss": 0.485, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5192943770672547, |
| "grad_norm": 0.2507123351097107, |
| "learning_rate": 0.00022271973466003313, |
| "loss": 0.4696, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.519845644983462, |
| "grad_norm": 0.21724364161491394, |
| "learning_rate": 0.000222636815920398, |
| "loss": 0.4883, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5203969128996693, |
| "grad_norm": 0.22868378460407257, |
| "learning_rate": 0.00022255389718076282, |
| "loss": 0.4852, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5209481808158766, |
| "grad_norm": 0.23937176167964935, |
| "learning_rate": 0.00022247097844112768, |
| "loss": 0.4966, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5214994487320838, |
| "grad_norm": 0.24673771858215332, |
| "learning_rate": 0.00022238805970149251, |
| "loss": 0.5089, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5220507166482911, |
| "grad_norm": 0.23318541049957275, |
| "learning_rate": 0.00022230514096185737, |
| "loss": 0.4847, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5226019845644984, |
| "grad_norm": 0.2237371951341629, |
| "learning_rate": 0.00022222222222222218, |
| "loss": 0.4745, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5231532524807057, |
| "grad_norm": 0.22587883472442627, |
| "learning_rate": 0.00022213930348258704, |
| "loss": 0.502, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.523704520396913, |
| "grad_norm": 0.237474262714386, |
| "learning_rate": 0.00022205638474295187, |
| "loss": 0.5003, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5242557883131201, |
| "grad_norm": 0.2394198328256607, |
| "learning_rate": 0.00022197346600331673, |
| "loss": 0.5032, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5248070562293274, |
| "grad_norm": 0.22187075018882751, |
| "learning_rate": 0.00022189054726368156, |
| "loss": 0.4543, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5253583241455347, |
| "grad_norm": 0.23657891154289246, |
| "learning_rate": 0.00022180762852404642, |
| "loss": 0.496, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.525909592061742, |
| "grad_norm": 0.23503652215003967, |
| "learning_rate": 0.00022172470978441125, |
| "loss": 0.4724, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5264608599779493, |
| "grad_norm": 0.2500884532928467, |
| "learning_rate": 0.0002216417910447761, |
| "loss": 0.4837, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5270121278941565, |
| "grad_norm": 0.2291148602962494, |
| "learning_rate": 0.00022155887230514094, |
| "loss": 0.4884, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5275633958103638, |
| "grad_norm": 0.2256416380405426, |
| "learning_rate": 0.0002214759535655058, |
| "loss": 0.4743, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5281146637265711, |
| "grad_norm": 0.23922450840473175, |
| "learning_rate": 0.0002213930348258706, |
| "loss": 0.4784, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5286659316427784, |
| "grad_norm": 0.24849876761436462, |
| "learning_rate": 0.00022131011608623547, |
| "loss": 0.498, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5292171995589857, |
| "grad_norm": 0.2211284190416336, |
| "learning_rate": 0.0002212271973466003, |
| "loss": 0.4711, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5297684674751929, |
| "grad_norm": 0.2296118289232254, |
| "learning_rate": 0.00022114427860696516, |
| "loss": 0.49, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5303197353914002, |
| "grad_norm": 0.22921642661094666, |
| "learning_rate": 0.00022106135986733, |
| "loss": 0.4864, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5308710033076075, |
| "grad_norm": 0.23854584991931915, |
| "learning_rate": 0.00022097844112769485, |
| "loss": 0.4976, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5314222712238148, |
| "grad_norm": 0.22192314267158508, |
| "learning_rate": 0.00022089552238805968, |
| "loss": 0.4889, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5319735391400221, |
| "grad_norm": 0.24450358748435974, |
| "learning_rate": 0.00022081260364842454, |
| "loss": 0.4784, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5325248070562293, |
| "grad_norm": 0.2145015150308609, |
| "learning_rate": 0.00022072968490878937, |
| "loss": 0.4543, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5330760749724366, |
| "grad_norm": 0.22203224897384644, |
| "learning_rate": 0.00022064676616915423, |
| "loss": 0.4892, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5336273428886439, |
| "grad_norm": 0.2423708289861679, |
| "learning_rate": 0.00022056384742951904, |
| "loss": 0.4866, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5341786108048512, |
| "grad_norm": 0.2290901392698288, |
| "learning_rate": 0.0002204809286898839, |
| "loss": 0.4809, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5347298787210585, |
| "grad_norm": 0.22281813621520996, |
| "learning_rate": 0.00022039800995024873, |
| "loss": 0.5083, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5352811466372657, |
| "grad_norm": 0.23863239586353302, |
| "learning_rate": 0.0002203150912106136, |
| "loss": 0.4732, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.535832414553473, |
| "grad_norm": 0.2304835319519043, |
| "learning_rate": 0.00022023217247097842, |
| "loss": 0.4898, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5363836824696803, |
| "grad_norm": 0.23452985286712646, |
| "learning_rate": 0.00022014925373134328, |
| "loss": 0.5177, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5369349503858876, |
| "grad_norm": 0.252209335565567, |
| "learning_rate": 0.0002200663349917081, |
| "loss": 0.482, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5374862183020949, |
| "grad_norm": 0.23390796780586243, |
| "learning_rate": 0.00021998341625207297, |
| "loss": 0.4913, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.538037486218302, |
| "grad_norm": 0.24304579198360443, |
| "learning_rate": 0.0002199004975124378, |
| "loss": 0.4963, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5385887541345094, |
| "grad_norm": 0.22291411459445953, |
| "learning_rate": 0.00021981757877280266, |
| "loss": 0.4835, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5391400220507166, |
| "grad_norm": 0.23994603753089905, |
| "learning_rate": 0.00021973466003316746, |
| "loss": 0.4596, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.539691289966924, |
| "grad_norm": 0.2375342845916748, |
| "learning_rate": 0.00021965174129353232, |
| "loss": 0.5138, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5402425578831312, |
| "grad_norm": 0.22774764895439148, |
| "learning_rate": 0.00021956882255389716, |
| "loss": 0.4949, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5407938257993384, |
| "grad_norm": 0.2277144491672516, |
| "learning_rate": 0.000219485903814262, |
| "loss": 0.4843, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5413450937155457, |
| "grad_norm": 0.23078951239585876, |
| "learning_rate": 0.00021940298507462685, |
| "loss": 0.5089, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.541896361631753, |
| "grad_norm": 0.23093165457248688, |
| "learning_rate": 0.00021932006633499168, |
| "loss": 0.4913, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5424476295479603, |
| "grad_norm": 0.22961430251598358, |
| "learning_rate": 0.00021923714759535654, |
| "loss": 0.4957, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5429988974641676, |
| "grad_norm": 0.2303048074245453, |
| "learning_rate": 0.00021915422885572137, |
| "loss": 0.4991, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5435501653803748, |
| "grad_norm": 0.2352553904056549, |
| "learning_rate": 0.00021907131011608623, |
| "loss": 0.4838, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5441014332965821, |
| "grad_norm": 0.2251589596271515, |
| "learning_rate": 0.00021898839137645103, |
| "loss": 0.4928, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5446527012127894, |
| "grad_norm": 0.2577657103538513, |
| "learning_rate": 0.0002189054726368159, |
| "loss": 0.4897, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5452039691289967, |
| "grad_norm": 0.23328843712806702, |
| "learning_rate": 0.00021882255389718073, |
| "loss": 0.4949, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.545755237045204, |
| "grad_norm": 0.23206306993961334, |
| "learning_rate": 0.00021873963515754558, |
| "loss": 0.4791, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5463065049614112, |
| "grad_norm": 0.2417128086090088, |
| "learning_rate": 0.00021865671641791042, |
| "loss": 0.5161, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5468577728776185, |
| "grad_norm": 0.2541581988334656, |
| "learning_rate": 0.00021857379767827528, |
| "loss": 0.5253, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5474090407938258, |
| "grad_norm": 0.23152418434619904, |
| "learning_rate": 0.0002184908789386401, |
| "loss": 0.4854, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5479603087100331, |
| "grad_norm": 0.21505197882652283, |
| "learning_rate": 0.00021840796019900497, |
| "loss": 0.4664, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5485115766262404, |
| "grad_norm": 0.23766584694385529, |
| "learning_rate": 0.0002183250414593698, |
| "loss": 0.4976, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5490628445424476, |
| "grad_norm": 0.23223701119422913, |
| "learning_rate": 0.00021824212271973466, |
| "loss": 0.4485, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5496141124586549, |
| "grad_norm": 0.25161734223365784, |
| "learning_rate": 0.00021815920398009946, |
| "loss": 0.4818, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.5501653803748622, |
| "grad_norm": 0.23082609474658966, |
| "learning_rate": 0.00021807628524046432, |
| "loss": 0.502, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.5507166482910695, |
| "grad_norm": 0.23080939054489136, |
| "learning_rate": 0.00021799336650082915, |
| "loss": 0.5005, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5512679162072768, |
| "grad_norm": 0.22184456884860992, |
| "learning_rate": 0.00021791044776119401, |
| "loss": 0.4833, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5512679162072768, |
| "eval_loss": 0.48357656598091125, |
| "eval_runtime": 311.7364, |
| "eval_samples_per_second": 3.737, |
| "eval_steps_per_second": 0.468, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.551819184123484, |
| "grad_norm": 0.25572869181632996, |
| "learning_rate": 0.00021782752902155885, |
| "loss": 0.4925, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5523704520396913, |
| "grad_norm": 0.2477078139781952, |
| "learning_rate": 0.0002177446102819237, |
| "loss": 0.4847, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5529217199558986, |
| "grad_norm": 0.23749567568302155, |
| "learning_rate": 0.00021766169154228854, |
| "loss": 0.4933, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5534729878721059, |
| "grad_norm": 0.22248369455337524, |
| "learning_rate": 0.0002175787728026534, |
| "loss": 0.4883, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5540242557883132, |
| "grad_norm": 0.23769117891788483, |
| "learning_rate": 0.00021749585406301823, |
| "loss": 0.4977, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5545755237045203, |
| "grad_norm": 0.22872841358184814, |
| "learning_rate": 0.0002174129353233831, |
| "loss": 0.4952, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5551267916207276, |
| "grad_norm": 0.23627693951129913, |
| "learning_rate": 0.0002173300165837479, |
| "loss": 0.4653, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5556780595369349, |
| "grad_norm": 0.24900414049625397, |
| "learning_rate": 0.00021724709784411275, |
| "loss": 0.4833, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5562293274531422, |
| "grad_norm": 0.2288302332162857, |
| "learning_rate": 0.00021716417910447758, |
| "loss": 0.4735, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5567805953693495, |
| "grad_norm": 0.2251368761062622, |
| "learning_rate": 0.00021708126036484244, |
| "loss": 0.4887, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5573318632855567, |
| "grad_norm": 0.2496083676815033, |
| "learning_rate": 0.00021699834162520727, |
| "loss": 0.4959, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.557883131201764, |
| "grad_norm": 0.23241998255252838, |
| "learning_rate": 0.00021691542288557213, |
| "loss": 0.462, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5584343991179713, |
| "grad_norm": 0.239312544465065, |
| "learning_rate": 0.00021683250414593697, |
| "loss": 0.4792, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5589856670341786, |
| "grad_norm": 0.22684402763843536, |
| "learning_rate": 0.00021674958540630182, |
| "loss": 0.4825, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.5595369349503859, |
| "grad_norm": 0.23261615633964539, |
| "learning_rate": 0.00021666666666666666, |
| "loss": 0.4604, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5600882028665931, |
| "grad_norm": 0.26163482666015625, |
| "learning_rate": 0.00021658374792703152, |
| "loss": 0.5158, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5606394707828004, |
| "grad_norm": 0.2275197058916092, |
| "learning_rate": 0.00021650082918739632, |
| "loss": 0.4733, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5611907386990077, |
| "grad_norm": 0.2636192739009857, |
| "learning_rate": 0.00021641791044776118, |
| "loss": 0.5018, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.561742006615215, |
| "grad_norm": 0.2224932312965393, |
| "learning_rate": 0.000216334991708126, |
| "loss": 0.5064, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5622932745314223, |
| "grad_norm": 0.2518375813961029, |
| "learning_rate": 0.00021625207296849087, |
| "loss": 0.4874, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5628445424476296, |
| "grad_norm": 0.24104849994182587, |
| "learning_rate": 0.0002161691542288557, |
| "loss": 0.4864, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5633958103638368, |
| "grad_norm": 0.25608646869659424, |
| "learning_rate": 0.00021608623548922056, |
| "loss": 0.4752, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5639470782800441, |
| "grad_norm": 0.24174031615257263, |
| "learning_rate": 0.0002160033167495854, |
| "loss": 0.4986, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5644983461962514, |
| "grad_norm": 0.23120078444480896, |
| "learning_rate": 0.00021592039800995025, |
| "loss": 0.4615, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5650496141124587, |
| "grad_norm": 0.2599080204963684, |
| "learning_rate": 0.00021583747927031509, |
| "loss": 0.4994, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.565600882028666, |
| "grad_norm": 0.23741313815116882, |
| "learning_rate": 0.0002157545605306799, |
| "loss": 0.4745, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5661521499448732, |
| "grad_norm": 0.24400565028190613, |
| "learning_rate": 0.00021567164179104475, |
| "loss": 0.4891, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5667034178610805, |
| "grad_norm": 0.2503412663936615, |
| "learning_rate": 0.00021558872305140958, |
| "loss": 0.5014, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5672546857772878, |
| "grad_norm": 0.23471197485923767, |
| "learning_rate": 0.00021550580431177444, |
| "loss": 0.4958, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5678059536934951, |
| "grad_norm": 0.2323479950428009, |
| "learning_rate": 0.00021542288557213927, |
| "loss": 0.4691, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5683572216097024, |
| "grad_norm": 0.23778273165225983, |
| "learning_rate": 0.00021533996683250413, |
| "loss": 0.4881, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5689084895259096, |
| "grad_norm": 0.21465396881103516, |
| "learning_rate": 0.00021525704809286896, |
| "loss": 0.4689, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5694597574421169, |
| "grad_norm": 0.2397712767124176, |
| "learning_rate": 0.00021517412935323382, |
| "loss": 0.4873, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5700110253583242, |
| "grad_norm": 0.2142529934644699, |
| "learning_rate": 0.00021509121061359863, |
| "loss": 0.4686, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5705622932745315, |
| "grad_norm": 0.24334488809108734, |
| "learning_rate": 0.00021500829187396351, |
| "loss": 0.508, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5711135611907387, |
| "grad_norm": 0.2391451597213745, |
| "learning_rate": 0.00021492537313432832, |
| "loss": 0.5049, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5716648291069459, |
| "grad_norm": 0.25972914695739746, |
| "learning_rate": 0.00021484245439469318, |
| "loss": 0.5022, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5722160970231532, |
| "grad_norm": 0.23072604835033417, |
| "learning_rate": 0.000214759535655058, |
| "loss": 0.4888, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5727673649393605, |
| "grad_norm": 0.2415681630373001, |
| "learning_rate": 0.00021467661691542287, |
| "loss": 0.4787, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5733186328555678, |
| "grad_norm": 0.24707187712192535, |
| "learning_rate": 0.0002145936981757877, |
| "loss": 0.4877, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5738699007717751, |
| "grad_norm": 0.24816669523715973, |
| "learning_rate": 0.00021451077943615256, |
| "loss": 0.4704, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5744211686879823, |
| "grad_norm": 0.23687899112701416, |
| "learning_rate": 0.0002144278606965174, |
| "loss": 0.4757, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5749724366041896, |
| "grad_norm": 0.25993046164512634, |
| "learning_rate": 0.00021434494195688225, |
| "loss": 0.4919, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5755237045203969, |
| "grad_norm": 0.23352675139904022, |
| "learning_rate": 0.00021426202321724706, |
| "loss": 0.4762, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5760749724366042, |
| "grad_norm": 0.23056983947753906, |
| "learning_rate": 0.00021417910447761192, |
| "loss": 0.4638, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5766262403528115, |
| "grad_norm": 0.22587046027183533, |
| "learning_rate": 0.00021409618573797675, |
| "loss": 0.4777, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5771775082690187, |
| "grad_norm": 0.2561855912208557, |
| "learning_rate": 0.0002140132669983416, |
| "loss": 0.5056, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.577728776185226, |
| "grad_norm": 0.24537737667560577, |
| "learning_rate": 0.00021393034825870644, |
| "loss": 0.497, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5782800441014333, |
| "grad_norm": 0.22903874516487122, |
| "learning_rate": 0.0002138474295190713, |
| "loss": 0.4749, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5788313120176406, |
| "grad_norm": 0.24069786071777344, |
| "learning_rate": 0.00021376451077943613, |
| "loss": 0.4901, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5793825799338479, |
| "grad_norm": 0.2355291098356247, |
| "learning_rate": 0.000213681592039801, |
| "loss": 0.478, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5799338478500551, |
| "grad_norm": 0.24105066061019897, |
| "learning_rate": 0.00021359867330016582, |
| "loss": 0.4832, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5804851157662624, |
| "grad_norm": 0.22479461133480072, |
| "learning_rate": 0.00021351575456053068, |
| "loss": 0.4657, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5810363836824697, |
| "grad_norm": 0.24978676438331604, |
| "learning_rate": 0.00021343283582089549, |
| "loss": 0.4795, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.581587651598677, |
| "grad_norm": 0.22877342998981476, |
| "learning_rate": 0.00021334991708126034, |
| "loss": 0.476, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5821389195148843, |
| "grad_norm": 0.230316624045372, |
| "learning_rate": 0.00021326699834162518, |
| "loss": 0.4854, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5826901874310915, |
| "grad_norm": 0.2178526371717453, |
| "learning_rate": 0.00021318407960199004, |
| "loss": 0.4798, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5832414553472988, |
| "grad_norm": 0.23913492262363434, |
| "learning_rate": 0.00021310116086235487, |
| "loss": 0.4759, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5837927232635061, |
| "grad_norm": 0.23534056544303894, |
| "learning_rate": 0.00021301824212271973, |
| "loss": 0.475, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5843439911797134, |
| "grad_norm": 0.23057684302330017, |
| "learning_rate": 0.00021293532338308456, |
| "loss": 0.4835, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5848952590959207, |
| "grad_norm": 0.2420724630355835, |
| "learning_rate": 0.00021285240464344942, |
| "loss": 0.4684, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5854465270121278, |
| "grad_norm": 0.23270656168460846, |
| "learning_rate": 0.00021276948590381425, |
| "loss": 0.4714, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5859977949283351, |
| "grad_norm": 0.22105982899665833, |
| "learning_rate": 0.0002126865671641791, |
| "loss": 0.4739, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5865490628445424, |
| "grad_norm": 0.22896204888820648, |
| "learning_rate": 0.00021260364842454391, |
| "loss": 0.4792, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5871003307607497, |
| "grad_norm": 0.22883784770965576, |
| "learning_rate": 0.00021252072968490877, |
| "loss": 0.4775, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.587651598676957, |
| "grad_norm": 0.22493380308151245, |
| "learning_rate": 0.0002124378109452736, |
| "loss": 0.4565, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5882028665931642, |
| "grad_norm": 0.20627589523792267, |
| "learning_rate": 0.00021235489220563846, |
| "loss": 0.4421, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5887541345093715, |
| "grad_norm": 0.22995707392692566, |
| "learning_rate": 0.0002122719734660033, |
| "loss": 0.5007, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5893054024255788, |
| "grad_norm": 0.22702358663082123, |
| "learning_rate": 0.00021218905472636813, |
| "loss": 0.4848, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5898566703417861, |
| "grad_norm": 0.2274836003780365, |
| "learning_rate": 0.000212106135986733, |
| "loss": 0.4512, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5904079382579934, |
| "grad_norm": 0.25226280093193054, |
| "learning_rate": 0.00021202321724709782, |
| "loss": 0.4739, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5909592061742006, |
| "grad_norm": 0.21378135681152344, |
| "learning_rate": 0.00021194029850746268, |
| "loss": 0.4902, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5915104740904079, |
| "grad_norm": 0.2266150563955307, |
| "learning_rate": 0.00021185737976782748, |
| "loss": 0.4787, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5920617420066152, |
| "grad_norm": 0.24346543848514557, |
| "learning_rate": 0.00021177446102819234, |
| "loss": 0.4758, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5926130099228225, |
| "grad_norm": 0.23416201770305634, |
| "learning_rate": 0.00021169154228855718, |
| "loss": 0.4976, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5931642778390298, |
| "grad_norm": 0.22314603626728058, |
| "learning_rate": 0.00021160862354892203, |
| "loss": 0.483, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.593715545755237, |
| "grad_norm": 0.23636144399642944, |
| "learning_rate": 0.00021152570480928687, |
| "loss": 0.4883, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5942668136714443, |
| "grad_norm": 0.25075021386146545, |
| "learning_rate": 0.00021144278606965173, |
| "loss": 0.5093, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5948180815876516, |
| "grad_norm": 0.25016966462135315, |
| "learning_rate": 0.00021135986733001656, |
| "loss": 0.4901, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5953693495038589, |
| "grad_norm": 0.22505664825439453, |
| "learning_rate": 0.00021127694859038142, |
| "loss": 0.4982, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5959206174200662, |
| "grad_norm": 0.2462112158536911, |
| "learning_rate": 0.00021119402985074625, |
| "loss": 0.4925, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5964718853362734, |
| "grad_norm": 0.24048367142677307, |
| "learning_rate": 0.0002111111111111111, |
| "loss": 0.4711, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5970231532524807, |
| "grad_norm": 0.2399929016828537, |
| "learning_rate": 0.0002110281923714759, |
| "loss": 0.4534, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.597574421168688, |
| "grad_norm": 0.22102728486061096, |
| "learning_rate": 0.00021094527363184077, |
| "loss": 0.475, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5981256890848953, |
| "grad_norm": 0.22623874247074127, |
| "learning_rate": 0.0002108623548922056, |
| "loss": 0.4771, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5986769570011026, |
| "grad_norm": 0.22739335894584656, |
| "learning_rate": 0.00021077943615257046, |
| "loss": 0.4524, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5992282249173098, |
| "grad_norm": 0.22587355971336365, |
| "learning_rate": 0.0002106965174129353, |
| "loss": 0.481, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5997794928335171, |
| "grad_norm": 0.238664448261261, |
| "learning_rate": 0.00021061359867330015, |
| "loss": 0.4812, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6003307607497244, |
| "grad_norm": 0.2626015245914459, |
| "learning_rate": 0.00021053067993366499, |
| "loss": 0.5396, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6008820286659317, |
| "grad_norm": 0.23110847175121307, |
| "learning_rate": 0.00021044776119402985, |
| "loss": 0.4768, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.601433296582139, |
| "grad_norm": 0.2324095070362091, |
| "learning_rate": 0.00021036484245439468, |
| "loss": 0.4569, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6019845644983461, |
| "grad_norm": 0.2298206239938736, |
| "learning_rate": 0.00021028192371475954, |
| "loss": 0.4867, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6025358324145534, |
| "grad_norm": 0.23651166260242462, |
| "learning_rate": 0.00021019900497512434, |
| "loss": 0.5119, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6030871003307607, |
| "grad_norm": 0.24213020503520966, |
| "learning_rate": 0.0002101160862354892, |
| "loss": 0.4989, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.603638368246968, |
| "grad_norm": 0.2975553572177887, |
| "learning_rate": 0.00021003316749585403, |
| "loss": 0.4937, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6041896361631753, |
| "grad_norm": 0.22954276204109192, |
| "learning_rate": 0.0002099502487562189, |
| "loss": 0.4569, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6047409040793826, |
| "grad_norm": 0.23405365645885468, |
| "learning_rate": 0.00020986733001658372, |
| "loss": 0.476, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.6052921719955898, |
| "grad_norm": 0.22513137757778168, |
| "learning_rate": 0.00020978441127694858, |
| "loss": 0.4561, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.6058434399117971, |
| "grad_norm": 0.2296430617570877, |
| "learning_rate": 0.00020970149253731341, |
| "loss": 0.4628, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6063947078280044, |
| "grad_norm": 0.24347829818725586, |
| "learning_rate": 0.00020961857379767827, |
| "loss": 0.5152, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6069459757442117, |
| "grad_norm": 0.2580801546573639, |
| "learning_rate": 0.0002095356550580431, |
| "loss": 0.4751, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.607497243660419, |
| "grad_norm": 0.22813639044761658, |
| "learning_rate": 0.00020945273631840797, |
| "loss": 0.4807, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6080485115766262, |
| "grad_norm": 0.22047673165798187, |
| "learning_rate": 0.00020936981757877277, |
| "loss": 0.4686, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6085997794928335, |
| "grad_norm": 0.2241135686635971, |
| "learning_rate": 0.00020928689883913763, |
| "loss": 0.4826, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6091510474090408, |
| "grad_norm": 0.24011586606502533, |
| "learning_rate": 0.00020920398009950246, |
| "loss": 0.4559, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6097023153252481, |
| "grad_norm": 0.2351463884115219, |
| "learning_rate": 0.00020912106135986732, |
| "loss": 0.4523, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6102535832414554, |
| "grad_norm": 0.2268303632736206, |
| "learning_rate": 0.00020903814262023215, |
| "loss": 0.486, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6108048511576626, |
| "grad_norm": 0.2280043363571167, |
| "learning_rate": 0.000208955223880597, |
| "loss": 0.4902, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6113561190738699, |
| "grad_norm": 0.21859845519065857, |
| "learning_rate": 0.00020887230514096184, |
| "loss": 0.4593, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6119073869900772, |
| "grad_norm": 0.23152512311935425, |
| "learning_rate": 0.0002087893864013267, |
| "loss": 0.4762, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6124586549062845, |
| "grad_norm": 0.23346808552742004, |
| "learning_rate": 0.00020870646766169153, |
| "loss": 0.4919, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6130099228224918, |
| "grad_norm": 0.2313188761472702, |
| "learning_rate": 0.0002086235489220564, |
| "loss": 0.4792, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.613561190738699, |
| "grad_norm": 0.2261422574520111, |
| "learning_rate": 0.0002085406301824212, |
| "loss": 0.5008, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.6141124586549063, |
| "grad_norm": 0.24444694817066193, |
| "learning_rate": 0.00020845771144278603, |
| "loss": 0.503, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.6146637265711136, |
| "grad_norm": 0.23184862732887268, |
| "learning_rate": 0.0002083747927031509, |
| "loss": 0.5024, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6152149944873209, |
| "grad_norm": 0.22305606305599213, |
| "learning_rate": 0.00020829187396351572, |
| "loss": 0.4815, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6157662624035282, |
| "grad_norm": 0.24641431868076324, |
| "learning_rate": 0.00020820895522388058, |
| "loss": 0.5079, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6163175303197354, |
| "grad_norm": 0.24148327112197876, |
| "learning_rate": 0.0002081260364842454, |
| "loss": 0.507, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6168687982359427, |
| "grad_norm": 0.23938195407390594, |
| "learning_rate": 0.00020804311774461027, |
| "loss": 0.4668, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.61742006615215, |
| "grad_norm": 0.2462988644838333, |
| "learning_rate": 0.0002079601990049751, |
| "loss": 0.4941, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6179713340683572, |
| "grad_norm": 0.23903852701187134, |
| "learning_rate": 0.00020787728026533996, |
| "loss": 0.4684, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6185226019845645, |
| "grad_norm": 0.2402830719947815, |
| "learning_rate": 0.00020779436152570477, |
| "loss": 0.4705, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6190738699007717, |
| "grad_norm": 0.24639341235160828, |
| "learning_rate": 0.00020771144278606963, |
| "loss": 0.4874, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.619625137816979, |
| "grad_norm": 0.22861522436141968, |
| "learning_rate": 0.00020762852404643446, |
| "loss": 0.4696, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6201764057331863, |
| "grad_norm": 0.23462949693202972, |
| "learning_rate": 0.00020754560530679932, |
| "loss": 0.509, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6207276736493936, |
| "grad_norm": 0.24041415750980377, |
| "learning_rate": 0.00020746268656716415, |
| "loss": 0.4792, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6212789415656009, |
| "grad_norm": 0.23339125514030457, |
| "learning_rate": 0.000207379767827529, |
| "loss": 0.4603, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.6218302094818081, |
| "grad_norm": 0.23568972945213318, |
| "learning_rate": 0.00020729684908789384, |
| "loss": 0.4882, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6223814773980154, |
| "grad_norm": 0.24162200093269348, |
| "learning_rate": 0.0002072139303482587, |
| "loss": 0.4835, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6229327453142227, |
| "grad_norm": 0.24957728385925293, |
| "learning_rate": 0.00020713101160862353, |
| "loss": 0.4871, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.62348401323043, |
| "grad_norm": 0.24710482358932495, |
| "learning_rate": 0.0002070480928689884, |
| "loss": 0.4604, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6240352811466373, |
| "grad_norm": 0.24623054265975952, |
| "learning_rate": 0.0002069651741293532, |
| "loss": 0.4986, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6245865490628445, |
| "grad_norm": 0.24791941046714783, |
| "learning_rate": 0.00020688225538971806, |
| "loss": 0.4665, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6251378169790518, |
| "grad_norm": 0.26239630579948425, |
| "learning_rate": 0.0002067993366500829, |
| "loss": 0.5193, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6256890848952591, |
| "grad_norm": 0.2580834925174713, |
| "learning_rate": 0.00020671641791044775, |
| "loss": 0.5162, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6262403528114664, |
| "grad_norm": 0.21768338978290558, |
| "learning_rate": 0.00020663349917081258, |
| "loss": 0.4626, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6267916207276737, |
| "grad_norm": 0.24815984070301056, |
| "learning_rate": 0.00020655058043117744, |
| "loss": 0.4943, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6273428886438809, |
| "grad_norm": 0.2349233627319336, |
| "learning_rate": 0.00020646766169154227, |
| "loss": 0.4819, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6278941565600882, |
| "grad_norm": 0.23029837012290955, |
| "learning_rate": 0.00020638474295190713, |
| "loss": 0.488, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6284454244762955, |
| "grad_norm": 0.23574088513851166, |
| "learning_rate": 0.00020630182421227196, |
| "loss": 0.4791, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6289966923925028, |
| "grad_norm": 0.23277179896831512, |
| "learning_rate": 0.00020621890547263682, |
| "loss": 0.5047, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6295479603087101, |
| "grad_norm": 0.2530352473258972, |
| "learning_rate": 0.00020613598673300163, |
| "loss": 0.5143, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6300992282249173, |
| "grad_norm": 0.2136935591697693, |
| "learning_rate": 0.00020605306799336649, |
| "loss": 0.4768, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6306504961411246, |
| "grad_norm": 0.23165372014045715, |
| "learning_rate": 0.00020597014925373132, |
| "loss": 0.4802, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6312017640573319, |
| "grad_norm": 0.23744627833366394, |
| "learning_rate": 0.00020588723051409618, |
| "loss": 0.4751, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6317530319735392, |
| "grad_norm": 0.2552582323551178, |
| "learning_rate": 0.000205804311774461, |
| "loss": 0.4949, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6323042998897465, |
| "grad_norm": 0.22193565964698792, |
| "learning_rate": 0.00020572139303482587, |
| "loss": 0.4629, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6328555678059536, |
| "grad_norm": 0.2249847799539566, |
| "learning_rate": 0.0002056384742951907, |
| "loss": 0.46, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6334068357221609, |
| "grad_norm": 0.234629824757576, |
| "learning_rate": 0.00020555555555555556, |
| "loss": 0.4792, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6339581036383682, |
| "grad_norm": 0.23007982969284058, |
| "learning_rate": 0.0002054726368159204, |
| "loss": 0.4857, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6345093715545755, |
| "grad_norm": 0.24549317359924316, |
| "learning_rate": 0.00020538971807628525, |
| "loss": 0.4697, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6350606394707828, |
| "grad_norm": 0.26415401697158813, |
| "learning_rate": 0.00020530679933665005, |
| "loss": 0.4858, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.63561190738699, |
| "grad_norm": 0.20789586007595062, |
| "learning_rate": 0.00020522388059701491, |
| "loss": 0.4312, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6361631753031973, |
| "grad_norm": 0.23789043724536896, |
| "learning_rate": 0.00020514096185737975, |
| "loss": 0.4816, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6367144432194046, |
| "grad_norm": 0.23785383999347687, |
| "learning_rate": 0.0002050580431177446, |
| "loss": 0.4743, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6372657111356119, |
| "grad_norm": 0.26521044969558716, |
| "learning_rate": 0.00020497512437810944, |
| "loss": 0.4904, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6378169790518192, |
| "grad_norm": 0.25412556529045105, |
| "learning_rate": 0.0002048922056384743, |
| "loss": 0.5, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6383682469680264, |
| "grad_norm": 0.23178859055042267, |
| "learning_rate": 0.00020480928689883913, |
| "loss": 0.4791, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6389195148842337, |
| "grad_norm": 0.23838523030281067, |
| "learning_rate": 0.00020472636815920393, |
| "loss": 0.4539, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.639470782800441, |
| "grad_norm": 0.23378612101078033, |
| "learning_rate": 0.0002046434494195688, |
| "loss": 0.492, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6400220507166483, |
| "grad_norm": 0.24227279424667358, |
| "learning_rate": 0.00020456053067993362, |
| "loss": 0.474, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6405733186328556, |
| "grad_norm": 0.23166267573833466, |
| "learning_rate": 0.00020447761194029848, |
| "loss": 0.4684, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6411245865490628, |
| "grad_norm": 0.23626738786697388, |
| "learning_rate": 0.00020439469320066332, |
| "loss": 0.4744, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.6416758544652701, |
| "grad_norm": 0.2464771568775177, |
| "learning_rate": 0.00020431177446102817, |
| "loss": 0.47, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6422271223814774, |
| "grad_norm": 0.23458126187324524, |
| "learning_rate": 0.000204228855721393, |
| "loss": 0.4442, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6427783902976847, |
| "grad_norm": 0.23561522364616394, |
| "learning_rate": 0.00020414593698175787, |
| "loss": 0.4696, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.643329658213892, |
| "grad_norm": 0.2327614575624466, |
| "learning_rate": 0.0002040630182421227, |
| "loss": 0.486, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6438809261300992, |
| "grad_norm": 0.22742946445941925, |
| "learning_rate": 0.00020398009950248756, |
| "loss": 0.4448, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6444321940463065, |
| "grad_norm": 0.22767378389835358, |
| "learning_rate": 0.00020389718076285236, |
| "loss": 0.4749, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6449834619625138, |
| "grad_norm": 0.21805496513843536, |
| "learning_rate": 0.00020381426202321722, |
| "loss": 0.4976, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6455347298787211, |
| "grad_norm": 0.23068863153457642, |
| "learning_rate": 0.00020373134328358205, |
| "loss": 0.4839, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6460859977949284, |
| "grad_norm": 0.24028991162776947, |
| "learning_rate": 0.0002036484245439469, |
| "loss": 0.4613, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6466372657111357, |
| "grad_norm": 0.2558547854423523, |
| "learning_rate": 0.00020356550580431174, |
| "loss": 0.4795, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6471885336273429, |
| "grad_norm": 0.2363976091146469, |
| "learning_rate": 0.0002034825870646766, |
| "loss": 0.4819, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6477398015435502, |
| "grad_norm": 0.23440702259540558, |
| "learning_rate": 0.00020339966832504144, |
| "loss": 0.4676, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6482910694597575, |
| "grad_norm": 0.23950831592082977, |
| "learning_rate": 0.0002033167495854063, |
| "loss": 0.4775, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6488423373759648, |
| "grad_norm": 0.23549869656562805, |
| "learning_rate": 0.00020323383084577113, |
| "loss": 0.471, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.649393605292172, |
| "grad_norm": 0.2294132113456726, |
| "learning_rate": 0.00020315091210613599, |
| "loss": 0.4584, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6499448732083792, |
| "grad_norm": 0.2511732280254364, |
| "learning_rate": 0.0002030679933665008, |
| "loss": 0.4886, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6504961411245865, |
| "grad_norm": 0.23680317401885986, |
| "learning_rate": 0.00020298507462686565, |
| "loss": 0.5026, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6510474090407938, |
| "grad_norm": 0.24410556256771088, |
| "learning_rate": 0.00020290215588723048, |
| "loss": 0.4862, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6515986769570011, |
| "grad_norm": 0.24827975034713745, |
| "learning_rate": 0.00020281923714759534, |
| "loss": 0.4734, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.6521499448732084, |
| "grad_norm": 0.24595201015472412, |
| "learning_rate": 0.00020273631840796017, |
| "loss": 0.4754, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6527012127894156, |
| "grad_norm": 0.24838019907474518, |
| "learning_rate": 0.00020265339966832503, |
| "loss": 0.4923, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6532524807056229, |
| "grad_norm": 0.23605762422084808, |
| "learning_rate": 0.00020257048092868986, |
| "loss": 0.477, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6538037486218302, |
| "grad_norm": 0.24502962827682495, |
| "learning_rate": 0.00020248756218905472, |
| "loss": 0.482, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6543550165380375, |
| "grad_norm": 0.24489161372184753, |
| "learning_rate": 0.00020240464344941956, |
| "loss": 0.4783, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6549062844542448, |
| "grad_norm": 0.236792653799057, |
| "learning_rate": 0.00020232172470978441, |
| "loss": 0.4899, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.655457552370452, |
| "grad_norm": 0.2327335923910141, |
| "learning_rate": 0.00020223880597014922, |
| "loss": 0.4915, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6560088202866593, |
| "grad_norm": 0.21822991967201233, |
| "learning_rate": 0.00020215588723051408, |
| "loss": 0.472, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6565600882028666, |
| "grad_norm": 0.2524334788322449, |
| "learning_rate": 0.0002020729684908789, |
| "loss": 0.4942, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6571113561190739, |
| "grad_norm": 0.23585528135299683, |
| "learning_rate": 0.00020199004975124377, |
| "loss": 0.5011, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.6576626240352812, |
| "grad_norm": 0.24948836863040924, |
| "learning_rate": 0.0002019071310116086, |
| "loss": 0.4831, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6582138919514884, |
| "grad_norm": 0.2369844615459442, |
| "learning_rate": 0.00020182421227197346, |
| "loss": 0.4923, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6587651598676957, |
| "grad_norm": 0.22455725073814392, |
| "learning_rate": 0.0002017412935323383, |
| "loss": 0.4699, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.659316427783903, |
| "grad_norm": 0.22049696743488312, |
| "learning_rate": 0.00020165837479270315, |
| "loss": 0.4569, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6598676957001103, |
| "grad_norm": 0.21964412927627563, |
| "learning_rate": 0.00020157545605306798, |
| "loss": 0.4818, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6604189636163176, |
| "grad_norm": 0.24084921181201935, |
| "learning_rate": 0.00020149253731343284, |
| "loss": 0.4834, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6609702315325248, |
| "grad_norm": 0.2169031798839569, |
| "learning_rate": 0.00020140961857379765, |
| "loss": 0.458, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.6615214994487321, |
| "grad_norm": 0.2437864989042282, |
| "learning_rate": 0.0002013266998341625, |
| "loss": 0.4998, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6620727673649394, |
| "grad_norm": 0.2373666912317276, |
| "learning_rate": 0.00020124378109452734, |
| "loss": 0.4593, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6626240352811467, |
| "grad_norm": 0.2300565093755722, |
| "learning_rate": 0.00020116086235489217, |
| "loss": 0.4698, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.663175303197354, |
| "grad_norm": 0.2500588595867157, |
| "learning_rate": 0.00020107794361525703, |
| "loss": 0.4847, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6637265711135611, |
| "grad_norm": 0.24038562178611755, |
| "learning_rate": 0.00020099502487562186, |
| "loss": 0.4746, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.6642778390297684, |
| "grad_norm": 0.2691898047924042, |
| "learning_rate": 0.00020091210613598672, |
| "loss": 0.4547, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6648291069459757, |
| "grad_norm": 0.23530587553977966, |
| "learning_rate": 0.00020082918739635155, |
| "loss": 0.4618, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.665380374862183, |
| "grad_norm": 0.24838554859161377, |
| "learning_rate": 0.0002007462686567164, |
| "loss": 0.5093, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6659316427783903, |
| "grad_norm": 0.24996088445186615, |
| "learning_rate": 0.00020066334991708122, |
| "loss": 0.4703, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6664829106945975, |
| "grad_norm": 0.2432130128145218, |
| "learning_rate": 0.00020058043117744608, |
| "loss": 0.4651, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6670341786108048, |
| "grad_norm": 0.2394338846206665, |
| "learning_rate": 0.0002004975124378109, |
| "loss": 0.4679, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6675854465270121, |
| "grad_norm": 0.23440587520599365, |
| "learning_rate": 0.00020041459369817577, |
| "loss": 0.4552, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6681367144432194, |
| "grad_norm": 0.25409042835235596, |
| "learning_rate": 0.0002003316749585406, |
| "loss": 0.4879, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6686879823594267, |
| "grad_norm": 0.24675914645195007, |
| "learning_rate": 0.00020024875621890546, |
| "loss": 0.4935, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6692392502756339, |
| "grad_norm": 0.2398385852575302, |
| "learning_rate": 0.0002001658374792703, |
| "loss": 0.4588, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6697905181918412, |
| "grad_norm": 0.23646225035190582, |
| "learning_rate": 0.00020008291873963515, |
| "loss": 0.486, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6703417861080485, |
| "grad_norm": 0.2433752566576004, |
| "learning_rate": 0.00019999999999999998, |
| "loss": 0.5, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.6708930540242558, |
| "grad_norm": 0.22759981453418732, |
| "learning_rate": 0.00019991708126036484, |
| "loss": 0.482, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6714443219404631, |
| "grad_norm": 0.2414034903049469, |
| "learning_rate": 0.00019983416252072965, |
| "loss": 0.4754, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6719955898566703, |
| "grad_norm": 0.23548895120620728, |
| "learning_rate": 0.0001997512437810945, |
| "loss": 0.4793, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6725468577728776, |
| "grad_norm": 0.22510850429534912, |
| "learning_rate": 0.00019966832504145934, |
| "loss": 0.474, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6730981256890849, |
| "grad_norm": 0.21878324449062347, |
| "learning_rate": 0.0001995854063018242, |
| "loss": 0.4349, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.6736493936052922, |
| "grad_norm": 0.234661266207695, |
| "learning_rate": 0.00019950248756218903, |
| "loss": 0.4602, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.6742006615214995, |
| "grad_norm": 0.24233828485012054, |
| "learning_rate": 0.0001994195688225539, |
| "loss": 0.4932, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.6747519294377067, |
| "grad_norm": 0.22866547107696533, |
| "learning_rate": 0.00019933665008291872, |
| "loss": 0.4697, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.675303197353914, |
| "grad_norm": 0.2325911670923233, |
| "learning_rate": 0.00019925373134328358, |
| "loss": 0.4631, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6758544652701213, |
| "grad_norm": 0.22702381014823914, |
| "learning_rate": 0.0001991708126036484, |
| "loss": 0.4631, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.6764057331863286, |
| "grad_norm": 0.23354612290859222, |
| "learning_rate": 0.00019908789386401327, |
| "loss": 0.4687, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6769570011025359, |
| "grad_norm": 0.2386290431022644, |
| "learning_rate": 0.00019900497512437808, |
| "loss": 0.4777, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.6775082690187431, |
| "grad_norm": 0.24729053676128387, |
| "learning_rate": 0.00019892205638474293, |
| "loss": 0.4785, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.6780595369349504, |
| "grad_norm": 0.2109660655260086, |
| "learning_rate": 0.00019883913764510777, |
| "loss": 0.464, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6786108048511577, |
| "grad_norm": 0.24349510669708252, |
| "learning_rate": 0.00019875621890547263, |
| "loss": 0.4972, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.679162072767365, |
| "grad_norm": 0.236436665058136, |
| "learning_rate": 0.00019867330016583746, |
| "loss": 0.4655, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6797133406835723, |
| "grad_norm": 0.22133763134479523, |
| "learning_rate": 0.00019859038142620232, |
| "loss": 0.4856, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.6802646085997794, |
| "grad_norm": 0.23461799323558807, |
| "learning_rate": 0.00019850746268656715, |
| "loss": 0.4974, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.6808158765159867, |
| "grad_norm": 0.23802213370800018, |
| "learning_rate": 0.000198424543946932, |
| "loss": 0.4634, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.681367144432194, |
| "grad_norm": 0.23866182565689087, |
| "learning_rate": 0.00019834162520729684, |
| "loss": 0.4962, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.6819184123484013, |
| "grad_norm": 0.20461198687553406, |
| "learning_rate": 0.0001982587064676617, |
| "loss": 0.479, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.6824696802646086, |
| "grad_norm": 0.2442476749420166, |
| "learning_rate": 0.0001981757877280265, |
| "loss": 0.5007, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.6830209481808158, |
| "grad_norm": 0.2257671356201172, |
| "learning_rate": 0.00019809286898839136, |
| "loss": 0.4899, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.6835722160970231, |
| "grad_norm": 0.214102640748024, |
| "learning_rate": 0.0001980099502487562, |
| "loss": 0.4536, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6841234840132304, |
| "grad_norm": 0.21543948352336884, |
| "learning_rate": 0.00019792703150912105, |
| "loss": 0.4811, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6846747519294377, |
| "grad_norm": 0.25430455803871155, |
| "learning_rate": 0.00019784411276948589, |
| "loss": 0.486, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.685226019845645, |
| "grad_norm": 0.2656538486480713, |
| "learning_rate": 0.00019776119402985075, |
| "loss": 0.462, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6857772877618522, |
| "grad_norm": 0.23967699706554413, |
| "learning_rate": 0.00019767827529021558, |
| "loss": 0.5004, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6863285556780595, |
| "grad_norm": 0.22987446188926697, |
| "learning_rate": 0.00019759535655058044, |
| "loss": 0.4804, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6868798235942668, |
| "grad_norm": 0.20953255891799927, |
| "learning_rate": 0.00019751243781094527, |
| "loss": 0.4793, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6874310915104741, |
| "grad_norm": 0.256028413772583, |
| "learning_rate": 0.00019742951907131007, |
| "loss": 0.4881, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6879823594266814, |
| "grad_norm": 0.23885922133922577, |
| "learning_rate": 0.00019734660033167493, |
| "loss": 0.508, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6885336273428887, |
| "grad_norm": 0.24736814200878143, |
| "learning_rate": 0.00019726368159203976, |
| "loss": 0.4935, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6890848952590959, |
| "grad_norm": 0.23237743973731995, |
| "learning_rate": 0.00019718076285240462, |
| "loss": 0.4775, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6896361631753032, |
| "grad_norm": 0.24340516328811646, |
| "learning_rate": 0.00019709784411276946, |
| "loss": 0.4987, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6901874310915105, |
| "grad_norm": 0.22015541791915894, |
| "learning_rate": 0.00019701492537313432, |
| "loss": 0.4524, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6907386990077178, |
| "grad_norm": 0.25280436873435974, |
| "learning_rate": 0.00019693200663349915, |
| "loss": 0.4953, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6912899669239251, |
| "grad_norm": 0.22572125494480133, |
| "learning_rate": 0.000196849087893864, |
| "loss": 0.4692, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6918412348401323, |
| "grad_norm": 0.2326386719942093, |
| "learning_rate": 0.00019676616915422884, |
| "loss": 0.475, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6923925027563396, |
| "grad_norm": 0.2248145192861557, |
| "learning_rate": 0.0001966832504145937, |
| "loss": 0.4463, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6929437706725469, |
| "grad_norm": 0.236514613032341, |
| "learning_rate": 0.0001966003316749585, |
| "loss": 0.4502, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6934950385887542, |
| "grad_norm": 0.2295265942811966, |
| "learning_rate": 0.00019651741293532336, |
| "loss": 0.4559, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6940463065049615, |
| "grad_norm": 0.24026772379875183, |
| "learning_rate": 0.0001964344941956882, |
| "loss": 0.4642, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6945975744211687, |
| "grad_norm": 0.2558375298976898, |
| "learning_rate": 0.00019635157545605305, |
| "loss": 0.4864, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.695148842337376, |
| "grad_norm": 0.2334502935409546, |
| "learning_rate": 0.00019626865671641788, |
| "loss": 0.47, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6957001102535832, |
| "grad_norm": 0.23098182678222656, |
| "learning_rate": 0.00019618573797678274, |
| "loss": 0.4786, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6962513781697905, |
| "grad_norm": 0.22288668155670166, |
| "learning_rate": 0.00019610281923714758, |
| "loss": 0.4638, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6968026460859978, |
| "grad_norm": 0.23454713821411133, |
| "learning_rate": 0.00019601990049751244, |
| "loss": 0.4661, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.697353914002205, |
| "grad_norm": 0.22980453073978424, |
| "learning_rate": 0.00019593698175787727, |
| "loss": 0.4681, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.6979051819184123, |
| "grad_norm": 0.20731012523174286, |
| "learning_rate": 0.00019585406301824213, |
| "loss": 0.4439, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6984564498346196, |
| "grad_norm": 0.22292488813400269, |
| "learning_rate": 0.00019577114427860693, |
| "loss": 0.4663, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.6990077177508269, |
| "grad_norm": 0.22497937083244324, |
| "learning_rate": 0.0001956882255389718, |
| "loss": 0.4751, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6995589856670342, |
| "grad_norm": 0.2342757284641266, |
| "learning_rate": 0.00019560530679933662, |
| "loss": 0.4544, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7001102535832414, |
| "grad_norm": 0.23075568675994873, |
| "learning_rate": 0.00019552238805970148, |
| "loss": 0.4634, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7006615214994487, |
| "grad_norm": 0.2278735637664795, |
| "learning_rate": 0.0001954394693200663, |
| "loss": 0.4895, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.701212789415656, |
| "grad_norm": 0.25607171654701233, |
| "learning_rate": 0.00019535655058043117, |
| "loss": 0.49, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.7017640573318633, |
| "grad_norm": 0.2315627932548523, |
| "learning_rate": 0.000195273631840796, |
| "loss": 0.4522, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.7023153252480706, |
| "grad_norm": 0.2047976851463318, |
| "learning_rate": 0.00019519071310116086, |
| "loss": 0.4356, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7028665931642778, |
| "grad_norm": 0.24180057644844055, |
| "learning_rate": 0.00019510779436152567, |
| "loss": 0.4749, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7034178610804851, |
| "grad_norm": 0.2599826753139496, |
| "learning_rate": 0.00019502487562189055, |
| "loss": 0.5082, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7039691289966924, |
| "grad_norm": 0.23944783210754395, |
| "learning_rate": 0.00019494195688225536, |
| "loss": 0.4828, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7045203969128997, |
| "grad_norm": 0.21794094145298004, |
| "learning_rate": 0.00019485903814262022, |
| "loss": 0.4691, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.705071664829107, |
| "grad_norm": 0.23379597067832947, |
| "learning_rate": 0.00019477611940298505, |
| "loss": 0.486, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7056229327453142, |
| "grad_norm": 0.21778427064418793, |
| "learning_rate": 0.0001946932006633499, |
| "loss": 0.4483, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7061742006615215, |
| "grad_norm": 0.23941390216350555, |
| "learning_rate": 0.00019461028192371474, |
| "loss": 0.4885, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7067254685777288, |
| "grad_norm": 0.23993995785713196, |
| "learning_rate": 0.0001945273631840796, |
| "loss": 0.5098, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7072767364939361, |
| "grad_norm": 0.2523173391819, |
| "learning_rate": 0.00019444444444444443, |
| "loss": 0.4752, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.7078280044101434, |
| "grad_norm": 0.23337773978710175, |
| "learning_rate": 0.0001943615257048093, |
| "loss": 0.5198, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7083792723263506, |
| "grad_norm": 0.24418905377388, |
| "learning_rate": 0.0001942786069651741, |
| "loss": 0.519, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7089305402425579, |
| "grad_norm": 0.24214893579483032, |
| "learning_rate": 0.00019419568822553896, |
| "loss": 0.4625, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.7094818081587652, |
| "grad_norm": 0.25616276264190674, |
| "learning_rate": 0.0001941127694859038, |
| "loss": 0.483, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.7100330760749725, |
| "grad_norm": 0.2434643656015396, |
| "learning_rate": 0.00019402985074626865, |
| "loss": 0.4834, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7105843439911798, |
| "grad_norm": 0.23342913389205933, |
| "learning_rate": 0.00019394693200663348, |
| "loss": 0.4577, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7111356119073869, |
| "grad_norm": 0.23564305901527405, |
| "learning_rate": 0.0001938640132669983, |
| "loss": 0.4731, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7116868798235942, |
| "grad_norm": 0.2814309000968933, |
| "learning_rate": 0.00019378109452736317, |
| "loss": 0.4845, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7122381477398015, |
| "grad_norm": 0.2305363267660141, |
| "learning_rate": 0.000193698175787728, |
| "loss": 0.4577, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7127894156560088, |
| "grad_norm": 0.2413802593946457, |
| "learning_rate": 0.00019361525704809286, |
| "loss": 0.5005, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7133406835722161, |
| "grad_norm": 0.22398939728736877, |
| "learning_rate": 0.00019353233830845767, |
| "loss": 0.4645, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7138919514884233, |
| "grad_norm": 0.2510089874267578, |
| "learning_rate": 0.00019344941956882253, |
| "loss": 0.4721, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7144432194046306, |
| "grad_norm": 0.23676623404026031, |
| "learning_rate": 0.00019336650082918736, |
| "loss": 0.5126, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7149944873208379, |
| "grad_norm": 0.22751228511333466, |
| "learning_rate": 0.00019328358208955222, |
| "loss": 0.4403, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7155457552370452, |
| "grad_norm": 0.23468491435050964, |
| "learning_rate": 0.00019320066334991705, |
| "loss": 0.4697, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7160970231532525, |
| "grad_norm": 0.2132336050271988, |
| "learning_rate": 0.0001931177446102819, |
| "loss": 0.4468, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7166482910694597, |
| "grad_norm": 0.22579894959926605, |
| "learning_rate": 0.00019303482587064674, |
| "loss": 0.458, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.717199558985667, |
| "grad_norm": 0.22772036492824554, |
| "learning_rate": 0.0001929519071310116, |
| "loss": 0.457, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.7177508269018743, |
| "grad_norm": 0.2290082722902298, |
| "learning_rate": 0.00019286898839137643, |
| "loss": 0.4771, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.7183020948180816, |
| "grad_norm": 0.2190980762243271, |
| "learning_rate": 0.0001927860696517413, |
| "loss": 0.4754, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7188533627342889, |
| "grad_norm": 0.2228933423757553, |
| "learning_rate": 0.0001927031509121061, |
| "loss": 0.476, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7194046306504961, |
| "grad_norm": 0.23896026611328125, |
| "learning_rate": 0.00019262023217247096, |
| "loss": 0.5008, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7199558985667034, |
| "grad_norm": 0.222875714302063, |
| "learning_rate": 0.0001925373134328358, |
| "loss": 0.4526, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7205071664829107, |
| "grad_norm": 0.22457565367221832, |
| "learning_rate": 0.00019245439469320065, |
| "loss": 0.5019, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.721058434399118, |
| "grad_norm": 0.24464376270771027, |
| "learning_rate": 0.00019237147595356548, |
| "loss": 0.4896, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7216097023153253, |
| "grad_norm": 0.22952450811862946, |
| "learning_rate": 0.00019228855721393034, |
| "loss": 0.4751, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7221609702315325, |
| "grad_norm": 0.22557076811790466, |
| "learning_rate": 0.00019220563847429517, |
| "loss": 0.4859, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7227122381477398, |
| "grad_norm": 0.2599719762802124, |
| "learning_rate": 0.00019212271973466003, |
| "loss": 0.4871, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7232635060639471, |
| "grad_norm": 0.2541002333164215, |
| "learning_rate": 0.00019203980099502486, |
| "loss": 0.5076, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7238147739801544, |
| "grad_norm": 0.234733447432518, |
| "learning_rate": 0.00019195688225538972, |
| "loss": 0.471, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7243660418963617, |
| "grad_norm": 0.23307423293590546, |
| "learning_rate": 0.00019187396351575452, |
| "loss": 0.4758, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.7249173098125689, |
| "grad_norm": 0.22905585169792175, |
| "learning_rate": 0.00019179104477611938, |
| "loss": 0.4674, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.7254685777287762, |
| "grad_norm": 0.24311380088329315, |
| "learning_rate": 0.00019170812603648422, |
| "loss": 0.4838, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.7260198456449835, |
| "grad_norm": 0.24221283197402954, |
| "learning_rate": 0.00019162520729684907, |
| "loss": 0.4671, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7265711135611908, |
| "grad_norm": 0.2364143580198288, |
| "learning_rate": 0.0001915422885572139, |
| "loss": 0.4496, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.727122381477398, |
| "grad_norm": 0.2382567673921585, |
| "learning_rate": 0.00019145936981757877, |
| "loss": 0.4516, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7276736493936052, |
| "grad_norm": 0.281539648771286, |
| "learning_rate": 0.0001913764510779436, |
| "loss": 0.4742, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7282249173098125, |
| "grad_norm": 0.2738378345966339, |
| "learning_rate": 0.00019129353233830846, |
| "loss": 0.5158, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.7287761852260198, |
| "grad_norm": 0.23668839037418365, |
| "learning_rate": 0.0001912106135986733, |
| "loss": 0.4907, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7293274531422271, |
| "grad_norm": 0.2443835288286209, |
| "learning_rate": 0.00019112769485903815, |
| "loss": 0.4887, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7298787210584344, |
| "grad_norm": 0.2538048028945923, |
| "learning_rate": 0.00019104477611940295, |
| "loss": 0.4413, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.7304299889746417, |
| "grad_norm": 0.24266113340854645, |
| "learning_rate": 0.0001909618573797678, |
| "loss": 0.4618, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.7309812568908489, |
| "grad_norm": 0.2522546648979187, |
| "learning_rate": 0.00019087893864013264, |
| "loss": 0.493, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.7315325248070562, |
| "grad_norm": 0.24361646175384521, |
| "learning_rate": 0.0001907960199004975, |
| "loss": 0.4552, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.7320837927232635, |
| "grad_norm": 0.24726730585098267, |
| "learning_rate": 0.00019071310116086234, |
| "loss": 0.4899, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7326350606394708, |
| "grad_norm": 0.23533383011817932, |
| "learning_rate": 0.0001906301824212272, |
| "loss": 0.4674, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.7331863285556781, |
| "grad_norm": 0.23652805387973785, |
| "learning_rate": 0.00019054726368159203, |
| "loss": 0.4734, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7337375964718853, |
| "grad_norm": 0.24334965646266937, |
| "learning_rate": 0.00019046434494195689, |
| "loss": 0.4897, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.7342888643880926, |
| "grad_norm": 0.2077738642692566, |
| "learning_rate": 0.00019038142620232172, |
| "loss": 0.4516, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7348401323042999, |
| "grad_norm": 0.23306086659431458, |
| "learning_rate": 0.00019029850746268658, |
| "loss": 0.5076, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7353914002205072, |
| "grad_norm": 0.2449159324169159, |
| "learning_rate": 0.00019021558872305138, |
| "loss": 0.4618, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.7359426681367145, |
| "grad_norm": 0.24829532206058502, |
| "learning_rate": 0.00019013266998341621, |
| "loss": 0.4614, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7364939360529217, |
| "grad_norm": 0.23648925125598907, |
| "learning_rate": 0.00019004975124378107, |
| "loss": 0.4616, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.737045203969129, |
| "grad_norm": 0.23551128804683685, |
| "learning_rate": 0.0001899668325041459, |
| "loss": 0.4724, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7375964718853363, |
| "grad_norm": 0.23878498375415802, |
| "learning_rate": 0.00018988391376451076, |
| "loss": 0.4639, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7381477398015436, |
| "grad_norm": 0.24612358212471008, |
| "learning_rate": 0.0001898009950248756, |
| "loss": 0.4757, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7386990077177509, |
| "grad_norm": 0.2288011610507965, |
| "learning_rate": 0.00018971807628524046, |
| "loss": 0.4598, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7392502756339581, |
| "grad_norm": 0.2329450398683548, |
| "learning_rate": 0.0001896351575456053, |
| "loss": 0.4884, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.7398015435501654, |
| "grad_norm": 0.23273812234401703, |
| "learning_rate": 0.00018955223880597015, |
| "loss": 0.4834, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7403528114663727, |
| "grad_norm": 0.24095992743968964, |
| "learning_rate": 0.00018946932006633495, |
| "loss": 0.4352, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.74090407938258, |
| "grad_norm": 0.24149319529533386, |
| "learning_rate": 0.0001893864013266998, |
| "loss": 0.4675, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.7414553472987873, |
| "grad_norm": 0.24013857543468475, |
| "learning_rate": 0.00018930348258706464, |
| "loss": 0.4879, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.7420066152149944, |
| "grad_norm": 0.23142081499099731, |
| "learning_rate": 0.0001892205638474295, |
| "loss": 0.4607, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7425578831312017, |
| "grad_norm": 0.2619989514350891, |
| "learning_rate": 0.00018913764510779433, |
| "loss": 0.4784, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.743109151047409, |
| "grad_norm": 0.23706799745559692, |
| "learning_rate": 0.0001890547263681592, |
| "loss": 0.4716, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7436604189636163, |
| "grad_norm": 0.25641632080078125, |
| "learning_rate": 0.00018897180762852403, |
| "loss": 0.4951, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7442116868798236, |
| "grad_norm": 0.225026935338974, |
| "learning_rate": 0.00018888888888888888, |
| "loss": 0.4742, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7447629547960308, |
| "grad_norm": 0.21225763857364655, |
| "learning_rate": 0.00018880597014925372, |
| "loss": 0.4484, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7453142227122381, |
| "grad_norm": 0.2503174841403961, |
| "learning_rate": 0.00018872305140961858, |
| "loss": 0.4832, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7458654906284454, |
| "grad_norm": 0.25594860315322876, |
| "learning_rate": 0.00018864013266998338, |
| "loss": 0.4952, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7464167585446527, |
| "grad_norm": 0.23849812150001526, |
| "learning_rate": 0.00018855721393034824, |
| "loss": 0.4927, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.74696802646086, |
| "grad_norm": 0.22114640474319458, |
| "learning_rate": 0.00018847429519071307, |
| "loss": 0.4475, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7475192943770672, |
| "grad_norm": 0.23791830241680145, |
| "learning_rate": 0.00018839137645107793, |
| "loss": 0.4846, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7480705622932745, |
| "grad_norm": 0.2577480673789978, |
| "learning_rate": 0.00018830845771144276, |
| "loss": 0.4541, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.7486218302094818, |
| "grad_norm": 0.2754758596420288, |
| "learning_rate": 0.00018822553897180762, |
| "loss": 0.4958, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7491730981256891, |
| "grad_norm": 0.2309567779302597, |
| "learning_rate": 0.00018814262023217245, |
| "loss": 0.4671, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7497243660418964, |
| "grad_norm": 0.24164016544818878, |
| "learning_rate": 0.0001880597014925373, |
| "loss": 0.4712, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7502756339581036, |
| "grad_norm": 0.21853327751159668, |
| "learning_rate": 0.00018797678275290215, |
| "loss": 0.503, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7508269018743109, |
| "grad_norm": 0.22078783810138702, |
| "learning_rate": 0.000187893864013267, |
| "loss": 0.4654, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7513781697905182, |
| "grad_norm": 0.23638005554676056, |
| "learning_rate": 0.0001878109452736318, |
| "loss": 0.4742, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7519294377067255, |
| "grad_norm": 0.23174162209033966, |
| "learning_rate": 0.00018772802653399667, |
| "loss": 0.4599, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.7524807056229328, |
| "grad_norm": 0.23956626653671265, |
| "learning_rate": 0.0001876451077943615, |
| "loss": 0.477, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.75303197353914, |
| "grad_norm": 0.23747730255126953, |
| "learning_rate": 0.00018756218905472636, |
| "loss": 0.46, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7535832414553473, |
| "grad_norm": 0.22467990219593048, |
| "learning_rate": 0.0001874792703150912, |
| "loss": 0.4502, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7541345093715546, |
| "grad_norm": 0.230741485953331, |
| "learning_rate": 0.00018739635157545605, |
| "loss": 0.4718, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7546857772877619, |
| "grad_norm": 0.24028630554676056, |
| "learning_rate": 0.00018731343283582088, |
| "loss": 0.4619, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7552370452039692, |
| "grad_norm": 0.24253641068935394, |
| "learning_rate": 0.00018723051409618574, |
| "loss": 0.4817, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7557883131201764, |
| "grad_norm": 0.22565878927707672, |
| "learning_rate": 0.00018714759535655057, |
| "loss": 0.4663, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7563395810363837, |
| "grad_norm": 0.23143254220485687, |
| "learning_rate": 0.00018706467661691543, |
| "loss": 0.4536, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.756890848952591, |
| "grad_norm": 0.23320366442203522, |
| "learning_rate": 0.00018698175787728024, |
| "loss": 0.4304, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.7574421168687983, |
| "grad_norm": 0.23350325226783752, |
| "learning_rate": 0.0001868988391376451, |
| "loss": 0.4649, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7579933847850056, |
| "grad_norm": 0.2501453757286072, |
| "learning_rate": 0.00018681592039800993, |
| "loss": 0.4696, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7585446527012127, |
| "grad_norm": 0.22919632494449615, |
| "learning_rate": 0.0001867330016583748, |
| "loss": 0.4751, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.75909592061742, |
| "grad_norm": 0.2562139332294464, |
| "learning_rate": 0.00018665008291873962, |
| "loss": 0.49, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7596471885336273, |
| "grad_norm": 0.2472946047782898, |
| "learning_rate": 0.00018656716417910445, |
| "loss": 0.4873, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7601984564498346, |
| "grad_norm": 0.22273144125938416, |
| "learning_rate": 0.0001864842454394693, |
| "loss": 0.4569, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7607497243660419, |
| "grad_norm": 0.24337974190711975, |
| "learning_rate": 0.00018640132669983414, |
| "loss": 0.4717, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7613009922822491, |
| "grad_norm": 0.23919668793678284, |
| "learning_rate": 0.000186318407960199, |
| "loss": 0.4966, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7618522601984564, |
| "grad_norm": 0.25102800130844116, |
| "learning_rate": 0.0001862354892205638, |
| "loss": 0.4551, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7624035281146637, |
| "grad_norm": 0.22430755198001862, |
| "learning_rate": 0.00018615257048092867, |
| "loss": 0.4628, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.762954796030871, |
| "grad_norm": 0.2542060613632202, |
| "learning_rate": 0.0001860696517412935, |
| "loss": 0.474, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.7635060639470783, |
| "grad_norm": 0.24267995357513428, |
| "learning_rate": 0.00018598673300165836, |
| "loss": 0.4709, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7640573318632855, |
| "grad_norm": 0.24730850756168365, |
| "learning_rate": 0.0001859038142620232, |
| "loss": 0.4703, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7646085997794928, |
| "grad_norm": 0.22491230070590973, |
| "learning_rate": 0.00018582089552238805, |
| "loss": 0.4572, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.7651598676957001, |
| "grad_norm": 0.25823476910591125, |
| "learning_rate": 0.00018573797678275288, |
| "loss": 0.4911, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.7657111356119074, |
| "grad_norm": 0.2442496418952942, |
| "learning_rate": 0.00018565505804311774, |
| "loss": 0.4514, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.7662624035281147, |
| "grad_norm": 0.22842232882976532, |
| "learning_rate": 0.00018557213930348257, |
| "loss": 0.459, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7668136714443219, |
| "grad_norm": 0.24691414833068848, |
| "learning_rate": 0.00018548922056384743, |
| "loss": 0.4958, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.7673649393605292, |
| "grad_norm": 0.22024598717689514, |
| "learning_rate": 0.00018540630182421224, |
| "loss": 0.4621, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7679162072767365, |
| "grad_norm": 0.24100075662136078, |
| "learning_rate": 0.0001853233830845771, |
| "loss": 0.486, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7684674751929438, |
| "grad_norm": 0.2123764157295227, |
| "learning_rate": 0.00018524046434494193, |
| "loss": 0.4575, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.7690187431091511, |
| "grad_norm": 0.239015132188797, |
| "learning_rate": 0.0001851575456053068, |
| "loss": 0.4777, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7695700110253583, |
| "grad_norm": 0.22858455777168274, |
| "learning_rate": 0.00018507462686567162, |
| "loss": 0.438, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7701212789415656, |
| "grad_norm": 0.23843710124492645, |
| "learning_rate": 0.00018499170812603648, |
| "loss": 0.456, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7706725468577729, |
| "grad_norm": 0.23079745471477509, |
| "learning_rate": 0.0001849087893864013, |
| "loss": 0.4648, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7712238147739802, |
| "grad_norm": 0.23103727400302887, |
| "learning_rate": 0.00018482587064676617, |
| "loss": 0.4589, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7717750826901875, |
| "grad_norm": 0.2261170893907547, |
| "learning_rate": 0.00018474295190713097, |
| "loss": 0.4734, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7723263506063948, |
| "grad_norm": 0.2249629944562912, |
| "learning_rate": 0.00018466003316749586, |
| "loss": 0.4542, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.772877618522602, |
| "grad_norm": 0.2366032898426056, |
| "learning_rate": 0.00018457711442786067, |
| "loss": 0.458, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7734288864388092, |
| "grad_norm": 0.2598401606082916, |
| "learning_rate": 0.00018449419568822552, |
| "loss": 0.4557, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.7739801543550165, |
| "grad_norm": 0.23570790886878967, |
| "learning_rate": 0.00018441127694859036, |
| "loss": 0.4656, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7745314222712238, |
| "grad_norm": 0.23591196537017822, |
| "learning_rate": 0.00018432835820895522, |
| "loss": 0.4689, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.7750826901874311, |
| "grad_norm": 0.2540998160839081, |
| "learning_rate": 0.00018424543946932005, |
| "loss": 0.4977, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.7756339581036383, |
| "grad_norm": 0.22981034219264984, |
| "learning_rate": 0.0001841625207296849, |
| "loss": 0.4718, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7761852260198456, |
| "grad_norm": 0.2221202403306961, |
| "learning_rate": 0.00018407960199004974, |
| "loss": 0.4784, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7767364939360529, |
| "grad_norm": 0.2501460909843445, |
| "learning_rate": 0.0001839966832504146, |
| "loss": 0.4806, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7772877618522602, |
| "grad_norm": 0.2174586057662964, |
| "learning_rate": 0.0001839137645107794, |
| "loss": 0.4833, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7778390297684675, |
| "grad_norm": 0.2424350082874298, |
| "learning_rate": 0.00018383084577114426, |
| "loss": 0.4902, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.7783902976846747, |
| "grad_norm": 0.25260457396507263, |
| "learning_rate": 0.0001837479270315091, |
| "loss": 0.4843, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.778941565600882, |
| "grad_norm": 0.27532869577407837, |
| "learning_rate": 0.00018366500829187395, |
| "loss": 0.4914, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.7794928335170893, |
| "grad_norm": 0.24072158336639404, |
| "learning_rate": 0.00018358208955223879, |
| "loss": 0.4888, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.7800441014332966, |
| "grad_norm": 0.24182955920696259, |
| "learning_rate": 0.00018349917081260364, |
| "loss": 0.4589, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.7805953693495039, |
| "grad_norm": 0.25824496150016785, |
| "learning_rate": 0.00018341625207296848, |
| "loss": 0.4868, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.7811466372657111, |
| "grad_norm": 0.2336832731962204, |
| "learning_rate": 0.00018333333333333334, |
| "loss": 0.472, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.7816979051819184, |
| "grad_norm": 0.24849727749824524, |
| "learning_rate": 0.00018325041459369817, |
| "loss": 0.4743, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.7822491730981257, |
| "grad_norm": 0.21890904009342194, |
| "learning_rate": 0.00018316749585406303, |
| "loss": 0.465, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.782800441014333, |
| "grad_norm": 0.2601034343242645, |
| "learning_rate": 0.00018308457711442783, |
| "loss": 0.4531, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.7833517089305403, |
| "grad_norm": 0.2441786229610443, |
| "learning_rate": 0.0001830016583747927, |
| "loss": 0.4536, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.7839029768467475, |
| "grad_norm": 0.2240273654460907, |
| "learning_rate": 0.00018291873963515752, |
| "loss": 0.461, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.7844542447629548, |
| "grad_norm": 0.2334737479686737, |
| "learning_rate": 0.00018283582089552235, |
| "loss": 0.4779, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.7850055126791621, |
| "grad_norm": 0.23395971953868866, |
| "learning_rate": 0.00018275290215588721, |
| "loss": 0.4585, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.7855567805953694, |
| "grad_norm": 0.24163080751895905, |
| "learning_rate": 0.00018266998341625205, |
| "loss": 0.4781, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.7861080485115767, |
| "grad_norm": 0.23681163787841797, |
| "learning_rate": 0.0001825870646766169, |
| "loss": 0.4518, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.7866593164277839, |
| "grad_norm": 0.2450489103794098, |
| "learning_rate": 0.00018250414593698174, |
| "loss": 0.4741, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.7872105843439912, |
| "grad_norm": 0.23335276544094086, |
| "learning_rate": 0.0001824212271973466, |
| "loss": 0.4938, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.7877618522601985, |
| "grad_norm": 0.22969652712345123, |
| "learning_rate": 0.0001823383084577114, |
| "loss": 0.4577, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.7883131201764058, |
| "grad_norm": 0.2162095010280609, |
| "learning_rate": 0.00018225538971807626, |
| "loss": 0.4632, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.7888643880926131, |
| "grad_norm": 0.2445029318332672, |
| "learning_rate": 0.0001821724709784411, |
| "loss": 0.4657, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.7894156560088202, |
| "grad_norm": 0.21864482760429382, |
| "learning_rate": 0.00018208955223880595, |
| "loss": 0.4759, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.7899669239250275, |
| "grad_norm": 0.24577899277210236, |
| "learning_rate": 0.00018200663349917078, |
| "loss": 0.4717, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.7905181918412348, |
| "grad_norm": 0.21177740395069122, |
| "learning_rate": 0.00018192371475953564, |
| "loss": 0.4564, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.7910694597574421, |
| "grad_norm": 0.2460215985774994, |
| "learning_rate": 0.00018184079601990047, |
| "loss": 0.4921, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.7916207276736494, |
| "grad_norm": 0.24731247127056122, |
| "learning_rate": 0.00018175787728026533, |
| "loss": 0.4655, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.7921719955898566, |
| "grad_norm": 0.24188898503780365, |
| "learning_rate": 0.00018167495854063017, |
| "loss": 0.4665, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.7927232635060639, |
| "grad_norm": 0.2347448617219925, |
| "learning_rate": 0.00018159203980099502, |
| "loss": 0.4563, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.7932745314222712, |
| "grad_norm": 0.242751806974411, |
| "learning_rate": 0.00018150912106135983, |
| "loss": 0.4622, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.7938257993384785, |
| "grad_norm": 0.2598075270652771, |
| "learning_rate": 0.0001814262023217247, |
| "loss": 0.4679, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.7943770672546858, |
| "grad_norm": 0.23368312418460846, |
| "learning_rate": 0.00018134328358208952, |
| "loss": 0.4627, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.794928335170893, |
| "grad_norm": 0.24804770946502686, |
| "learning_rate": 0.00018126036484245438, |
| "loss": 0.4663, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.7954796030871003, |
| "grad_norm": 0.22588974237442017, |
| "learning_rate": 0.0001811774461028192, |
| "loss": 0.4514, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.7960308710033076, |
| "grad_norm": 0.22374935448169708, |
| "learning_rate": 0.00018109452736318407, |
| "loss": 0.4552, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.7965821389195149, |
| "grad_norm": 0.24665199220180511, |
| "learning_rate": 0.0001810116086235489, |
| "loss": 0.4639, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.7971334068357222, |
| "grad_norm": 0.25782036781311035, |
| "learning_rate": 0.00018092868988391376, |
| "loss": 0.4592, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.7976846747519294, |
| "grad_norm": 0.21815195679664612, |
| "learning_rate": 0.0001808457711442786, |
| "loss": 0.4724, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.7982359426681367, |
| "grad_norm": 0.24236443638801575, |
| "learning_rate": 0.00018076285240464345, |
| "loss": 0.473, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.798787210584344, |
| "grad_norm": 0.23173320293426514, |
| "learning_rate": 0.00018067993366500826, |
| "loss": 0.4771, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.7993384785005513, |
| "grad_norm": 0.22303089499473572, |
| "learning_rate": 0.00018059701492537312, |
| "loss": 0.4545, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7998897464167586, |
| "grad_norm": 0.23491422832012177, |
| "learning_rate": 0.00018051409618573795, |
| "loss": 0.4807, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8004410143329658, |
| "grad_norm": 0.23925326764583588, |
| "learning_rate": 0.0001804311774461028, |
| "loss": 0.4705, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8009922822491731, |
| "grad_norm": 0.2446267306804657, |
| "learning_rate": 0.00018034825870646764, |
| "loss": 0.4514, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8015435501653804, |
| "grad_norm": 0.2514120936393738, |
| "learning_rate": 0.0001802653399668325, |
| "loss": 0.4823, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8020948180815877, |
| "grad_norm": 0.2469882369041443, |
| "learning_rate": 0.00018018242122719733, |
| "loss": 0.45, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.802646085997795, |
| "grad_norm": 0.23653636872768402, |
| "learning_rate": 0.0001800995024875622, |
| "loss": 0.4649, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8031973539140022, |
| "grad_norm": 0.22585710883140564, |
| "learning_rate": 0.00018001658374792702, |
| "loss": 0.4384, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8037486218302095, |
| "grad_norm": 0.24817028641700745, |
| "learning_rate": 0.00017993366500829188, |
| "loss": 0.4739, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8042998897464168, |
| "grad_norm": 0.25585106015205383, |
| "learning_rate": 0.0001798507462686567, |
| "loss": 0.4958, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.804851157662624, |
| "grad_norm": 0.25958600640296936, |
| "learning_rate": 0.00017976782752902155, |
| "loss": 0.4673, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8054024255788313, |
| "grad_norm": 0.2447502166032791, |
| "learning_rate": 0.00017968490878938638, |
| "loss": 0.484, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.8059536934950385, |
| "grad_norm": 0.22878794372081757, |
| "learning_rate": 0.00017960199004975124, |
| "loss": 0.4832, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8065049614112458, |
| "grad_norm": 0.24230952560901642, |
| "learning_rate": 0.00017951907131011607, |
| "loss": 0.4498, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8070562293274531, |
| "grad_norm": 0.2345331311225891, |
| "learning_rate": 0.00017943615257048093, |
| "loss": 0.4529, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8076074972436604, |
| "grad_norm": 0.2564900815486908, |
| "learning_rate": 0.00017935323383084576, |
| "loss": 0.4747, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8081587651598677, |
| "grad_norm": 0.2226727157831192, |
| "learning_rate": 0.00017927031509121062, |
| "loss": 0.4453, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8087100330760749, |
| "grad_norm": 0.26586976647377014, |
| "learning_rate": 0.00017918739635157545, |
| "loss": 0.5032, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8092613009922822, |
| "grad_norm": 0.23573876917362213, |
| "learning_rate": 0.00017910447761194026, |
| "loss": 0.4674, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8098125689084895, |
| "grad_norm": 0.24506725370883942, |
| "learning_rate": 0.00017902155887230512, |
| "loss": 0.4605, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8103638368246968, |
| "grad_norm": 0.2386348396539688, |
| "learning_rate": 0.00017893864013266995, |
| "loss": 0.4618, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8109151047409041, |
| "grad_norm": 0.24811455607414246, |
| "learning_rate": 0.0001788557213930348, |
| "loss": 0.4615, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8114663726571113, |
| "grad_norm": 0.2334372103214264, |
| "learning_rate": 0.00017877280265339964, |
| "loss": 0.474, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8120176405733186, |
| "grad_norm": 0.247808575630188, |
| "learning_rate": 0.0001786898839137645, |
| "loss": 0.4504, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8125689084895259, |
| "grad_norm": 0.21028272807598114, |
| "learning_rate": 0.00017860696517412933, |
| "loss": 0.4425, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.8131201764057332, |
| "grad_norm": 0.22339411079883575, |
| "learning_rate": 0.0001785240464344942, |
| "loss": 0.449, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8136714443219405, |
| "grad_norm": 0.23447810113430023, |
| "learning_rate": 0.00017844112769485902, |
| "loss": 0.4593, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8142227122381478, |
| "grad_norm": 0.22381900250911713, |
| "learning_rate": 0.00017835820895522388, |
| "loss": 0.4603, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.814773980154355, |
| "grad_norm": 0.22677209973335266, |
| "learning_rate": 0.00017827529021558869, |
| "loss": 0.4525, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8153252480705623, |
| "grad_norm": 0.2385341227054596, |
| "learning_rate": 0.00017819237147595354, |
| "loss": 0.49, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8158765159867696, |
| "grad_norm": 0.24088934063911438, |
| "learning_rate": 0.00017810945273631838, |
| "loss": 0.4984, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8164277839029769, |
| "grad_norm": 0.20627839863300323, |
| "learning_rate": 0.00017802653399668324, |
| "loss": 0.4597, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8169790518191842, |
| "grad_norm": 0.2268056422472, |
| "learning_rate": 0.00017794361525704807, |
| "loss": 0.4581, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8175303197353914, |
| "grad_norm": 0.24342721700668335, |
| "learning_rate": 0.00017786069651741293, |
| "loss": 0.4715, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8180815876515987, |
| "grad_norm": 0.23494994640350342, |
| "learning_rate": 0.00017777777777777776, |
| "loss": 0.4859, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.818632855567806, |
| "grad_norm": 0.23297634720802307, |
| "learning_rate": 0.00017769485903814262, |
| "loss": 0.4644, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.8191841234840133, |
| "grad_norm": 0.24424344301223755, |
| "learning_rate": 0.00017761194029850745, |
| "loss": 0.456, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8197353914002206, |
| "grad_norm": 0.2417961210012436, |
| "learning_rate": 0.0001775290215588723, |
| "loss": 0.5005, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.8202866593164277, |
| "grad_norm": 0.24089650809764862, |
| "learning_rate": 0.00017744610281923711, |
| "loss": 0.4953, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.820837927232635, |
| "grad_norm": 0.22983671724796295, |
| "learning_rate": 0.00017736318407960197, |
| "loss": 0.4544, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.8213891951488423, |
| "grad_norm": 0.20966455340385437, |
| "learning_rate": 0.0001772802653399668, |
| "loss": 0.4724, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8219404630650496, |
| "grad_norm": 0.24843506515026093, |
| "learning_rate": 0.00017719734660033166, |
| "loss": 0.4799, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.8224917309812569, |
| "grad_norm": 0.22664618492126465, |
| "learning_rate": 0.0001771144278606965, |
| "loss": 0.4421, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.8230429988974641, |
| "grad_norm": 0.22813642024993896, |
| "learning_rate": 0.00017703150912106136, |
| "loss": 0.4622, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.8235942668136714, |
| "grad_norm": 0.2250567078590393, |
| "learning_rate": 0.0001769485903814262, |
| "loss": 0.4526, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.8241455347298787, |
| "grad_norm": 0.2317907065153122, |
| "learning_rate": 0.00017686567164179105, |
| "loss": 0.4743, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.824696802646086, |
| "grad_norm": 0.22760067880153656, |
| "learning_rate": 0.00017678275290215588, |
| "loss": 0.4765, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.8252480705622933, |
| "grad_norm": 0.21815039217472076, |
| "learning_rate": 0.00017669983416252074, |
| "loss": 0.4588, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.8257993384785005, |
| "grad_norm": 0.25006452202796936, |
| "learning_rate": 0.00017661691542288554, |
| "loss": 0.451, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.8263506063947078, |
| "grad_norm": 0.22310319542884827, |
| "learning_rate": 0.0001765339966832504, |
| "loss": 0.4754, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.8269018743109151, |
| "grad_norm": 0.26363706588745117, |
| "learning_rate": 0.00017645107794361523, |
| "loss": 0.4834, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8269018743109151, |
| "eval_loss": 0.4649047255516052, |
| "eval_runtime": 312.7946, |
| "eval_samples_per_second": 3.724, |
| "eval_steps_per_second": 0.467, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8274531422271224, |
| "grad_norm": 0.22052568197250366, |
| "learning_rate": 0.0001763681592039801, |
| "loss": 0.4931, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.8280044101433297, |
| "grad_norm": 0.23108328878879547, |
| "learning_rate": 0.00017628524046434493, |
| "loss": 0.4901, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.8285556780595369, |
| "grad_norm": 0.23075662553310394, |
| "learning_rate": 0.00017620232172470978, |
| "loss": 0.4484, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.8291069459757442, |
| "grad_norm": 0.24602019786834717, |
| "learning_rate": 0.00017611940298507462, |
| "loss": 0.4427, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.8296582138919515, |
| "grad_norm": 0.2438734471797943, |
| "learning_rate": 0.00017603648424543948, |
| "loss": 0.4731, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.8302094818081588, |
| "grad_norm": 0.23441627621650696, |
| "learning_rate": 0.0001759535655058043, |
| "loss": 0.4628, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.8307607497243661, |
| "grad_norm": 0.23310305178165436, |
| "learning_rate": 0.00017587064676616917, |
| "loss": 0.4929, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.8313120176405733, |
| "grad_norm": 0.25448939204216003, |
| "learning_rate": 0.00017578772802653397, |
| "loss": 0.4851, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.8318632855567806, |
| "grad_norm": 0.2438756674528122, |
| "learning_rate": 0.00017570480928689883, |
| "loss": 0.4706, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.8324145534729879, |
| "grad_norm": 0.25436931848526, |
| "learning_rate": 0.00017562189054726366, |
| "loss": 0.4869, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8329658213891952, |
| "grad_norm": 0.22301998734474182, |
| "learning_rate": 0.0001755389718076285, |
| "loss": 0.4593, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.8335170893054025, |
| "grad_norm": 0.24233976006507874, |
| "learning_rate": 0.00017545605306799335, |
| "loss": 0.5016, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.8340683572216097, |
| "grad_norm": 0.22516629099845886, |
| "learning_rate": 0.00017537313432835819, |
| "loss": 0.4732, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.834619625137817, |
| "grad_norm": 0.22612155973911285, |
| "learning_rate": 0.00017529021558872305, |
| "loss": 0.4625, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.8351708930540243, |
| "grad_norm": 0.23177853226661682, |
| "learning_rate": 0.00017520729684908785, |
| "loss": 0.4776, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.8357221609702316, |
| "grad_norm": 0.24279583990573883, |
| "learning_rate": 0.00017512437810945274, |
| "loss": 0.4721, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.8362734288864389, |
| "grad_norm": 0.23456443846225739, |
| "learning_rate": 0.00017504145936981754, |
| "loss": 0.4635, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.836824696802646, |
| "grad_norm": 0.23287171125411987, |
| "learning_rate": 0.0001749585406301824, |
| "loss": 0.4739, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.8373759647188533, |
| "grad_norm": 0.22415684163570404, |
| "learning_rate": 0.00017487562189054723, |
| "loss": 0.4769, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.8379272326350606, |
| "grad_norm": 0.2180211991071701, |
| "learning_rate": 0.0001747927031509121, |
| "loss": 0.4388, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8384785005512679, |
| "grad_norm": 0.2260761708021164, |
| "learning_rate": 0.00017470978441127692, |
| "loss": 0.4972, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.8390297684674752, |
| "grad_norm": 0.22887657582759857, |
| "learning_rate": 0.00017462686567164178, |
| "loss": 0.4554, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.8395810363836824, |
| "grad_norm": 0.241640105843544, |
| "learning_rate": 0.00017454394693200662, |
| "loss": 0.4732, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.8401323042998897, |
| "grad_norm": 0.2288465052843094, |
| "learning_rate": 0.00017446102819237147, |
| "loss": 0.4527, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.840683572216097, |
| "grad_norm": 0.23457041382789612, |
| "learning_rate": 0.00017437810945273628, |
| "loss": 0.4574, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.8412348401323043, |
| "grad_norm": 0.25197815895080566, |
| "learning_rate": 0.00017429519071310114, |
| "loss": 0.4597, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.8417861080485116, |
| "grad_norm": 0.2385404258966446, |
| "learning_rate": 0.00017421227197346597, |
| "loss": 0.4649, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.8423373759647188, |
| "grad_norm": 0.23451651632785797, |
| "learning_rate": 0.00017412935323383083, |
| "loss": 0.4646, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.8428886438809261, |
| "grad_norm": 0.2421046793460846, |
| "learning_rate": 0.00017404643449419566, |
| "loss": 0.4852, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.8434399117971334, |
| "grad_norm": 0.25406989455223083, |
| "learning_rate": 0.00017396351575456052, |
| "loss": 0.4804, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8439911797133407, |
| "grad_norm": 0.24752497673034668, |
| "learning_rate": 0.00017388059701492535, |
| "loss": 0.4777, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.844542447629548, |
| "grad_norm": 0.226281076669693, |
| "learning_rate": 0.0001737976782752902, |
| "loss": 0.4747, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.8450937155457552, |
| "grad_norm": 0.2519485652446747, |
| "learning_rate": 0.00017371475953565504, |
| "loss": 0.4639, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.8456449834619625, |
| "grad_norm": 0.2347985804080963, |
| "learning_rate": 0.0001736318407960199, |
| "loss": 0.4715, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.8461962513781698, |
| "grad_norm": 0.24425053596496582, |
| "learning_rate": 0.0001735489220563847, |
| "loss": 0.445, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.8467475192943771, |
| "grad_norm": 0.2559725046157837, |
| "learning_rate": 0.00017346600331674957, |
| "loss": 0.49, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8472987872105844, |
| "grad_norm": 0.23750551044940948, |
| "learning_rate": 0.0001733830845771144, |
| "loss": 0.4663, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.8478500551267916, |
| "grad_norm": 0.22861897945404053, |
| "learning_rate": 0.00017330016583747926, |
| "loss": 0.45, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.8484013230429989, |
| "grad_norm": 0.24839669466018677, |
| "learning_rate": 0.0001732172470978441, |
| "loss": 0.4856, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.8489525909592062, |
| "grad_norm": 0.23960521817207336, |
| "learning_rate": 0.00017313432835820895, |
| "loss": 0.4933, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8495038588754135, |
| "grad_norm": 0.23533576726913452, |
| "learning_rate": 0.00017305140961857378, |
| "loss": 0.4698, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.8500551267916208, |
| "grad_norm": 0.23979732394218445, |
| "learning_rate": 0.00017296849087893864, |
| "loss": 0.4953, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.850606394707828, |
| "grad_norm": 0.24841150641441345, |
| "learning_rate": 0.00017288557213930347, |
| "loss": 0.4845, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8511576626240352, |
| "grad_norm": 0.22132597863674164, |
| "learning_rate": 0.00017280265339966833, |
| "loss": 0.4643, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.8517089305402425, |
| "grad_norm": 0.22431734204292297, |
| "learning_rate": 0.00017271973466003314, |
| "loss": 0.4547, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8522601984564498, |
| "grad_norm": 0.22704413533210754, |
| "learning_rate": 0.000172636815920398, |
| "loss": 0.4665, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8528114663726571, |
| "grad_norm": 0.22971755266189575, |
| "learning_rate": 0.00017255389718076283, |
| "loss": 0.4709, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.8533627342888643, |
| "grad_norm": 0.2435724288225174, |
| "learning_rate": 0.0001724709784411277, |
| "loss": 0.4733, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.8539140022050716, |
| "grad_norm": 0.24051538109779358, |
| "learning_rate": 0.00017238805970149252, |
| "loss": 0.4695, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.8544652701212789, |
| "grad_norm": 0.26592954993247986, |
| "learning_rate": 0.00017230514096185738, |
| "loss": 0.4683, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8550165380374862, |
| "grad_norm": 0.24452587962150574, |
| "learning_rate": 0.0001722222222222222, |
| "loss": 0.4623, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.8555678059536935, |
| "grad_norm": 0.23351791501045227, |
| "learning_rate": 0.00017213930348258707, |
| "loss": 0.4559, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.8561190738699008, |
| "grad_norm": 0.23652702569961548, |
| "learning_rate": 0.0001720563847429519, |
| "loss": 0.4507, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.856670341786108, |
| "grad_norm": 0.22390702366828918, |
| "learning_rate": 0.00017197346600331676, |
| "loss": 0.4521, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8572216097023153, |
| "grad_norm": 0.24590735137462616, |
| "learning_rate": 0.00017189054726368157, |
| "loss": 0.4712, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8577728776185226, |
| "grad_norm": 0.21954110264778137, |
| "learning_rate": 0.0001718076285240464, |
| "loss": 0.4447, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8583241455347299, |
| "grad_norm": 0.23404909670352936, |
| "learning_rate": 0.00017172470978441126, |
| "loss": 0.4699, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8588754134509372, |
| "grad_norm": 0.24352899193763733, |
| "learning_rate": 0.0001716417910447761, |
| "loss": 0.4904, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.8594266813671444, |
| "grad_norm": 0.30317431688308716, |
| "learning_rate": 0.00017155887230514095, |
| "loss": 0.4606, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.8599779492833517, |
| "grad_norm": 0.22517681121826172, |
| "learning_rate": 0.00017147595356550578, |
| "loss": 0.4892, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.860529217199559, |
| "grad_norm": 0.23503634333610535, |
| "learning_rate": 0.00017139303482587064, |
| "loss": 0.4755, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.8610804851157663, |
| "grad_norm": 0.22381718456745148, |
| "learning_rate": 0.00017131011608623547, |
| "loss": 0.4492, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8616317530319736, |
| "grad_norm": 0.24450813233852386, |
| "learning_rate": 0.00017122719734660033, |
| "loss": 0.4764, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8621830209481808, |
| "grad_norm": 0.2357473075389862, |
| "learning_rate": 0.00017114427860696513, |
| "loss": 0.4727, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.8627342888643881, |
| "grad_norm": 0.22676219046115875, |
| "learning_rate": 0.00017106135986733, |
| "loss": 0.454, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.8632855567805954, |
| "grad_norm": 0.24174387753009796, |
| "learning_rate": 0.00017097844112769483, |
| "loss": 0.4451, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8638368246968027, |
| "grad_norm": 0.24716874957084656, |
| "learning_rate": 0.00017089552238805969, |
| "loss": 0.4639, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.86438809261301, |
| "grad_norm": 0.24672383069992065, |
| "learning_rate": 0.00017081260364842452, |
| "loss": 0.4811, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8649393605292172, |
| "grad_norm": 0.2504035234451294, |
| "learning_rate": 0.00017072968490878938, |
| "loss": 0.4715, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8654906284454245, |
| "grad_norm": 0.2296275794506073, |
| "learning_rate": 0.0001706467661691542, |
| "loss": 0.4552, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8660418963616318, |
| "grad_norm": 0.24308894574642181, |
| "learning_rate": 0.00017056384742951907, |
| "loss": 0.4798, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8665931642778391, |
| "grad_norm": 0.25587549805641174, |
| "learning_rate": 0.0001704809286898839, |
| "loss": 0.473, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8671444321940464, |
| "grad_norm": 0.22006462514400482, |
| "learning_rate": 0.00017039800995024876, |
| "loss": 0.4512, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8676957001102535, |
| "grad_norm": 0.2469773143529892, |
| "learning_rate": 0.00017031509121061356, |
| "loss": 0.4651, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.8682469680264608, |
| "grad_norm": 0.23426435887813568, |
| "learning_rate": 0.00017023217247097842, |
| "loss": 0.4658, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8687982359426681, |
| "grad_norm": 0.2696544826030731, |
| "learning_rate": 0.00017014925373134325, |
| "loss": 0.4555, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.8693495038588754, |
| "grad_norm": 0.24263867735862732, |
| "learning_rate": 0.00017006633499170811, |
| "loss": 0.4426, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.8699007717750827, |
| "grad_norm": 0.24693246185779572, |
| "learning_rate": 0.00016998341625207295, |
| "loss": 0.4876, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8704520396912899, |
| "grad_norm": 0.24460558593273163, |
| "learning_rate": 0.0001699004975124378, |
| "loss": 0.4704, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8710033076074972, |
| "grad_norm": 0.2212182730436325, |
| "learning_rate": 0.00016981757877280264, |
| "loss": 0.4496, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8715545755237045, |
| "grad_norm": 0.23751485347747803, |
| "learning_rate": 0.0001697346600331675, |
| "loss": 0.4546, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.8721058434399118, |
| "grad_norm": 0.2521110475063324, |
| "learning_rate": 0.00016965174129353233, |
| "loss": 0.4706, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.8726571113561191, |
| "grad_norm": 0.24147383868694305, |
| "learning_rate": 0.0001695688225538972, |
| "loss": 0.4519, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8732083792723263, |
| "grad_norm": 0.2279898077249527, |
| "learning_rate": 0.000169485903814262, |
| "loss": 0.4648, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8737596471885336, |
| "grad_norm": 0.24053026735782623, |
| "learning_rate": 0.00016940298507462685, |
| "loss": 0.4747, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8743109151047409, |
| "grad_norm": 0.24321089684963226, |
| "learning_rate": 0.00016932006633499168, |
| "loss": 0.4562, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8748621830209482, |
| "grad_norm": 0.2396124303340912, |
| "learning_rate": 0.00016923714759535654, |
| "loss": 0.4631, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.8754134509371555, |
| "grad_norm": 0.23284991085529327, |
| "learning_rate": 0.00016915422885572137, |
| "loss": 0.4452, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.8759647188533627, |
| "grad_norm": 0.2377912849187851, |
| "learning_rate": 0.00016907131011608623, |
| "loss": 0.4471, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.87651598676957, |
| "grad_norm": 0.23828253149986267, |
| "learning_rate": 0.00016898839137645107, |
| "loss": 0.4463, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8770672546857773, |
| "grad_norm": 0.24640867114067078, |
| "learning_rate": 0.00016890547263681593, |
| "loss": 0.4776, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.8776185226019846, |
| "grad_norm": 0.24699927866458893, |
| "learning_rate": 0.00016882255389718076, |
| "loss": 0.437, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.8781697905181919, |
| "grad_norm": 0.24521562457084656, |
| "learning_rate": 0.00016873963515754562, |
| "loss": 0.4805, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.8787210584343991, |
| "grad_norm": 0.2375350147485733, |
| "learning_rate": 0.00016865671641791042, |
| "loss": 0.4835, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.8792723263506064, |
| "grad_norm": 0.23784852027893066, |
| "learning_rate": 0.00016857379767827528, |
| "loss": 0.49, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.8798235942668137, |
| "grad_norm": 0.23371200263500214, |
| "learning_rate": 0.0001684908789386401, |
| "loss": 0.4701, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.880374862183021, |
| "grad_norm": 0.23373621702194214, |
| "learning_rate": 0.00016840796019900497, |
| "loss": 0.4765, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.8809261300992283, |
| "grad_norm": 0.25964394211769104, |
| "learning_rate": 0.0001683250414593698, |
| "loss": 0.4505, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.8814773980154355, |
| "grad_norm": 0.2420414835214615, |
| "learning_rate": 0.00016824212271973464, |
| "loss": 0.5, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.8820286659316428, |
| "grad_norm": 0.24534733593463898, |
| "learning_rate": 0.0001681592039800995, |
| "loss": 0.4625, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.88257993384785, |
| "grad_norm": 0.22338466346263885, |
| "learning_rate": 0.00016807628524046433, |
| "loss": 0.4383, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.8831312017640573, |
| "grad_norm": 0.24304436147212982, |
| "learning_rate": 0.00016799336650082919, |
| "loss": 0.4717, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.8836824696802646, |
| "grad_norm": 0.24378708004951477, |
| "learning_rate": 0.000167910447761194, |
| "loss": 0.4732, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.8842337375964718, |
| "grad_norm": 0.22068338096141815, |
| "learning_rate": 0.00016782752902155885, |
| "loss": 0.4709, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.8847850055126791, |
| "grad_norm": 0.25752487778663635, |
| "learning_rate": 0.00016774461028192368, |
| "loss": 0.4571, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.8853362734288864, |
| "grad_norm": 0.21915499866008759, |
| "learning_rate": 0.00016766169154228854, |
| "loss": 0.4551, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.8858875413450937, |
| "grad_norm": 0.220630943775177, |
| "learning_rate": 0.00016757877280265337, |
| "loss": 0.4336, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.886438809261301, |
| "grad_norm": 0.2279721051454544, |
| "learning_rate": 0.00016749585406301823, |
| "loss": 0.4546, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.8869900771775082, |
| "grad_norm": 0.23162703216075897, |
| "learning_rate": 0.00016741293532338306, |
| "loss": 0.4596, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.8875413450937155, |
| "grad_norm": 0.22968967258930206, |
| "learning_rate": 0.00016733001658374792, |
| "loss": 0.4457, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8880926130099228, |
| "grad_norm": 0.23839277029037476, |
| "learning_rate": 0.00016724709784411276, |
| "loss": 0.444, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.8886438809261301, |
| "grad_norm": 0.2291092872619629, |
| "learning_rate": 0.00016716417910447761, |
| "loss": 0.4796, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.8891951488423374, |
| "grad_norm": 0.2277524322271347, |
| "learning_rate": 0.00016708126036484242, |
| "loss": 0.4373, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.8897464167585446, |
| "grad_norm": 0.24553948640823364, |
| "learning_rate": 0.00016699834162520728, |
| "loss": 0.4948, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.8902976846747519, |
| "grad_norm": 0.21850357949733734, |
| "learning_rate": 0.0001669154228855721, |
| "loss": 0.4575, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.8908489525909592, |
| "grad_norm": 0.23171943426132202, |
| "learning_rate": 0.00016683250414593697, |
| "loss": 0.4947, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.8914002205071665, |
| "grad_norm": 0.22626076638698578, |
| "learning_rate": 0.0001667495854063018, |
| "loss": 0.4619, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.8919514884233738, |
| "grad_norm": 0.23768572509288788, |
| "learning_rate": 0.00016666666666666666, |
| "loss": 0.4535, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.892502756339581, |
| "grad_norm": 0.2264167070388794, |
| "learning_rate": 0.0001665837479270315, |
| "loss": 0.467, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.8930540242557883, |
| "grad_norm": 0.2234300673007965, |
| "learning_rate": 0.00016650082918739635, |
| "loss": 0.4331, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.8936052921719956, |
| "grad_norm": 0.22206327319145203, |
| "learning_rate": 0.00016641791044776118, |
| "loss": 0.4442, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.8941565600882029, |
| "grad_norm": 0.22858171164989471, |
| "learning_rate": 0.00016633499170812604, |
| "loss": 0.4611, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.8947078280044102, |
| "grad_norm": 0.24421337246894836, |
| "learning_rate": 0.00016625207296849085, |
| "loss": 0.4551, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.8952590959206174, |
| "grad_norm": 0.20711436867713928, |
| "learning_rate": 0.0001661691542288557, |
| "loss": 0.4555, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.8958103638368247, |
| "grad_norm": 0.22994433343410492, |
| "learning_rate": 0.00016608623548922054, |
| "loss": 0.4745, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.896361631753032, |
| "grad_norm": 0.22984014451503754, |
| "learning_rate": 0.0001660033167495854, |
| "loss": 0.4613, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.8969128996692393, |
| "grad_norm": 0.2339726984500885, |
| "learning_rate": 0.00016592039800995023, |
| "loss": 0.469, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.8974641675854466, |
| "grad_norm": 0.23884552717208862, |
| "learning_rate": 0.0001658374792703151, |
| "loss": 0.4812, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.8980154355016539, |
| "grad_norm": 0.23677459359169006, |
| "learning_rate": 0.00016575456053067992, |
| "loss": 0.471, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.898566703417861, |
| "grad_norm": 0.22945214807987213, |
| "learning_rate": 0.00016567164179104478, |
| "loss": 0.4666, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.8991179713340683, |
| "grad_norm": 0.231664776802063, |
| "learning_rate": 0.0001655887230514096, |
| "loss": 0.4657, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.8996692392502756, |
| "grad_norm": 0.22424204647541046, |
| "learning_rate": 0.00016550580431177447, |
| "loss": 0.4682, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9002205071664829, |
| "grad_norm": 0.23469983041286469, |
| "learning_rate": 0.00016542288557213928, |
| "loss": 0.4761, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.9007717750826902, |
| "grad_norm": 0.2397875040769577, |
| "learning_rate": 0.00016533996683250414, |
| "loss": 0.4763, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.9013230429988974, |
| "grad_norm": 0.21035277843475342, |
| "learning_rate": 0.00016525704809286897, |
| "loss": 0.4225, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.9018743109151047, |
| "grad_norm": 0.24221475422382355, |
| "learning_rate": 0.00016517412935323383, |
| "loss": 0.4666, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.902425578831312, |
| "grad_norm": 0.22903227806091309, |
| "learning_rate": 0.00016509121061359866, |
| "loss": 0.4699, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9029768467475193, |
| "grad_norm": 0.23368406295776367, |
| "learning_rate": 0.00016500829187396352, |
| "loss": 0.4763, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.9035281146637266, |
| "grad_norm": 0.2397768199443817, |
| "learning_rate": 0.00016492537313432835, |
| "loss": 0.4552, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9040793825799338, |
| "grad_norm": 0.24322962760925293, |
| "learning_rate": 0.0001648424543946932, |
| "loss": 0.4441, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9046306504961411, |
| "grad_norm": 0.21771124005317688, |
| "learning_rate": 0.00016475953565505801, |
| "loss": 0.4635, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.9051819184123484, |
| "grad_norm": 0.21717268228530884, |
| "learning_rate": 0.0001646766169154229, |
| "loss": 0.4459, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9057331863285557, |
| "grad_norm": 0.23191964626312256, |
| "learning_rate": 0.0001645936981757877, |
| "loss": 0.4605, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.906284454244763, |
| "grad_norm": 0.24638865888118744, |
| "learning_rate": 0.00016451077943615254, |
| "loss": 0.4477, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.9068357221609702, |
| "grad_norm": 0.24050134420394897, |
| "learning_rate": 0.0001644278606965174, |
| "loss": 0.4389, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.9073869900771775, |
| "grad_norm": 0.23574888706207275, |
| "learning_rate": 0.00016434494195688223, |
| "loss": 0.4556, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.9079382579933848, |
| "grad_norm": 0.23960547149181366, |
| "learning_rate": 0.0001642620232172471, |
| "loss": 0.4599, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.9084895259095921, |
| "grad_norm": 0.22923794388771057, |
| "learning_rate": 0.00016417910447761192, |
| "loss": 0.4566, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.9090407938257994, |
| "grad_norm": 0.23294423520565033, |
| "learning_rate": 0.00016409618573797678, |
| "loss": 0.4726, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.9095920617420066, |
| "grad_norm": 0.24964945018291473, |
| "learning_rate": 0.00016401326699834158, |
| "loss": 0.483, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9101433296582139, |
| "grad_norm": 0.22729866206645966, |
| "learning_rate": 0.00016393034825870644, |
| "loss": 0.4708, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.9106945975744212, |
| "grad_norm": 0.22324109077453613, |
| "learning_rate": 0.00016384742951907128, |
| "loss": 0.4798, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.9112458654906285, |
| "grad_norm": 0.2301269918680191, |
| "learning_rate": 0.00016376451077943613, |
| "loss": 0.4659, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.9117971334068358, |
| "grad_norm": 0.26973679661750793, |
| "learning_rate": 0.00016368159203980097, |
| "loss": 0.4743, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.912348401323043, |
| "grad_norm": 0.2236243188381195, |
| "learning_rate": 0.00016359867330016583, |
| "loss": 0.4464, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.9128996692392503, |
| "grad_norm": 0.23898382484912872, |
| "learning_rate": 0.00016351575456053066, |
| "loss": 0.4715, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.9134509371554576, |
| "grad_norm": 0.226115882396698, |
| "learning_rate": 0.00016343283582089552, |
| "loss": 0.452, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.9140022050716649, |
| "grad_norm": 0.24120070040225983, |
| "learning_rate": 0.00016334991708126035, |
| "loss": 0.4594, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.9145534729878722, |
| "grad_norm": 0.2507602870464325, |
| "learning_rate": 0.0001632669983416252, |
| "loss": 0.4759, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.9151047409040793, |
| "grad_norm": 0.26350581645965576, |
| "learning_rate": 0.00016318407960199, |
| "loss": 0.4553, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9156560088202866, |
| "grad_norm": 0.23043513298034668, |
| "learning_rate": 0.00016310116086235487, |
| "loss": 0.4754, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.9162072767364939, |
| "grad_norm": 0.22888733446598053, |
| "learning_rate": 0.0001630182421227197, |
| "loss": 0.4602, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.9167585446527012, |
| "grad_norm": 0.23566976189613342, |
| "learning_rate": 0.00016293532338308456, |
| "loss": 0.4492, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.9173098125689085, |
| "grad_norm": 0.2403411716222763, |
| "learning_rate": 0.0001628524046434494, |
| "loss": 0.4529, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.9178610804851157, |
| "grad_norm": 0.24615786969661713, |
| "learning_rate": 0.00016276948590381425, |
| "loss": 0.4688, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.918412348401323, |
| "grad_norm": 0.2582218647003174, |
| "learning_rate": 0.0001626865671641791, |
| "loss": 0.4626, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.9189636163175303, |
| "grad_norm": 0.2405799925327301, |
| "learning_rate": 0.00016260364842454395, |
| "loss": 0.4529, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.9195148842337376, |
| "grad_norm": 0.2288394719362259, |
| "learning_rate": 0.00016252072968490878, |
| "loss": 0.4513, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.9200661521499449, |
| "grad_norm": 0.22039665281772614, |
| "learning_rate": 0.00016243781094527364, |
| "loss": 0.4636, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.9206174200661521, |
| "grad_norm": 0.2359505444765091, |
| "learning_rate": 0.00016235489220563844, |
| "loss": 0.4703, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9211686879823594, |
| "grad_norm": 0.25222134590148926, |
| "learning_rate": 0.0001622719734660033, |
| "loss": 0.4729, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.9217199558985667, |
| "grad_norm": 0.24714909493923187, |
| "learning_rate": 0.00016218905472636813, |
| "loss": 0.4376, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.922271223814774, |
| "grad_norm": 0.271454781293869, |
| "learning_rate": 0.000162106135986733, |
| "loss": 0.4771, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.9228224917309813, |
| "grad_norm": 0.2408027946949005, |
| "learning_rate": 0.00016202321724709782, |
| "loss": 0.4581, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.9233737596471885, |
| "grad_norm": 0.25041836500167847, |
| "learning_rate": 0.00016194029850746268, |
| "loss": 0.4685, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.9239250275633958, |
| "grad_norm": 0.2697443664073944, |
| "learning_rate": 0.00016185737976782752, |
| "loss": 0.4905, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.9244762954796031, |
| "grad_norm": 0.261924684047699, |
| "learning_rate": 0.00016177446102819237, |
| "loss": 0.5045, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.9250275633958104, |
| "grad_norm": 0.23671838641166687, |
| "learning_rate": 0.0001616915422885572, |
| "loss": 0.4477, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.9255788313120177, |
| "grad_norm": 0.26420533657073975, |
| "learning_rate": 0.00016160862354892207, |
| "loss": 0.4922, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.9261300992282249, |
| "grad_norm": 0.2353939265012741, |
| "learning_rate": 0.00016152570480928687, |
| "loss": 0.4434, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9266813671444322, |
| "grad_norm": 0.23843790590763092, |
| "learning_rate": 0.00016144278606965173, |
| "loss": 0.4567, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.9272326350606395, |
| "grad_norm": 0.22744010388851166, |
| "learning_rate": 0.00016135986733001656, |
| "loss": 0.4607, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.9277839029768468, |
| "grad_norm": 0.2599264979362488, |
| "learning_rate": 0.00016127694859038142, |
| "loss": 0.4839, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.9283351708930541, |
| "grad_norm": 0.2337629646062851, |
| "learning_rate": 0.00016119402985074625, |
| "loss": 0.4697, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.9288864388092613, |
| "grad_norm": 0.2365848571062088, |
| "learning_rate": 0.0001611111111111111, |
| "loss": 0.4589, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.9294377067254685, |
| "grad_norm": 0.22954298555850983, |
| "learning_rate": 0.00016102819237147594, |
| "loss": 0.4071, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.9299889746416758, |
| "grad_norm": 0.22945284843444824, |
| "learning_rate": 0.00016094527363184078, |
| "loss": 0.4432, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.9305402425578831, |
| "grad_norm": 0.2274722009897232, |
| "learning_rate": 0.00016086235489220564, |
| "loss": 0.4537, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.9310915104740904, |
| "grad_norm": 0.23572379350662231, |
| "learning_rate": 0.00016077943615257044, |
| "loss": 0.4621, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.9316427783902976, |
| "grad_norm": 0.2582686245441437, |
| "learning_rate": 0.0001606965174129353, |
| "loss": 0.4845, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9321940463065049, |
| "grad_norm": 0.252638578414917, |
| "learning_rate": 0.00016061359867330013, |
| "loss": 0.4583, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.9327453142227122, |
| "grad_norm": 0.24242907762527466, |
| "learning_rate": 0.000160530679933665, |
| "loss": 0.4659, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.9332965821389195, |
| "grad_norm": 0.25426262617111206, |
| "learning_rate": 0.00016044776119402982, |
| "loss": 0.4615, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.9338478500551268, |
| "grad_norm": 0.2503727972507477, |
| "learning_rate": 0.00016036484245439468, |
| "loss": 0.4732, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.934399117971334, |
| "grad_norm": 0.23591485619544983, |
| "learning_rate": 0.00016028192371475951, |
| "loss": 0.4865, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.9349503858875413, |
| "grad_norm": 0.2307887077331543, |
| "learning_rate": 0.00016019900497512437, |
| "loss": 0.4694, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.9355016538037486, |
| "grad_norm": 0.24209177494049072, |
| "learning_rate": 0.0001601160862354892, |
| "loss": 0.4716, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.9360529217199559, |
| "grad_norm": 0.23071332275867462, |
| "learning_rate": 0.00016003316749585406, |
| "loss": 0.4548, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.9366041896361632, |
| "grad_norm": 0.2404324859380722, |
| "learning_rate": 0.00015995024875621887, |
| "loss": 0.4614, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.9371554575523704, |
| "grad_norm": 0.24288049340248108, |
| "learning_rate": 0.00015986733001658373, |
| "loss": 0.477, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9377067254685777, |
| "grad_norm": 0.2315543293952942, |
| "learning_rate": 0.00015978441127694856, |
| "loss": 0.4294, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.938257993384785, |
| "grad_norm": 0.24326400458812714, |
| "learning_rate": 0.00015970149253731342, |
| "loss": 0.4751, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.9388092613009923, |
| "grad_norm": 0.23202817142009735, |
| "learning_rate": 0.00015961857379767825, |
| "loss": 0.4539, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.9393605292171996, |
| "grad_norm": 0.24364544451236725, |
| "learning_rate": 0.0001595356550580431, |
| "loss": 0.4742, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.9399117971334069, |
| "grad_norm": 0.24248524010181427, |
| "learning_rate": 0.00015945273631840794, |
| "loss": 0.4335, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.9404630650496141, |
| "grad_norm": 0.2423916757106781, |
| "learning_rate": 0.0001593698175787728, |
| "loss": 0.4825, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.9410143329658214, |
| "grad_norm": 0.22844377160072327, |
| "learning_rate": 0.00015928689883913763, |
| "loss": 0.468, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.9415656008820287, |
| "grad_norm": 0.23481746017932892, |
| "learning_rate": 0.0001592039800995025, |
| "loss": 0.459, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.942116868798236, |
| "grad_norm": 0.23676711320877075, |
| "learning_rate": 0.0001591210613598673, |
| "loss": 0.4748, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.9426681367144433, |
| "grad_norm": 0.23470185697078705, |
| "learning_rate": 0.00015903814262023216, |
| "loss": 0.4538, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9432194046306505, |
| "grad_norm": 0.26180773973464966, |
| "learning_rate": 0.000158955223880597, |
| "loss": 0.4737, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.9437706725468578, |
| "grad_norm": 0.23656126856803894, |
| "learning_rate": 0.00015887230514096185, |
| "loss": 0.4716, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.9443219404630651, |
| "grad_norm": 0.2338191270828247, |
| "learning_rate": 0.00015878938640132668, |
| "loss": 0.4712, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.9448732083792724, |
| "grad_norm": 0.2348823845386505, |
| "learning_rate": 0.00015870646766169154, |
| "loss": 0.4645, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.9454244762954797, |
| "grad_norm": 0.23620596528053284, |
| "learning_rate": 0.00015862354892205637, |
| "loss": 0.4456, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.9459757442116868, |
| "grad_norm": 0.25021445751190186, |
| "learning_rate": 0.00015854063018242123, |
| "loss": 0.4807, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.9465270121278941, |
| "grad_norm": 0.23087383806705475, |
| "learning_rate": 0.00015845771144278606, |
| "loss": 0.4648, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.9470782800441014, |
| "grad_norm": 0.23474477231502533, |
| "learning_rate": 0.00015837479270315092, |
| "loss": 0.4672, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.9476295479603087, |
| "grad_norm": 0.2543323338031769, |
| "learning_rate": 0.00015829187396351573, |
| "loss": 0.473, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.948180815876516, |
| "grad_norm": 0.2378506064414978, |
| "learning_rate": 0.00015820895522388059, |
| "loss": 0.4569, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9487320837927232, |
| "grad_norm": 0.23003467917442322, |
| "learning_rate": 0.00015812603648424542, |
| "loss": 0.4621, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.9492833517089305, |
| "grad_norm": 0.24162529408931732, |
| "learning_rate": 0.00015804311774461028, |
| "loss": 0.445, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.9498346196251378, |
| "grad_norm": 0.23978053033351898, |
| "learning_rate": 0.0001579601990049751, |
| "loss": 0.4753, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.9503858875413451, |
| "grad_norm": 0.23133328557014465, |
| "learning_rate": 0.00015787728026533997, |
| "loss": 0.4735, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.9509371554575524, |
| "grad_norm": 0.20942679047584534, |
| "learning_rate": 0.0001577943615257048, |
| "loss": 0.4208, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.9514884233737596, |
| "grad_norm": 0.23965676128864288, |
| "learning_rate": 0.00015771144278606966, |
| "loss": 0.4758, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.9520396912899669, |
| "grad_norm": 0.23537394404411316, |
| "learning_rate": 0.0001576285240464345, |
| "loss": 0.4276, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.9525909592061742, |
| "grad_norm": 0.24360457062721252, |
| "learning_rate": 0.00015754560530679935, |
| "loss": 0.4686, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.9531422271223815, |
| "grad_norm": 0.22790101170539856, |
| "learning_rate": 0.00015746268656716416, |
| "loss": 0.4501, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.9536934950385888, |
| "grad_norm": 0.23862150311470032, |
| "learning_rate": 0.00015737976782752901, |
| "loss": 0.4545, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.954244762954796, |
| "grad_norm": 0.24378471076488495, |
| "learning_rate": 0.00015729684908789385, |
| "loss": 0.4912, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.9547960308710033, |
| "grad_norm": 0.23474174737930298, |
| "learning_rate": 0.00015721393034825868, |
| "loss": 0.4692, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.9553472987872106, |
| "grad_norm": 0.24299736320972443, |
| "learning_rate": 0.00015713101160862354, |
| "loss": 0.4582, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.9558985667034179, |
| "grad_norm": 0.23355722427368164, |
| "learning_rate": 0.00015704809286898837, |
| "loss": 0.4579, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.9564498346196252, |
| "grad_norm": 0.2307385504245758, |
| "learning_rate": 0.00015696517412935323, |
| "loss": 0.4276, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.9570011025358324, |
| "grad_norm": 0.25666573643684387, |
| "learning_rate": 0.00015688225538971806, |
| "loss": 0.4488, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.9575523704520397, |
| "grad_norm": 0.2472536265850067, |
| "learning_rate": 0.00015679933665008292, |
| "loss": 0.4635, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.958103638368247, |
| "grad_norm": 0.23561540246009827, |
| "learning_rate": 0.00015671641791044772, |
| "loss": 0.456, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.9586549062844543, |
| "grad_norm": 0.2695865333080292, |
| "learning_rate": 0.00015663349917081258, |
| "loss": 0.4894, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9592061742006616, |
| "grad_norm": 0.23878848552703857, |
| "learning_rate": 0.00015655058043117742, |
| "loss": 0.4945, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9597574421168688, |
| "grad_norm": 0.2417537271976471, |
| "learning_rate": 0.00015646766169154228, |
| "loss": 0.4456, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.960308710033076, |
| "grad_norm": 0.258645623922348, |
| "learning_rate": 0.0001563847429519071, |
| "loss": 0.4767, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9608599779492834, |
| "grad_norm": 0.23502197861671448, |
| "learning_rate": 0.00015630182421227197, |
| "loss": 0.4636, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9614112458654906, |
| "grad_norm": 0.22951334714889526, |
| "learning_rate": 0.0001562189054726368, |
| "loss": 0.4329, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.961962513781698, |
| "grad_norm": 0.24502499401569366, |
| "learning_rate": 0.00015613598673300166, |
| "loss": 0.4452, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.9625137816979051, |
| "grad_norm": 0.24659104645252228, |
| "learning_rate": 0.0001560530679933665, |
| "loss": 0.4489, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.9630650496141124, |
| "grad_norm": 0.2458224892616272, |
| "learning_rate": 0.00015597014925373135, |
| "loss": 0.4903, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9636163175303197, |
| "grad_norm": 0.24105043709278107, |
| "learning_rate": 0.00015588723051409615, |
| "loss": 0.4738, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.964167585446527, |
| "grad_norm": 0.2505391836166382, |
| "learning_rate": 0.000155804311774461, |
| "loss": 0.4643, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.9647188533627343, |
| "grad_norm": 0.23488488793373108, |
| "learning_rate": 0.00015572139303482584, |
| "loss": 0.4731, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9652701212789415, |
| "grad_norm": 0.2317710667848587, |
| "learning_rate": 0.0001556384742951907, |
| "loss": 0.4736, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9658213891951488, |
| "grad_norm": 0.23009353876113892, |
| "learning_rate": 0.00015555555555555554, |
| "loss": 0.4512, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.9663726571113561, |
| "grad_norm": 0.24625705182552338, |
| "learning_rate": 0.0001554726368159204, |
| "loss": 0.455, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9669239250275634, |
| "grad_norm": 0.2400812804698944, |
| "learning_rate": 0.00015538971807628523, |
| "loss": 0.4725, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.9674751929437707, |
| "grad_norm": 0.26011791825294495, |
| "learning_rate": 0.00015530679933665009, |
| "loss": 0.4868, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9680264608599779, |
| "grad_norm": 0.2298017144203186, |
| "learning_rate": 0.0001552238805970149, |
| "loss": 0.4559, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9685777287761852, |
| "grad_norm": 0.23378150165081024, |
| "learning_rate": 0.00015514096185737978, |
| "loss": 0.4511, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.9691289966923925, |
| "grad_norm": 0.24460946023464203, |
| "learning_rate": 0.00015505804311774458, |
| "loss": 0.4571, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9696802646085998, |
| "grad_norm": 0.241620734333992, |
| "learning_rate": 0.00015497512437810944, |
| "loss": 0.4743, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.9702315325248071, |
| "grad_norm": 0.23285698890686035, |
| "learning_rate": 0.00015489220563847427, |
| "loss": 0.4619, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9707828004410143, |
| "grad_norm": 0.24175579845905304, |
| "learning_rate": 0.00015480928689883913, |
| "loss": 0.4544, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.9713340683572216, |
| "grad_norm": 0.22799162566661835, |
| "learning_rate": 0.00015472636815920396, |
| "loss": 0.4679, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9718853362734289, |
| "grad_norm": 0.23015514016151428, |
| "learning_rate": 0.00015464344941956882, |
| "loss": 0.4867, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.9724366041896362, |
| "grad_norm": 0.22983665764331818, |
| "learning_rate": 0.00015456053067993366, |
| "loss": 0.4608, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.9729878721058435, |
| "grad_norm": 0.22515413165092468, |
| "learning_rate": 0.00015447761194029851, |
| "loss": 0.4578, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.9735391400220507, |
| "grad_norm": 0.23187264800071716, |
| "learning_rate": 0.00015439469320066332, |
| "loss": 0.4253, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.974090407938258, |
| "grad_norm": 0.23280374705791473, |
| "learning_rate": 0.00015431177446102818, |
| "loss": 0.4473, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.9746416758544653, |
| "grad_norm": 0.2500572204589844, |
| "learning_rate": 0.000154228855721393, |
| "loss": 0.4519, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.9751929437706726, |
| "grad_norm": 0.23001956939697266, |
| "learning_rate": 0.00015414593698175787, |
| "loss": 0.4708, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.9757442116868799, |
| "grad_norm": 0.23875866830348969, |
| "learning_rate": 0.0001540630182421227, |
| "loss": 0.4679, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.976295479603087, |
| "grad_norm": 0.22990469634532928, |
| "learning_rate": 0.00015398009950248756, |
| "loss": 0.4632, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.9768467475192943, |
| "grad_norm": 0.24912653863430023, |
| "learning_rate": 0.0001538971807628524, |
| "loss": 0.4569, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.9773980154355016, |
| "grad_norm": 0.2521923780441284, |
| "learning_rate": 0.00015381426202321725, |
| "loss": 0.4696, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.9779492833517089, |
| "grad_norm": 0.23184111714363098, |
| "learning_rate": 0.00015373134328358208, |
| "loss": 0.4518, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.9785005512679162, |
| "grad_norm": 0.22830599546432495, |
| "learning_rate": 0.0001536484245439469, |
| "loss": 0.4511, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.9790518191841234, |
| "grad_norm": 0.24908460676670074, |
| "learning_rate": 0.00015356550580431175, |
| "loss": 0.4556, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.9796030871003307, |
| "grad_norm": 0.2542704939842224, |
| "learning_rate": 0.00015348258706467658, |
| "loss": 0.4876, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.980154355016538, |
| "grad_norm": 0.23091669380664825, |
| "learning_rate": 0.00015339966832504144, |
| "loss": 0.4502, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.9807056229327453, |
| "grad_norm": 0.24079181253910065, |
| "learning_rate": 0.00015331674958540627, |
| "loss": 0.4549, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.9812568908489526, |
| "grad_norm": 0.224042147397995, |
| "learning_rate": 0.00015323383084577113, |
| "loss": 0.4568, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.9818081587651599, |
| "grad_norm": 0.23204737901687622, |
| "learning_rate": 0.00015315091210613596, |
| "loss": 0.4516, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.9823594266813671, |
| "grad_norm": 0.24899733066558838, |
| "learning_rate": 0.00015306799336650082, |
| "loss": 0.4422, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.9829106945975744, |
| "grad_norm": 0.2473718822002411, |
| "learning_rate": 0.00015298507462686565, |
| "loss": 0.4698, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.9834619625137817, |
| "grad_norm": 0.23376363515853882, |
| "learning_rate": 0.0001529021558872305, |
| "loss": 0.4735, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.984013230429989, |
| "grad_norm": 0.21901825070381165, |
| "learning_rate": 0.00015281923714759532, |
| "loss": 0.4055, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.9845644983461963, |
| "grad_norm": 0.24539053440093994, |
| "learning_rate": 0.00015273631840796018, |
| "loss": 0.477, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.9851157662624035, |
| "grad_norm": 0.2802634537220001, |
| "learning_rate": 0.000152653399668325, |
| "loss": 0.4924, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.9856670341786108, |
| "grad_norm": 0.2387421429157257, |
| "learning_rate": 0.00015257048092868987, |
| "loss": 0.4671, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.9862183020948181, |
| "grad_norm": 0.22999261319637299, |
| "learning_rate": 0.0001524875621890547, |
| "loss": 0.4682, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.9867695700110254, |
| "grad_norm": 0.2567140758037567, |
| "learning_rate": 0.00015240464344941956, |
| "loss": 0.4395, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9873208379272327, |
| "grad_norm": 0.24533671140670776, |
| "learning_rate": 0.0001523217247097844, |
| "loss": 0.4415, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.9878721058434399, |
| "grad_norm": 0.24147699773311615, |
| "learning_rate": 0.00015223880597014925, |
| "loss": 0.4731, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.9884233737596472, |
| "grad_norm": 0.23697462677955627, |
| "learning_rate": 0.00015215588723051408, |
| "loss": 0.451, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.9889746416758545, |
| "grad_norm": 0.2380775809288025, |
| "learning_rate": 0.00015207296849087894, |
| "loss": 0.452, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.9895259095920618, |
| "grad_norm": 0.24654051661491394, |
| "learning_rate": 0.00015199004975124375, |
| "loss": 0.4724, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.9900771775082691, |
| "grad_norm": 0.2548507750034332, |
| "learning_rate": 0.0001519071310116086, |
| "loss": 0.4578, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.9906284454244763, |
| "grad_norm": 0.23419903218746185, |
| "learning_rate": 0.00015182421227197344, |
| "loss": 0.4627, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.9911797133406836, |
| "grad_norm": 0.2721438705921173, |
| "learning_rate": 0.0001517412935323383, |
| "loss": 0.4704, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.9917309812568909, |
| "grad_norm": 0.22823266685009003, |
| "learning_rate": 0.00015165837479270313, |
| "loss": 0.4402, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.9922822491730982, |
| "grad_norm": 0.3155699372291565, |
| "learning_rate": 0.000151575456053068, |
| "loss": 0.4537, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9928335170893055, |
| "grad_norm": 0.24750587344169617, |
| "learning_rate": 0.00015149253731343282, |
| "loss": 0.4674, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.9933847850055126, |
| "grad_norm": 0.23167037963867188, |
| "learning_rate": 0.00015140961857379768, |
| "loss": 0.4506, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.9939360529217199, |
| "grad_norm": 0.24583961069583893, |
| "learning_rate": 0.0001513266998341625, |
| "loss": 0.4809, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.9944873208379272, |
| "grad_norm": 0.23894868791103363, |
| "learning_rate": 0.00015124378109452737, |
| "loss": 0.4729, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.9950385887541345, |
| "grad_norm": 0.23357604444026947, |
| "learning_rate": 0.00015116086235489218, |
| "loss": 0.4608, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.9955898566703418, |
| "grad_norm": 0.2364039272069931, |
| "learning_rate": 0.00015107794361525703, |
| "loss": 0.4803, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.996141124586549, |
| "grad_norm": 0.23034816980361938, |
| "learning_rate": 0.00015099502487562187, |
| "loss": 0.4687, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.9966923925027563, |
| "grad_norm": 0.23677074909210205, |
| "learning_rate": 0.00015091210613598673, |
| "loss": 0.4591, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.9972436604189636, |
| "grad_norm": 0.24638359248638153, |
| "learning_rate": 0.00015082918739635156, |
| "loss": 0.462, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.9977949283351709, |
| "grad_norm": 0.23346304893493652, |
| "learning_rate": 0.00015074626865671642, |
| "loss": 0.4245, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.9983461962513782, |
| "grad_norm": 0.2604617774486542, |
| "learning_rate": 0.00015066334991708125, |
| "loss": 0.4665, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.9988974641675854, |
| "grad_norm": 0.22308942675590515, |
| "learning_rate": 0.0001505804311774461, |
| "loss": 0.4671, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.9994487320837927, |
| "grad_norm": 0.2405402511358261, |
| "learning_rate": 0.00015049751243781094, |
| "loss": 0.4808, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2668411433696747, |
| "learning_rate": 0.0001504145936981758, |
| "loss": 0.4683, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.0005512679162072, |
| "grad_norm": 0.23000217974185944, |
| "learning_rate": 0.0001503316749585406, |
| "loss": 0.3736, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.0011025358324146, |
| "grad_norm": 0.2307773381471634, |
| "learning_rate": 0.00015024875621890546, |
| "loss": 0.3834, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.0016538037486218, |
| "grad_norm": 0.23737002909183502, |
| "learning_rate": 0.0001501658374792703, |
| "loss": 0.3863, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.0022050716648292, |
| "grad_norm": 0.2283601313829422, |
| "learning_rate": 0.00015008291873963515, |
| "loss": 0.3837, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.0027563395810364, |
| "grad_norm": 0.21821331977844238, |
| "learning_rate": 0.00015, |
| "loss": 0.4085, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.0033076074972436, |
| "grad_norm": 0.2391849011182785, |
| "learning_rate": 0.00014991708126036482, |
| "loss": 0.4207, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.003858875413451, |
| "grad_norm": 0.23875446617603302, |
| "learning_rate": 0.00014983416252072968, |
| "loss": 0.4013, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.0044101433296582, |
| "grad_norm": 0.24305221438407898, |
| "learning_rate": 0.0001497512437810945, |
| "loss": 0.4106, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.0049614112458656, |
| "grad_norm": 0.21675904095172882, |
| "learning_rate": 0.00014966832504145937, |
| "loss": 0.3744, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.0055126791620728, |
| "grad_norm": 0.23470553755760193, |
| "learning_rate": 0.0001495854063018242, |
| "loss": 0.3997, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.00606394707828, |
| "grad_norm": 0.2310658097267151, |
| "learning_rate": 0.00014950248756218903, |
| "loss": 0.411, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.0066152149944874, |
| "grad_norm": 0.23178675770759583, |
| "learning_rate": 0.0001494195688225539, |
| "loss": 0.3884, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.0071664829106945, |
| "grad_norm": 0.23985427618026733, |
| "learning_rate": 0.00014933665008291872, |
| "loss": 0.4026, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.007717750826902, |
| "grad_norm": 0.228210911154747, |
| "learning_rate": 0.00014925373134328358, |
| "loss": 0.3952, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.0082690187431091, |
| "grad_norm": 0.22802165150642395, |
| "learning_rate": 0.00014917081260364842, |
| "loss": 0.4194, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.0088202866593163, |
| "grad_norm": 0.2423812299966812, |
| "learning_rate": 0.00014908789386401325, |
| "loss": 0.4282, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.0093715545755237, |
| "grad_norm": 0.23589813709259033, |
| "learning_rate": 0.0001490049751243781, |
| "loss": 0.3911, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.009922822491731, |
| "grad_norm": 0.21917280554771423, |
| "learning_rate": 0.00014892205638474294, |
| "loss": 0.3723, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.0104740904079383, |
| "grad_norm": 0.22650456428527832, |
| "learning_rate": 0.0001488391376451078, |
| "loss": 0.3962, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.0110253583241455, |
| "grad_norm": 0.23731641471385956, |
| "learning_rate": 0.00014875621890547263, |
| "loss": 0.4235, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.0115766262403527, |
| "grad_norm": 0.21167220175266266, |
| "learning_rate": 0.00014867330016583746, |
| "loss": 0.3786, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.0121278941565601, |
| "grad_norm": 0.23506543040275574, |
| "learning_rate": 0.00014859038142620232, |
| "loss": 0.4098, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.0126791620727673, |
| "grad_norm": 0.25581830739974976, |
| "learning_rate": 0.00014850746268656715, |
| "loss": 0.4052, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.0132304299889747, |
| "grad_norm": 0.2236202359199524, |
| "learning_rate": 0.000148424543946932, |
| "loss": 0.3975, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.013781697905182, |
| "grad_norm": 0.21659554541110992, |
| "learning_rate": 0.00014834162520729684, |
| "loss": 0.3843, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.014332965821389, |
| "grad_norm": 0.22564005851745605, |
| "learning_rate": 0.00014825870646766168, |
| "loss": 0.4013, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.0148842337375965, |
| "grad_norm": 0.225655660033226, |
| "learning_rate": 0.00014817578772802654, |
| "loss": 0.3976, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.0154355016538037, |
| "grad_norm": 0.21095581352710724, |
| "learning_rate": 0.00014809286898839137, |
| "loss": 0.3812, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.015986769570011, |
| "grad_norm": 0.23854820430278778, |
| "learning_rate": 0.0001480099502487562, |
| "loss": 0.4089, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.0165380374862183, |
| "grad_norm": 0.22585038840770721, |
| "learning_rate": 0.00014792703150912103, |
| "loss": 0.4193, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.0170893054024255, |
| "grad_norm": 0.2209796905517578, |
| "learning_rate": 0.0001478441127694859, |
| "loss": 0.3989, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.017640573318633, |
| "grad_norm": 0.2113056629896164, |
| "learning_rate": 0.00014776119402985072, |
| "loss": 0.4089, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.01819184123484, |
| "grad_norm": 0.22150270640850067, |
| "learning_rate": 0.00014767827529021558, |
| "loss": 0.3946, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.0187431091510475, |
| "grad_norm": 0.22819051146507263, |
| "learning_rate": 0.00014759535655058041, |
| "loss": 0.3914, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.0192943770672547, |
| "grad_norm": 0.21912482380867004, |
| "learning_rate": 0.00014751243781094525, |
| "loss": 0.3621, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.0198456449834619, |
| "grad_norm": 0.22611315548419952, |
| "learning_rate": 0.0001474295190713101, |
| "loss": 0.386, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.0203969128996693, |
| "grad_norm": 0.225437730550766, |
| "learning_rate": 0.00014734660033167494, |
| "loss": 0.4115, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.0209481808158765, |
| "grad_norm": 0.22555771470069885, |
| "learning_rate": 0.0001472636815920398, |
| "loss": 0.4121, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.0214994487320839, |
| "grad_norm": 0.22996987402439117, |
| "learning_rate": 0.00014718076285240463, |
| "loss": 0.3799, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.022050716648291, |
| "grad_norm": 0.227546826004982, |
| "learning_rate": 0.00014709784411276946, |
| "loss": 0.406, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.0226019845644982, |
| "grad_norm": 0.21384532749652863, |
| "learning_rate": 0.00014701492537313432, |
| "loss": 0.393, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.0231532524807057, |
| "grad_norm": 0.21834981441497803, |
| "learning_rate": 0.00014693200663349915, |
| "loss": 0.3737, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.0237045203969128, |
| "grad_norm": 0.2231069952249527, |
| "learning_rate": 0.000146849087893864, |
| "loss": 0.3755, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.0242557883131203, |
| "grad_norm": 0.22336961328983307, |
| "learning_rate": 0.00014676616915422884, |
| "loss": 0.3936, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.0248070562293274, |
| "grad_norm": 0.22250871360301971, |
| "learning_rate": 0.00014668325041459367, |
| "loss": 0.4021, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.0253583241455346, |
| "grad_norm": 0.21691983938217163, |
| "learning_rate": 0.00014660033167495853, |
| "loss": 0.375, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.025909592061742, |
| "grad_norm": 0.2267792969942093, |
| "learning_rate": 0.00014651741293532337, |
| "loss": 0.4089, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.0264608599779492, |
| "grad_norm": 0.22236919403076172, |
| "learning_rate": 0.00014643449419568823, |
| "loss": 0.384, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.0270121278941566, |
| "grad_norm": 0.2280534952878952, |
| "learning_rate": 0.00014635157545605306, |
| "loss": 0.3982, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.0275633958103638, |
| "grad_norm": 0.23323461413383484, |
| "learning_rate": 0.0001462686567164179, |
| "loss": 0.3947, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.028114663726571, |
| "grad_norm": 0.2187027931213379, |
| "learning_rate": 0.00014618573797678275, |
| "loss": 0.3758, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.0286659316427784, |
| "grad_norm": 0.2233375459909439, |
| "learning_rate": 0.00014610281923714758, |
| "loss": 0.3889, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.0292171995589856, |
| "grad_norm": 0.23430676758289337, |
| "learning_rate": 0.00014601990049751244, |
| "loss": 0.3919, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.029768467475193, |
| "grad_norm": 0.22947613894939423, |
| "learning_rate": 0.00014593698175787727, |
| "loss": 0.3886, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.0303197353914002, |
| "grad_norm": 0.23334287106990814, |
| "learning_rate": 0.0001458540630182421, |
| "loss": 0.413, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.0308710033076074, |
| "grad_norm": 0.2178686261177063, |
| "learning_rate": 0.00014577114427860696, |
| "loss": 0.393, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.0314222712238148, |
| "grad_norm": 0.2510049045085907, |
| "learning_rate": 0.0001456882255389718, |
| "loss": 0.413, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.031973539140022, |
| "grad_norm": 0.23210124671459198, |
| "learning_rate": 0.00014560530679933665, |
| "loss": 0.3817, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.0325248070562294, |
| "grad_norm": 0.23246748745441437, |
| "learning_rate": 0.00014552238805970149, |
| "loss": 0.4026, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.0330760749724366, |
| "grad_norm": 0.22752533853054047, |
| "learning_rate": 0.00014543946932006632, |
| "loss": 0.411, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.0336273428886438, |
| "grad_norm": 0.21562816202640533, |
| "learning_rate": 0.00014535655058043118, |
| "loss": 0.3966, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.0341786108048512, |
| "grad_norm": 0.227711021900177, |
| "learning_rate": 0.000145273631840796, |
| "loss": 0.4008, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.0347298787210584, |
| "grad_norm": 0.22064116597175598, |
| "learning_rate": 0.00014519071310116087, |
| "loss": 0.3855, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.0352811466372658, |
| "grad_norm": 0.22657108306884766, |
| "learning_rate": 0.0001451077943615257, |
| "loss": 0.4147, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.035832414553473, |
| "grad_norm": 0.220686674118042, |
| "learning_rate": 0.00014502487562189053, |
| "loss": 0.3953, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.0363836824696802, |
| "grad_norm": 0.21113237738609314, |
| "learning_rate": 0.0001449419568822554, |
| "loss": 0.3908, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.0369349503858876, |
| "grad_norm": 0.21575047075748444, |
| "learning_rate": 0.00014485903814262022, |
| "loss": 0.3917, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.0374862183020948, |
| "grad_norm": 0.22273024916648865, |
| "learning_rate": 0.00014477611940298508, |
| "loss": 0.4007, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.0380374862183022, |
| "grad_norm": 0.22036762535572052, |
| "learning_rate": 0.00014469320066334991, |
| "loss": 0.3797, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.0385887541345094, |
| "grad_norm": 0.22144779562950134, |
| "learning_rate": 0.00014461028192371475, |
| "loss": 0.3911, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.0391400220507165, |
| "grad_norm": 0.22937916219234467, |
| "learning_rate": 0.0001445273631840796, |
| "loss": 0.406, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.039691289966924, |
| "grad_norm": 0.21770672500133514, |
| "learning_rate": 0.0001444444444444444, |
| "loss": 0.389, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.0402425578831311, |
| "grad_norm": 0.2170240730047226, |
| "learning_rate": 0.00014436152570480927, |
| "loss": 0.4225, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.0407938257993385, |
| "grad_norm": 0.23694483935832977, |
| "learning_rate": 0.0001442786069651741, |
| "loss": 0.4124, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.0413450937155457, |
| "grad_norm": 0.2358977198600769, |
| "learning_rate": 0.00014419568822553896, |
| "loss": 0.3932, |
| "step": 1889 |
| }, |
| { |
| "epoch": 1.041896361631753, |
| "grad_norm": 0.2379174828529358, |
| "learning_rate": 0.0001441127694859038, |
| "loss": 0.3921, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.0424476295479603, |
| "grad_norm": 0.22685475647449493, |
| "learning_rate": 0.00014402985074626863, |
| "loss": 0.398, |
| "step": 1891 |
| }, |
| { |
| "epoch": 1.0429988974641675, |
| "grad_norm": 0.2381109744310379, |
| "learning_rate": 0.00014394693200663348, |
| "loss": 0.4002, |
| "step": 1892 |
| }, |
| { |
| "epoch": 1.043550165380375, |
| "grad_norm": 0.23132000863552094, |
| "learning_rate": 0.00014386401326699832, |
| "loss": 0.3917, |
| "step": 1893 |
| }, |
| { |
| "epoch": 1.0441014332965821, |
| "grad_norm": 0.23595485091209412, |
| "learning_rate": 0.00014378109452736318, |
| "loss": 0.3811, |
| "step": 1894 |
| }, |
| { |
| "epoch": 1.0446527012127893, |
| "grad_norm": 0.23046362400054932, |
| "learning_rate": 0.000143698175787728, |
| "loss": 0.389, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.0452039691289967, |
| "grad_norm": 0.21979711949825287, |
| "learning_rate": 0.00014361525704809284, |
| "loss": 0.4008, |
| "step": 1896 |
| }, |
| { |
| "epoch": 1.045755237045204, |
| "grad_norm": 0.21169352531433105, |
| "learning_rate": 0.0001435323383084577, |
| "loss": 0.3767, |
| "step": 1897 |
| }, |
| { |
| "epoch": 1.0463065049614113, |
| "grad_norm": 0.2226918339729309, |
| "learning_rate": 0.00014344941956882253, |
| "loss": 0.4059, |
| "step": 1898 |
| }, |
| { |
| "epoch": 1.0468577728776185, |
| "grad_norm": 0.23048485815525055, |
| "learning_rate": 0.0001433665008291874, |
| "loss": 0.4013, |
| "step": 1899 |
| }, |
| { |
| "epoch": 1.0474090407938257, |
| "grad_norm": 0.22347117960453033, |
| "learning_rate": 0.00014328358208955222, |
| "loss": 0.4042, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.047960308710033, |
| "grad_norm": 0.2321341335773468, |
| "learning_rate": 0.00014320066334991705, |
| "loss": 0.4055, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.0485115766262403, |
| "grad_norm": 0.22918953001499176, |
| "learning_rate": 0.0001431177446102819, |
| "loss": 0.3845, |
| "step": 1902 |
| }, |
| { |
| "epoch": 1.0490628445424477, |
| "grad_norm": 0.21781106293201447, |
| "learning_rate": 0.00014303482587064675, |
| "loss": 0.4067, |
| "step": 1903 |
| }, |
| { |
| "epoch": 1.0496141124586549, |
| "grad_norm": 0.21180634200572968, |
| "learning_rate": 0.0001429519071310116, |
| "loss": 0.3891, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.0501653803748623, |
| "grad_norm": 0.2400248795747757, |
| "learning_rate": 0.00014286898839137644, |
| "loss": 0.3878, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.0507166482910695, |
| "grad_norm": 0.22464604675769806, |
| "learning_rate": 0.00014278606965174127, |
| "loss": 0.3909, |
| "step": 1906 |
| }, |
| { |
| "epoch": 1.0512679162072767, |
| "grad_norm": 0.23820553719997406, |
| "learning_rate": 0.00014270315091210613, |
| "loss": 0.3967, |
| "step": 1907 |
| }, |
| { |
| "epoch": 1.051819184123484, |
| "grad_norm": 0.23168790340423584, |
| "learning_rate": 0.00014262023217247096, |
| "loss": 0.4057, |
| "step": 1908 |
| }, |
| { |
| "epoch": 1.0523704520396913, |
| "grad_norm": 0.2253868579864502, |
| "learning_rate": 0.00014253731343283582, |
| "loss": 0.3844, |
| "step": 1909 |
| }, |
| { |
| "epoch": 1.0529217199558987, |
| "grad_norm": 0.21465058624744415, |
| "learning_rate": 0.00014245439469320065, |
| "loss": 0.3804, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.0534729878721059, |
| "grad_norm": 0.22617360949516296, |
| "learning_rate": 0.00014237147595356548, |
| "loss": 0.3738, |
| "step": 1911 |
| }, |
| { |
| "epoch": 1.054024255788313, |
| "grad_norm": 0.23942868411540985, |
| "learning_rate": 0.00014228855721393034, |
| "loss": 0.4044, |
| "step": 1912 |
| }, |
| { |
| "epoch": 1.0545755237045205, |
| "grad_norm": 0.23497670888900757, |
| "learning_rate": 0.00014220563847429517, |
| "loss": 0.4138, |
| "step": 1913 |
| }, |
| { |
| "epoch": 1.0551267916207276, |
| "grad_norm": 0.229624941945076, |
| "learning_rate": 0.00014212271973466003, |
| "loss": 0.402, |
| "step": 1914 |
| }, |
| { |
| "epoch": 1.055678059536935, |
| "grad_norm": 0.22944937646389008, |
| "learning_rate": 0.00014203980099502486, |
| "loss": 0.4016, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.0562293274531422, |
| "grad_norm": 0.2452874332666397, |
| "learning_rate": 0.0001419568822553897, |
| "loss": 0.4149, |
| "step": 1916 |
| }, |
| { |
| "epoch": 1.0567805953693494, |
| "grad_norm": 0.23434410989284515, |
| "learning_rate": 0.00014187396351575456, |
| "loss": 0.3818, |
| "step": 1917 |
| }, |
| { |
| "epoch": 1.0573318632855568, |
| "grad_norm": 0.22487396001815796, |
| "learning_rate": 0.0001417910447761194, |
| "loss": 0.4071, |
| "step": 1918 |
| }, |
| { |
| "epoch": 1.057883131201764, |
| "grad_norm": 0.2129317820072174, |
| "learning_rate": 0.00014170812603648425, |
| "loss": 0.3653, |
| "step": 1919 |
| }, |
| { |
| "epoch": 1.0584343991179714, |
| "grad_norm": 0.21573378145694733, |
| "learning_rate": 0.00014162520729684908, |
| "loss": 0.3924, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.0589856670341786, |
| "grad_norm": 0.23635123670101166, |
| "learning_rate": 0.0001415422885572139, |
| "loss": 0.3883, |
| "step": 1921 |
| }, |
| { |
| "epoch": 1.0595369349503858, |
| "grad_norm": 0.23705770075321198, |
| "learning_rate": 0.00014145936981757877, |
| "loss": 0.3865, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.0600882028665932, |
| "grad_norm": 0.22904790937900543, |
| "learning_rate": 0.0001413764510779436, |
| "loss": 0.3851, |
| "step": 1923 |
| }, |
| { |
| "epoch": 1.0606394707828004, |
| "grad_norm": 0.21958112716674805, |
| "learning_rate": 0.00014129353233830846, |
| "loss": 0.3965, |
| "step": 1924 |
| }, |
| { |
| "epoch": 1.0611907386990078, |
| "grad_norm": 0.232145294547081, |
| "learning_rate": 0.0001412106135986733, |
| "loss": 0.4001, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.061742006615215, |
| "grad_norm": 0.23748160898685455, |
| "learning_rate": 0.00014112769485903813, |
| "loss": 0.3809, |
| "step": 1926 |
| }, |
| { |
| "epoch": 1.0622932745314222, |
| "grad_norm": 0.25450122356414795, |
| "learning_rate": 0.00014104477611940298, |
| "loss": 0.3986, |
| "step": 1927 |
| }, |
| { |
| "epoch": 1.0628445424476296, |
| "grad_norm": 0.23028801381587982, |
| "learning_rate": 0.00014096185737976782, |
| "loss": 0.3905, |
| "step": 1928 |
| }, |
| { |
| "epoch": 1.0633958103638368, |
| "grad_norm": 0.23206226527690887, |
| "learning_rate": 0.00014087893864013268, |
| "loss": 0.3757, |
| "step": 1929 |
| }, |
| { |
| "epoch": 1.0639470782800442, |
| "grad_norm": 0.23685060441493988, |
| "learning_rate": 0.00014079601990049748, |
| "loss": 0.3844, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.0644983461962514, |
| "grad_norm": 0.22835825383663177, |
| "learning_rate": 0.00014071310116086234, |
| "loss": 0.388, |
| "step": 1931 |
| }, |
| { |
| "epoch": 1.0650496141124586, |
| "grad_norm": 0.2305503487586975, |
| "learning_rate": 0.00014063018242122717, |
| "loss": 0.4015, |
| "step": 1932 |
| }, |
| { |
| "epoch": 1.065600882028666, |
| "grad_norm": 0.23914876580238342, |
| "learning_rate": 0.00014054726368159203, |
| "loss": 0.3826, |
| "step": 1933 |
| }, |
| { |
| "epoch": 1.0661521499448732, |
| "grad_norm": 0.2508886158466339, |
| "learning_rate": 0.00014046434494195686, |
| "loss": 0.3948, |
| "step": 1934 |
| }, |
| { |
| "epoch": 1.0667034178610806, |
| "grad_norm": 0.280200332403183, |
| "learning_rate": 0.0001403814262023217, |
| "loss": 0.4042, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.0672546857772878, |
| "grad_norm": 0.22536714375019073, |
| "learning_rate": 0.00014029850746268655, |
| "loss": 0.3948, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.067805953693495, |
| "grad_norm": 0.24053654074668884, |
| "learning_rate": 0.0001402155887230514, |
| "loss": 0.3976, |
| "step": 1937 |
| }, |
| { |
| "epoch": 1.0683572216097024, |
| "grad_norm": 0.2461492270231247, |
| "learning_rate": 0.00014013266998341625, |
| "loss": 0.385, |
| "step": 1938 |
| }, |
| { |
| "epoch": 1.0689084895259096, |
| "grad_norm": 0.24768413603305817, |
| "learning_rate": 0.00014004975124378108, |
| "loss": 0.3734, |
| "step": 1939 |
| }, |
| { |
| "epoch": 1.069459757442117, |
| "grad_norm": 0.2460828721523285, |
| "learning_rate": 0.0001399668325041459, |
| "loss": 0.3924, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.0700110253583242, |
| "grad_norm": 0.2739814519882202, |
| "learning_rate": 0.00013988391376451077, |
| "loss": 0.3779, |
| "step": 1941 |
| }, |
| { |
| "epoch": 1.0705622932745313, |
| "grad_norm": 0.23434729874134064, |
| "learning_rate": 0.0001398009950248756, |
| "loss": 0.4186, |
| "step": 1942 |
| }, |
| { |
| "epoch": 1.0711135611907387, |
| "grad_norm": 0.23552288115024567, |
| "learning_rate": 0.00013971807628524046, |
| "loss": 0.3951, |
| "step": 1943 |
| }, |
| { |
| "epoch": 1.071664829106946, |
| "grad_norm": 0.2381044626235962, |
| "learning_rate": 0.0001396351575456053, |
| "loss": 0.3938, |
| "step": 1944 |
| }, |
| { |
| "epoch": 1.0722160970231533, |
| "grad_norm": 0.25459203124046326, |
| "learning_rate": 0.00013955223880597012, |
| "loss": 0.3997, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.0727673649393605, |
| "grad_norm": 0.2563784718513489, |
| "learning_rate": 0.00013946932006633498, |
| "loss": 0.404, |
| "step": 1946 |
| }, |
| { |
| "epoch": 1.0733186328555677, |
| "grad_norm": 0.23130348324775696, |
| "learning_rate": 0.00013938640132669982, |
| "loss": 0.3844, |
| "step": 1947 |
| }, |
| { |
| "epoch": 1.0738699007717751, |
| "grad_norm": 0.24562886357307434, |
| "learning_rate": 0.00013930348258706467, |
| "loss": 0.4131, |
| "step": 1948 |
| }, |
| { |
| "epoch": 1.0744211686879823, |
| "grad_norm": 0.22779060900211334, |
| "learning_rate": 0.0001392205638474295, |
| "loss": 0.4107, |
| "step": 1949 |
| }, |
| { |
| "epoch": 1.0749724366041897, |
| "grad_norm": 0.23528602719306946, |
| "learning_rate": 0.00013913764510779434, |
| "loss": 0.4128, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.075523704520397, |
| "grad_norm": 0.23987142741680145, |
| "learning_rate": 0.0001390547263681592, |
| "loss": 0.3987, |
| "step": 1951 |
| }, |
| { |
| "epoch": 1.076074972436604, |
| "grad_norm": 0.2401638627052307, |
| "learning_rate": 0.00013897180762852403, |
| "loss": 0.3923, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.0766262403528115, |
| "grad_norm": 0.24218258261680603, |
| "learning_rate": 0.0001388888888888889, |
| "loss": 0.4001, |
| "step": 1953 |
| }, |
| { |
| "epoch": 1.0771775082690187, |
| "grad_norm": 0.23231711983680725, |
| "learning_rate": 0.00013880597014925372, |
| "loss": 0.3795, |
| "step": 1954 |
| }, |
| { |
| "epoch": 1.0777287761852261, |
| "grad_norm": 0.2225574404001236, |
| "learning_rate": 0.00013872305140961855, |
| "loss": 0.3867, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.0782800441014333, |
| "grad_norm": 0.22481811046600342, |
| "learning_rate": 0.0001386401326699834, |
| "loss": 0.3946, |
| "step": 1956 |
| }, |
| { |
| "epoch": 1.0788313120176405, |
| "grad_norm": 0.22649556398391724, |
| "learning_rate": 0.00013855721393034824, |
| "loss": 0.3834, |
| "step": 1957 |
| }, |
| { |
| "epoch": 1.079382579933848, |
| "grad_norm": 0.21780644357204437, |
| "learning_rate": 0.0001384742951907131, |
| "loss": 0.3874, |
| "step": 1958 |
| }, |
| { |
| "epoch": 1.079933847850055, |
| "grad_norm": 0.21539410948753357, |
| "learning_rate": 0.00013839137645107794, |
| "loss": 0.3788, |
| "step": 1959 |
| }, |
| { |
| "epoch": 1.0804851157662625, |
| "grad_norm": 0.22845754027366638, |
| "learning_rate": 0.00013830845771144277, |
| "loss": 0.395, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.0810363836824697, |
| "grad_norm": 0.23722249269485474, |
| "learning_rate": 0.00013822553897180763, |
| "loss": 0.3993, |
| "step": 1961 |
| }, |
| { |
| "epoch": 1.0815876515986769, |
| "grad_norm": 0.2395038902759552, |
| "learning_rate": 0.00013814262023217246, |
| "loss": 0.4204, |
| "step": 1962 |
| }, |
| { |
| "epoch": 1.0821389195148843, |
| "grad_norm": 0.2149537056684494, |
| "learning_rate": 0.00013805970149253732, |
| "loss": 0.381, |
| "step": 1963 |
| }, |
| { |
| "epoch": 1.0826901874310915, |
| "grad_norm": 0.24547190964221954, |
| "learning_rate": 0.00013797678275290215, |
| "loss": 0.404, |
| "step": 1964 |
| }, |
| { |
| "epoch": 1.0832414553472989, |
| "grad_norm": 0.21485422551631927, |
| "learning_rate": 0.00013789386401326698, |
| "loss": 0.3756, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.083792723263506, |
| "grad_norm": 0.2199661284685135, |
| "learning_rate": 0.00013781094527363184, |
| "loss": 0.39, |
| "step": 1966 |
| }, |
| { |
| "epoch": 1.0843439911797133, |
| "grad_norm": 0.2321014702320099, |
| "learning_rate": 0.00013772802653399667, |
| "loss": 0.3877, |
| "step": 1967 |
| }, |
| { |
| "epoch": 1.0848952590959207, |
| "grad_norm": 0.23033714294433594, |
| "learning_rate": 0.00013764510779436153, |
| "loss": 0.4018, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.0854465270121278, |
| "grad_norm": 0.2251034677028656, |
| "learning_rate": 0.00013756218905472636, |
| "loss": 0.3911, |
| "step": 1969 |
| }, |
| { |
| "epoch": 1.0859977949283353, |
| "grad_norm": 0.22630800306797028, |
| "learning_rate": 0.0001374792703150912, |
| "loss": 0.397, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.0865490628445424, |
| "grad_norm": 0.22938160598278046, |
| "learning_rate": 0.00013739635157545606, |
| "loss": 0.401, |
| "step": 1971 |
| }, |
| { |
| "epoch": 1.0871003307607496, |
| "grad_norm": 0.24200983345508575, |
| "learning_rate": 0.0001373134328358209, |
| "loss": 0.3988, |
| "step": 1972 |
| }, |
| { |
| "epoch": 1.087651598676957, |
| "grad_norm": 0.25386059284210205, |
| "learning_rate": 0.00013723051409618575, |
| "loss": 0.4093, |
| "step": 1973 |
| }, |
| { |
| "epoch": 1.0882028665931642, |
| "grad_norm": 0.2258448451757431, |
| "learning_rate": 0.00013714759535655055, |
| "loss": 0.386, |
| "step": 1974 |
| }, |
| { |
| "epoch": 1.0887541345093716, |
| "grad_norm": 0.2277601659297943, |
| "learning_rate": 0.0001370646766169154, |
| "loss": 0.4041, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.0893054024255788, |
| "grad_norm": 0.20614218711853027, |
| "learning_rate": 0.00013698175787728024, |
| "loss": 0.3784, |
| "step": 1976 |
| }, |
| { |
| "epoch": 1.089856670341786, |
| "grad_norm": 0.22764301300048828, |
| "learning_rate": 0.0001368988391376451, |
| "loss": 0.395, |
| "step": 1977 |
| }, |
| { |
| "epoch": 1.0904079382579934, |
| "grad_norm": 0.23423810303211212, |
| "learning_rate": 0.00013681592039800993, |
| "loss": 0.4114, |
| "step": 1978 |
| }, |
| { |
| "epoch": 1.0909592061742006, |
| "grad_norm": 0.2042825073003769, |
| "learning_rate": 0.00013673300165837477, |
| "loss": 0.3724, |
| "step": 1979 |
| }, |
| { |
| "epoch": 1.091510474090408, |
| "grad_norm": 0.2203364223241806, |
| "learning_rate": 0.00013665008291873962, |
| "loss": 0.4084, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.0920617420066152, |
| "grad_norm": 0.23350727558135986, |
| "learning_rate": 0.00013656716417910446, |
| "loss": 0.4041, |
| "step": 1981 |
| }, |
| { |
| "epoch": 1.0926130099228224, |
| "grad_norm": 0.23900878429412842, |
| "learning_rate": 0.00013648424543946932, |
| "loss": 0.3976, |
| "step": 1982 |
| }, |
| { |
| "epoch": 1.0931642778390298, |
| "grad_norm": 0.22579023241996765, |
| "learning_rate": 0.00013640132669983415, |
| "loss": 0.4019, |
| "step": 1983 |
| }, |
| { |
| "epoch": 1.093715545755237, |
| "grad_norm": 0.23907893896102905, |
| "learning_rate": 0.00013631840796019898, |
| "loss": 0.4185, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.0942668136714444, |
| "grad_norm": 0.22953177988529205, |
| "learning_rate": 0.00013623548922056384, |
| "loss": 0.4009, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.0948180815876516, |
| "grad_norm": 0.22816117107868195, |
| "learning_rate": 0.00013615257048092867, |
| "loss": 0.3773, |
| "step": 1986 |
| }, |
| { |
| "epoch": 1.0953693495038588, |
| "grad_norm": 0.2403888702392578, |
| "learning_rate": 0.00013606965174129353, |
| "loss": 0.3857, |
| "step": 1987 |
| }, |
| { |
| "epoch": 1.0959206174200662, |
| "grad_norm": 0.2400594800710678, |
| "learning_rate": 0.00013598673300165836, |
| "loss": 0.398, |
| "step": 1988 |
| }, |
| { |
| "epoch": 1.0964718853362734, |
| "grad_norm": 0.2451186329126358, |
| "learning_rate": 0.0001359038142620232, |
| "loss": 0.4066, |
| "step": 1989 |
| }, |
| { |
| "epoch": 1.0970231532524808, |
| "grad_norm": 0.2371450811624527, |
| "learning_rate": 0.00013582089552238805, |
| "loss": 0.3855, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.097574421168688, |
| "grad_norm": 0.2529587745666504, |
| "learning_rate": 0.00013573797678275289, |
| "loss": 0.3851, |
| "step": 1991 |
| }, |
| { |
| "epoch": 1.0981256890848952, |
| "grad_norm": 0.23810137808322906, |
| "learning_rate": 0.00013565505804311774, |
| "loss": 0.3644, |
| "step": 1992 |
| }, |
| { |
| "epoch": 1.0986769570011026, |
| "grad_norm": 0.23532289266586304, |
| "learning_rate": 0.00013557213930348258, |
| "loss": 0.3813, |
| "step": 1993 |
| }, |
| { |
| "epoch": 1.0992282249173098, |
| "grad_norm": 0.2418917566537857, |
| "learning_rate": 0.0001354892205638474, |
| "loss": 0.3775, |
| "step": 1994 |
| }, |
| { |
| "epoch": 1.0997794928335172, |
| "grad_norm": 0.2366194874048233, |
| "learning_rate": 0.00013540630182421227, |
| "loss": 0.4047, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.1003307607497244, |
| "grad_norm": 0.23951660096645355, |
| "learning_rate": 0.0001353233830845771, |
| "loss": 0.3956, |
| "step": 1996 |
| }, |
| { |
| "epoch": 1.1008820286659315, |
| "grad_norm": 0.260423481464386, |
| "learning_rate": 0.00013524046434494196, |
| "loss": 0.3979, |
| "step": 1997 |
| }, |
| { |
| "epoch": 1.101433296582139, |
| "grad_norm": 0.22453179955482483, |
| "learning_rate": 0.0001351575456053068, |
| "loss": 0.3918, |
| "step": 1998 |
| }, |
| { |
| "epoch": 1.1019845644983461, |
| "grad_norm": 0.2185899168252945, |
| "learning_rate": 0.00013507462686567162, |
| "loss": 0.38, |
| "step": 1999 |
| }, |
| { |
| "epoch": 1.1025358324145536, |
| "grad_norm": 0.2236957997083664, |
| "learning_rate": 0.00013499170812603648, |
| "loss": 0.4007, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1025358324145536, |
| "eval_loss": 0.4581758677959442, |
| "eval_runtime": 312.0177, |
| "eval_samples_per_second": 3.734, |
| "eval_steps_per_second": 0.468, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1030871003307607, |
| "grad_norm": 0.2543388903141022, |
| "learning_rate": 0.00013490878938640131, |
| "loss": 0.39, |
| "step": 2001 |
| }, |
| { |
| "epoch": 1.103638368246968, |
| "grad_norm": 0.22843103110790253, |
| "learning_rate": 0.00013482587064676615, |
| "loss": 0.3835, |
| "step": 2002 |
| }, |
| { |
| "epoch": 1.1041896361631753, |
| "grad_norm": 0.226676806807518, |
| "learning_rate": 0.000134742951907131, |
| "loss": 0.3907, |
| "step": 2003 |
| }, |
| { |
| "epoch": 1.1047409040793825, |
| "grad_norm": 0.22164440155029297, |
| "learning_rate": 0.00013466003316749584, |
| "loss": 0.3727, |
| "step": 2004 |
| }, |
| { |
| "epoch": 1.10529217199559, |
| "grad_norm": 0.2151675671339035, |
| "learning_rate": 0.0001345771144278607, |
| "loss": 0.3749, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.1058434399117971, |
| "grad_norm": 0.23192958533763885, |
| "learning_rate": 0.00013449419568822553, |
| "loss": 0.407, |
| "step": 2006 |
| }, |
| { |
| "epoch": 1.1063947078280043, |
| "grad_norm": 0.2130926102399826, |
| "learning_rate": 0.00013441127694859036, |
| "loss": 0.3702, |
| "step": 2007 |
| }, |
| { |
| "epoch": 1.1069459757442117, |
| "grad_norm": 0.22862909734249115, |
| "learning_rate": 0.00013432835820895522, |
| "loss": 0.3784, |
| "step": 2008 |
| }, |
| { |
| "epoch": 1.107497243660419, |
| "grad_norm": 0.22866345942020416, |
| "learning_rate": 0.00013424543946932005, |
| "loss": 0.4035, |
| "step": 2009 |
| }, |
| { |
| "epoch": 1.1080485115766263, |
| "grad_norm": 0.2159378081560135, |
| "learning_rate": 0.0001341625207296849, |
| "loss": 0.3996, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.1085997794928335, |
| "grad_norm": 0.22037655115127563, |
| "learning_rate": 0.00013407960199004974, |
| "loss": 0.3873, |
| "step": 2011 |
| }, |
| { |
| "epoch": 1.1091510474090407, |
| "grad_norm": 0.24213933944702148, |
| "learning_rate": 0.00013399668325041458, |
| "loss": 0.4144, |
| "step": 2012 |
| }, |
| { |
| "epoch": 1.109702315325248, |
| "grad_norm": 0.2235259711742401, |
| "learning_rate": 0.00013391376451077943, |
| "loss": 0.4028, |
| "step": 2013 |
| }, |
| { |
| "epoch": 1.1102535832414553, |
| "grad_norm": 0.2354377955198288, |
| "learning_rate": 0.00013383084577114427, |
| "loss": 0.4103, |
| "step": 2014 |
| }, |
| { |
| "epoch": 1.1108048511576627, |
| "grad_norm": 0.22363215684890747, |
| "learning_rate": 0.00013374792703150913, |
| "loss": 0.3962, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.1113561190738699, |
| "grad_norm": 0.22264409065246582, |
| "learning_rate": 0.00013366500829187396, |
| "loss": 0.3818, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.111907386990077, |
| "grad_norm": 0.22731584310531616, |
| "learning_rate": 0.0001335820895522388, |
| "loss": 0.4013, |
| "step": 2017 |
| }, |
| { |
| "epoch": 1.1124586549062845, |
| "grad_norm": 0.22340711951255798, |
| "learning_rate": 0.00013349917081260362, |
| "loss": 0.3734, |
| "step": 2018 |
| }, |
| { |
| "epoch": 1.1130099228224917, |
| "grad_norm": 0.23701246082782745, |
| "learning_rate": 0.00013341625207296848, |
| "loss": 0.3943, |
| "step": 2019 |
| }, |
| { |
| "epoch": 1.113561190738699, |
| "grad_norm": 0.22929784655570984, |
| "learning_rate": 0.0001333333333333333, |
| "loss": 0.3848, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.1141124586549063, |
| "grad_norm": 0.24790272116661072, |
| "learning_rate": 0.00013325041459369814, |
| "loss": 0.4047, |
| "step": 2021 |
| }, |
| { |
| "epoch": 1.1146637265711137, |
| "grad_norm": 0.22452253103256226, |
| "learning_rate": 0.000133167495854063, |
| "loss": 0.385, |
| "step": 2022 |
| }, |
| { |
| "epoch": 1.1152149944873209, |
| "grad_norm": 0.23337581753730774, |
| "learning_rate": 0.00013308457711442784, |
| "loss": 0.3791, |
| "step": 2023 |
| }, |
| { |
| "epoch": 1.115766262403528, |
| "grad_norm": 0.23171287775039673, |
| "learning_rate": 0.0001330016583747927, |
| "loss": 0.3885, |
| "step": 2024 |
| }, |
| { |
| "epoch": 1.1163175303197355, |
| "grad_norm": 0.24028973281383514, |
| "learning_rate": 0.00013291873963515753, |
| "loss": 0.4071, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.1168687982359427, |
| "grad_norm": 0.23416177928447723, |
| "learning_rate": 0.00013283582089552236, |
| "loss": 0.3815, |
| "step": 2026 |
| }, |
| { |
| "epoch": 1.11742006615215, |
| "grad_norm": 0.2444845736026764, |
| "learning_rate": 0.00013275290215588722, |
| "loss": 0.4048, |
| "step": 2027 |
| }, |
| { |
| "epoch": 1.1179713340683572, |
| "grad_norm": 0.23157843947410583, |
| "learning_rate": 0.00013266998341625205, |
| "loss": 0.402, |
| "step": 2028 |
| }, |
| { |
| "epoch": 1.1185226019845644, |
| "grad_norm": 0.24158456921577454, |
| "learning_rate": 0.0001325870646766169, |
| "loss": 0.3821, |
| "step": 2029 |
| }, |
| { |
| "epoch": 1.1190738699007718, |
| "grad_norm": 0.23520436882972717, |
| "learning_rate": 0.00013250414593698174, |
| "loss": 0.3848, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.119625137816979, |
| "grad_norm": 0.2458154559135437, |
| "learning_rate": 0.00013242122719734657, |
| "loss": 0.3926, |
| "step": 2031 |
| }, |
| { |
| "epoch": 1.1201764057331864, |
| "grad_norm": 0.2308206707239151, |
| "learning_rate": 0.00013233830845771143, |
| "loss": 0.3982, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.1207276736493936, |
| "grad_norm": 0.23016606271266937, |
| "learning_rate": 0.00013225538971807626, |
| "loss": 0.3936, |
| "step": 2033 |
| }, |
| { |
| "epoch": 1.1212789415656008, |
| "grad_norm": 0.24838510155677795, |
| "learning_rate": 0.00013217247097844112, |
| "loss": 0.4081, |
| "step": 2034 |
| }, |
| { |
| "epoch": 1.1218302094818082, |
| "grad_norm": 0.2287745475769043, |
| "learning_rate": 0.00013208955223880596, |
| "loss": 0.371, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.1223814773980154, |
| "grad_norm": 0.23816218972206116, |
| "learning_rate": 0.0001320066334991708, |
| "loss": 0.3952, |
| "step": 2036 |
| }, |
| { |
| "epoch": 1.1229327453142228, |
| "grad_norm": 0.2324012964963913, |
| "learning_rate": 0.00013192371475953565, |
| "loss": 0.3861, |
| "step": 2037 |
| }, |
| { |
| "epoch": 1.12348401323043, |
| "grad_norm": 0.23907962441444397, |
| "learning_rate": 0.00013184079601990048, |
| "loss": 0.3927, |
| "step": 2038 |
| }, |
| { |
| "epoch": 1.1240352811466372, |
| "grad_norm": 0.2464779168367386, |
| "learning_rate": 0.00013175787728026534, |
| "loss": 0.4246, |
| "step": 2039 |
| }, |
| { |
| "epoch": 1.1245865490628446, |
| "grad_norm": 0.23501858115196228, |
| "learning_rate": 0.00013167495854063017, |
| "loss": 0.3918, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.1251378169790518, |
| "grad_norm": 0.2514742314815521, |
| "learning_rate": 0.000131592039800995, |
| "loss": 0.3828, |
| "step": 2041 |
| }, |
| { |
| "epoch": 1.1256890848952592, |
| "grad_norm": 0.25326284766197205, |
| "learning_rate": 0.00013150912106135986, |
| "loss": 0.4042, |
| "step": 2042 |
| }, |
| { |
| "epoch": 1.1262403528114664, |
| "grad_norm": 0.23037280142307281, |
| "learning_rate": 0.0001314262023217247, |
| "loss": 0.3919, |
| "step": 2043 |
| }, |
| { |
| "epoch": 1.1267916207276736, |
| "grad_norm": 0.241755872964859, |
| "learning_rate": 0.00013134328358208955, |
| "loss": 0.3867, |
| "step": 2044 |
| }, |
| { |
| "epoch": 1.127342888643881, |
| "grad_norm": 0.27031564712524414, |
| "learning_rate": 0.00013126036484245438, |
| "loss": 0.3767, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.1278941565600882, |
| "grad_norm": 0.24623173475265503, |
| "learning_rate": 0.00013117744610281922, |
| "loss": 0.4077, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.1284454244762956, |
| "grad_norm": 0.24347223341464996, |
| "learning_rate": 0.00013109452736318408, |
| "loss": 0.3846, |
| "step": 2047 |
| }, |
| { |
| "epoch": 1.1289966923925028, |
| "grad_norm": 0.24663501977920532, |
| "learning_rate": 0.0001310116086235489, |
| "loss": 0.3992, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.12954796030871, |
| "grad_norm": 0.23556159436702728, |
| "learning_rate": 0.00013092868988391377, |
| "loss": 0.3949, |
| "step": 2049 |
| }, |
| { |
| "epoch": 1.1300992282249174, |
| "grad_norm": 0.21868300437927246, |
| "learning_rate": 0.0001308457711442786, |
| "loss": 0.3824, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.1306504961411246, |
| "grad_norm": 0.23438437283039093, |
| "learning_rate": 0.00013076285240464343, |
| "loss": 0.3801, |
| "step": 2051 |
| }, |
| { |
| "epoch": 1.131201764057332, |
| "grad_norm": 0.22960849106311798, |
| "learning_rate": 0.0001306799336650083, |
| "loss": 0.4088, |
| "step": 2052 |
| }, |
| { |
| "epoch": 1.1317530319735392, |
| "grad_norm": 0.240730881690979, |
| "learning_rate": 0.00013059701492537312, |
| "loss": 0.3644, |
| "step": 2053 |
| }, |
| { |
| "epoch": 1.1323042998897463, |
| "grad_norm": 0.2219470739364624, |
| "learning_rate": 0.00013051409618573798, |
| "loss": 0.3817, |
| "step": 2054 |
| }, |
| { |
| "epoch": 1.1328555678059538, |
| "grad_norm": 0.22481395304203033, |
| "learning_rate": 0.0001304311774461028, |
| "loss": 0.3858, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.133406835722161, |
| "grad_norm": 0.24147982895374298, |
| "learning_rate": 0.00013034825870646765, |
| "loss": 0.3977, |
| "step": 2056 |
| }, |
| { |
| "epoch": 1.1339581036383684, |
| "grad_norm": 0.2390933483839035, |
| "learning_rate": 0.0001302653399668325, |
| "loss": 0.3985, |
| "step": 2057 |
| }, |
| { |
| "epoch": 1.1345093715545755, |
| "grad_norm": 0.24776338040828705, |
| "learning_rate": 0.00013018242122719734, |
| "loss": 0.4026, |
| "step": 2058 |
| }, |
| { |
| "epoch": 1.1350606394707827, |
| "grad_norm": 0.23255294561386108, |
| "learning_rate": 0.0001300995024875622, |
| "loss": 0.3975, |
| "step": 2059 |
| }, |
| { |
| "epoch": 1.1356119073869901, |
| "grad_norm": 0.2401493936777115, |
| "learning_rate": 0.00013001658374792703, |
| "loss": 0.3924, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.1361631753031973, |
| "grad_norm": 0.2360658049583435, |
| "learning_rate": 0.00012993366500829186, |
| "loss": 0.3835, |
| "step": 2061 |
| }, |
| { |
| "epoch": 1.1367144432194047, |
| "grad_norm": 0.24272675812244415, |
| "learning_rate": 0.0001298507462686567, |
| "loss": 0.3816, |
| "step": 2062 |
| }, |
| { |
| "epoch": 1.137265711135612, |
| "grad_norm": 0.2370130568742752, |
| "learning_rate": 0.00012976782752902155, |
| "loss": 0.3807, |
| "step": 2063 |
| }, |
| { |
| "epoch": 1.137816979051819, |
| "grad_norm": 0.22449509799480438, |
| "learning_rate": 0.00012968490878938638, |
| "loss": 0.3857, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.1383682469680265, |
| "grad_norm": 0.2332579791545868, |
| "learning_rate": 0.00012960199004975121, |
| "loss": 0.3882, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.1389195148842337, |
| "grad_norm": 0.23922313749790192, |
| "learning_rate": 0.00012951907131011607, |
| "loss": 0.3924, |
| "step": 2066 |
| }, |
| { |
| "epoch": 1.1394707828004411, |
| "grad_norm": 0.23937387764453888, |
| "learning_rate": 0.0001294361525704809, |
| "loss": 0.3982, |
| "step": 2067 |
| }, |
| { |
| "epoch": 1.1400220507166483, |
| "grad_norm": 0.23198926448822021, |
| "learning_rate": 0.00012935323383084577, |
| "loss": 0.3971, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.1405733186328555, |
| "grad_norm": 0.23774142563343048, |
| "learning_rate": 0.0001292703150912106, |
| "loss": 0.419, |
| "step": 2069 |
| }, |
| { |
| "epoch": 1.141124586549063, |
| "grad_norm": 0.23457486927509308, |
| "learning_rate": 0.00012918739635157543, |
| "loss": 0.3947, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.14167585446527, |
| "grad_norm": 0.23662830889225006, |
| "learning_rate": 0.0001291044776119403, |
| "loss": 0.3989, |
| "step": 2071 |
| }, |
| { |
| "epoch": 1.1422271223814775, |
| "grad_norm": 0.2307705134153366, |
| "learning_rate": 0.00012902155887230512, |
| "loss": 0.3988, |
| "step": 2072 |
| }, |
| { |
| "epoch": 1.1427783902976847, |
| "grad_norm": 0.23430916666984558, |
| "learning_rate": 0.00012893864013266998, |
| "loss": 0.3956, |
| "step": 2073 |
| }, |
| { |
| "epoch": 1.1433296582138919, |
| "grad_norm": 0.24138319492340088, |
| "learning_rate": 0.0001288557213930348, |
| "loss": 0.4103, |
| "step": 2074 |
| }, |
| { |
| "epoch": 1.1438809261300993, |
| "grad_norm": 0.22443422675132751, |
| "learning_rate": 0.00012877280265339964, |
| "loss": 0.3839, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.1444321940463065, |
| "grad_norm": 0.2313619703054428, |
| "learning_rate": 0.0001286898839137645, |
| "loss": 0.4063, |
| "step": 2076 |
| }, |
| { |
| "epoch": 1.1449834619625139, |
| "grad_norm": 0.22947578132152557, |
| "learning_rate": 0.00012860696517412933, |
| "loss": 0.3852, |
| "step": 2077 |
| }, |
| { |
| "epoch": 1.145534729878721, |
| "grad_norm": 0.2276720404624939, |
| "learning_rate": 0.0001285240464344942, |
| "loss": 0.3968, |
| "step": 2078 |
| }, |
| { |
| "epoch": 1.1460859977949283, |
| "grad_norm": 0.22463871538639069, |
| "learning_rate": 0.00012844112769485903, |
| "loss": 0.3904, |
| "step": 2079 |
| }, |
| { |
| "epoch": 1.1466372657111357, |
| "grad_norm": 0.22553198039531708, |
| "learning_rate": 0.00012835820895522386, |
| "loss": 0.3902, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.1471885336273429, |
| "grad_norm": 0.23410287499427795, |
| "learning_rate": 0.00012827529021558872, |
| "loss": 0.3952, |
| "step": 2081 |
| }, |
| { |
| "epoch": 1.1477398015435503, |
| "grad_norm": 0.2365550547838211, |
| "learning_rate": 0.00012819237147595355, |
| "loss": 0.3907, |
| "step": 2082 |
| }, |
| { |
| "epoch": 1.1482910694597575, |
| "grad_norm": 0.22853030264377594, |
| "learning_rate": 0.0001281094527363184, |
| "loss": 0.4041, |
| "step": 2083 |
| }, |
| { |
| "epoch": 1.1488423373759646, |
| "grad_norm": 0.23059257864952087, |
| "learning_rate": 0.00012802653399668324, |
| "loss": 0.4047, |
| "step": 2084 |
| }, |
| { |
| "epoch": 1.149393605292172, |
| "grad_norm": 0.23414267599582672, |
| "learning_rate": 0.00012794361525704807, |
| "loss": 0.4077, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.1499448732083792, |
| "grad_norm": 0.23295001685619354, |
| "learning_rate": 0.00012786069651741293, |
| "loss": 0.3942, |
| "step": 2086 |
| }, |
| { |
| "epoch": 1.1504961411245866, |
| "grad_norm": 0.23734460771083832, |
| "learning_rate": 0.00012777777777777776, |
| "loss": 0.4074, |
| "step": 2087 |
| }, |
| { |
| "epoch": 1.1510474090407938, |
| "grad_norm": 0.21490591764450073, |
| "learning_rate": 0.00012769485903814262, |
| "loss": 0.3747, |
| "step": 2088 |
| }, |
| { |
| "epoch": 1.151598676957001, |
| "grad_norm": 0.22734799981117249, |
| "learning_rate": 0.00012761194029850745, |
| "loss": 0.3836, |
| "step": 2089 |
| }, |
| { |
| "epoch": 1.1521499448732084, |
| "grad_norm": 0.22835008800029755, |
| "learning_rate": 0.0001275290215588723, |
| "loss": 0.3983, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.1527012127894156, |
| "grad_norm": 0.2260267287492752, |
| "learning_rate": 0.00012744610281923715, |
| "loss": 0.3785, |
| "step": 2091 |
| }, |
| { |
| "epoch": 1.153252480705623, |
| "grad_norm": 0.22667206823825836, |
| "learning_rate": 0.00012736318407960198, |
| "loss": 0.3945, |
| "step": 2092 |
| }, |
| { |
| "epoch": 1.1538037486218302, |
| "grad_norm": 0.23218148946762085, |
| "learning_rate": 0.00012728026533996684, |
| "loss": 0.3967, |
| "step": 2093 |
| }, |
| { |
| "epoch": 1.1543550165380374, |
| "grad_norm": 0.24123932421207428, |
| "learning_rate": 0.00012719734660033167, |
| "loss": 0.3994, |
| "step": 2094 |
| }, |
| { |
| "epoch": 1.1549062844542448, |
| "grad_norm": 0.23074567317962646, |
| "learning_rate": 0.0001271144278606965, |
| "loss": 0.405, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.155457552370452, |
| "grad_norm": 0.23828662931919098, |
| "learning_rate": 0.00012703150912106136, |
| "loss": 0.3886, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.1560088202866594, |
| "grad_norm": 0.22315117716789246, |
| "learning_rate": 0.0001269485903814262, |
| "loss": 0.3925, |
| "step": 2097 |
| }, |
| { |
| "epoch": 1.1565600882028666, |
| "grad_norm": 0.22071965038776398, |
| "learning_rate": 0.00012686567164179105, |
| "loss": 0.3997, |
| "step": 2098 |
| }, |
| { |
| "epoch": 1.1571113561190738, |
| "grad_norm": 0.22145338356494904, |
| "learning_rate": 0.00012678275290215588, |
| "loss": 0.3784, |
| "step": 2099 |
| }, |
| { |
| "epoch": 1.1576626240352812, |
| "grad_norm": 0.2308942675590515, |
| "learning_rate": 0.00012669983416252072, |
| "loss": 0.3576, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1582138919514884, |
| "grad_norm": 0.2193097174167633, |
| "learning_rate": 0.00012661691542288557, |
| "loss": 0.3806, |
| "step": 2101 |
| }, |
| { |
| "epoch": 1.1587651598676958, |
| "grad_norm": 0.2277258038520813, |
| "learning_rate": 0.0001265339966832504, |
| "loss": 0.389, |
| "step": 2102 |
| }, |
| { |
| "epoch": 1.159316427783903, |
| "grad_norm": 0.22830741107463837, |
| "learning_rate": 0.00012645107794361527, |
| "loss": 0.4132, |
| "step": 2103 |
| }, |
| { |
| "epoch": 1.1598676957001102, |
| "grad_norm": 0.22856192290782928, |
| "learning_rate": 0.0001263681592039801, |
| "loss": 0.3879, |
| "step": 2104 |
| }, |
| { |
| "epoch": 1.1604189636163176, |
| "grad_norm": 0.23155651986598969, |
| "learning_rate": 0.00012628524046434493, |
| "loss": 0.3902, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.1609702315325248, |
| "grad_norm": 0.22571994364261627, |
| "learning_rate": 0.00012620232172470976, |
| "loss": 0.4017, |
| "step": 2106 |
| }, |
| { |
| "epoch": 1.1615214994487322, |
| "grad_norm": 0.2258533239364624, |
| "learning_rate": 0.00012611940298507462, |
| "loss": 0.4027, |
| "step": 2107 |
| }, |
| { |
| "epoch": 1.1620727673649394, |
| "grad_norm": 0.24114197492599487, |
| "learning_rate": 0.00012603648424543945, |
| "loss": 0.3983, |
| "step": 2108 |
| }, |
| { |
| "epoch": 1.1626240352811466, |
| "grad_norm": 0.22286631166934967, |
| "learning_rate": 0.00012595356550580429, |
| "loss": 0.4026, |
| "step": 2109 |
| }, |
| { |
| "epoch": 1.163175303197354, |
| "grad_norm": 0.2404211014509201, |
| "learning_rate": 0.00012587064676616914, |
| "loss": 0.4082, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.1637265711135611, |
| "grad_norm": 0.22578535974025726, |
| "learning_rate": 0.00012578772802653398, |
| "loss": 0.3881, |
| "step": 2111 |
| }, |
| { |
| "epoch": 1.1642778390297686, |
| "grad_norm": 0.24066035449504852, |
| "learning_rate": 0.00012570480928689884, |
| "loss": 0.4144, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.1648291069459757, |
| "grad_norm": 0.22703833878040314, |
| "learning_rate": 0.00012562189054726367, |
| "loss": 0.3942, |
| "step": 2113 |
| }, |
| { |
| "epoch": 1.165380374862183, |
| "grad_norm": 0.2277577817440033, |
| "learning_rate": 0.0001255389718076285, |
| "loss": 0.4116, |
| "step": 2114 |
| }, |
| { |
| "epoch": 1.1659316427783903, |
| "grad_norm": 0.2201533019542694, |
| "learning_rate": 0.00012545605306799336, |
| "loss": 0.3961, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.1664829106945975, |
| "grad_norm": 0.22969132661819458, |
| "learning_rate": 0.0001253731343283582, |
| "loss": 0.4146, |
| "step": 2116 |
| }, |
| { |
| "epoch": 1.167034178610805, |
| "grad_norm": 0.2208871990442276, |
| "learning_rate": 0.00012529021558872305, |
| "loss": 0.3925, |
| "step": 2117 |
| }, |
| { |
| "epoch": 1.1675854465270121, |
| "grad_norm": 0.24675814807415009, |
| "learning_rate": 0.00012520729684908788, |
| "loss": 0.3923, |
| "step": 2118 |
| }, |
| { |
| "epoch": 1.1681367144432193, |
| "grad_norm": 0.25365886092185974, |
| "learning_rate": 0.00012512437810945271, |
| "loss": 0.4018, |
| "step": 2119 |
| }, |
| { |
| "epoch": 1.1686879823594267, |
| "grad_norm": 0.2352716475725174, |
| "learning_rate": 0.00012504145936981757, |
| "loss": 0.4136, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.169239250275634, |
| "grad_norm": 0.22656375169754028, |
| "learning_rate": 0.0001249585406301824, |
| "loss": 0.3896, |
| "step": 2121 |
| }, |
| { |
| "epoch": 1.1697905181918413, |
| "grad_norm": 0.22290179133415222, |
| "learning_rate": 0.00012487562189054724, |
| "loss": 0.4059, |
| "step": 2122 |
| }, |
| { |
| "epoch": 1.1703417861080485, |
| "grad_norm": 0.24139589071273804, |
| "learning_rate": 0.0001247927031509121, |
| "loss": 0.3999, |
| "step": 2123 |
| }, |
| { |
| "epoch": 1.1708930540242557, |
| "grad_norm": 0.24391639232635498, |
| "learning_rate": 0.00012470978441127693, |
| "loss": 0.3876, |
| "step": 2124 |
| }, |
| { |
| "epoch": 1.171444321940463, |
| "grad_norm": 0.2283831685781479, |
| "learning_rate": 0.0001246268656716418, |
| "loss": 0.3988, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.1719955898566703, |
| "grad_norm": 0.24799783527851105, |
| "learning_rate": 0.00012454394693200662, |
| "loss": 0.396, |
| "step": 2126 |
| }, |
| { |
| "epoch": 1.1725468577728777, |
| "grad_norm": 0.22174561023712158, |
| "learning_rate": 0.00012446102819237145, |
| "loss": 0.3809, |
| "step": 2127 |
| }, |
| { |
| "epoch": 1.173098125689085, |
| "grad_norm": 0.22951188683509827, |
| "learning_rate": 0.0001243781094527363, |
| "loss": 0.3882, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.173649393605292, |
| "grad_norm": 0.21973788738250732, |
| "learning_rate": 0.00012429519071310114, |
| "loss": 0.3872, |
| "step": 2129 |
| }, |
| { |
| "epoch": 1.1742006615214995, |
| "grad_norm": 0.22701437771320343, |
| "learning_rate": 0.000124212271973466, |
| "loss": 0.3876, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.1747519294377067, |
| "grad_norm": 0.22394593060016632, |
| "learning_rate": 0.00012412935323383083, |
| "loss": 0.3874, |
| "step": 2131 |
| }, |
| { |
| "epoch": 1.175303197353914, |
| "grad_norm": 0.24040114879608154, |
| "learning_rate": 0.00012404643449419567, |
| "loss": 0.3856, |
| "step": 2132 |
| }, |
| { |
| "epoch": 1.1758544652701213, |
| "grad_norm": 0.2295607030391693, |
| "learning_rate": 0.00012396351575456052, |
| "loss": 0.3861, |
| "step": 2133 |
| }, |
| { |
| "epoch": 1.1764057331863285, |
| "grad_norm": 0.229506716132164, |
| "learning_rate": 0.00012388059701492536, |
| "loss": 0.3877, |
| "step": 2134 |
| }, |
| { |
| "epoch": 1.1769570011025359, |
| "grad_norm": 0.24226558208465576, |
| "learning_rate": 0.00012379767827529022, |
| "loss": 0.4051, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.177508269018743, |
| "grad_norm": 0.23359960317611694, |
| "learning_rate": 0.00012371475953565505, |
| "loss": 0.3911, |
| "step": 2136 |
| }, |
| { |
| "epoch": 1.1780595369349505, |
| "grad_norm": 0.24533167481422424, |
| "learning_rate": 0.00012363184079601988, |
| "loss": 0.4075, |
| "step": 2137 |
| }, |
| { |
| "epoch": 1.1786108048511577, |
| "grad_norm": 0.22445149719715118, |
| "learning_rate": 0.00012354892205638474, |
| "loss": 0.3762, |
| "step": 2138 |
| }, |
| { |
| "epoch": 1.1791620727673648, |
| "grad_norm": 0.2399044781923294, |
| "learning_rate": 0.00012346600331674957, |
| "loss": 0.375, |
| "step": 2139 |
| }, |
| { |
| "epoch": 1.1797133406835723, |
| "grad_norm": 0.2472797930240631, |
| "learning_rate": 0.00012338308457711443, |
| "loss": 0.4036, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.1802646085997794, |
| "grad_norm": 0.2297624945640564, |
| "learning_rate": 0.00012330016583747926, |
| "loss": 0.4154, |
| "step": 2141 |
| }, |
| { |
| "epoch": 1.1808158765159869, |
| "grad_norm": 0.23524117469787598, |
| "learning_rate": 0.0001232172470978441, |
| "loss": 0.3879, |
| "step": 2142 |
| }, |
| { |
| "epoch": 1.181367144432194, |
| "grad_norm": 0.23935049772262573, |
| "learning_rate": 0.00012313432835820895, |
| "loss": 0.4107, |
| "step": 2143 |
| }, |
| { |
| "epoch": 1.1819184123484012, |
| "grad_norm": 0.21305608749389648, |
| "learning_rate": 0.00012305140961857379, |
| "loss": 0.3964, |
| "step": 2144 |
| }, |
| { |
| "epoch": 1.1824696802646086, |
| "grad_norm": 0.2339240163564682, |
| "learning_rate": 0.00012296849087893864, |
| "loss": 0.4185, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.1830209481808158, |
| "grad_norm": 0.23344539105892181, |
| "learning_rate": 0.00012288557213930348, |
| "loss": 0.3934, |
| "step": 2146 |
| }, |
| { |
| "epoch": 1.1835722160970232, |
| "grad_norm": 0.2274356484413147, |
| "learning_rate": 0.0001228026533996683, |
| "loss": 0.3854, |
| "step": 2147 |
| }, |
| { |
| "epoch": 1.1841234840132304, |
| "grad_norm": 0.23241972923278809, |
| "learning_rate": 0.00012271973466003317, |
| "loss": 0.4106, |
| "step": 2148 |
| }, |
| { |
| "epoch": 1.1846747519294376, |
| "grad_norm": 0.22595259547233582, |
| "learning_rate": 0.000122636815920398, |
| "loss": 0.401, |
| "step": 2149 |
| }, |
| { |
| "epoch": 1.185226019845645, |
| "grad_norm": 0.22598454356193542, |
| "learning_rate": 0.00012255389718076283, |
| "loss": 0.4041, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.1857772877618522, |
| "grad_norm": 0.233281672000885, |
| "learning_rate": 0.00012247097844112766, |
| "loss": 0.3763, |
| "step": 2151 |
| }, |
| { |
| "epoch": 1.1863285556780596, |
| "grad_norm": 0.22901344299316406, |
| "learning_rate": 0.00012238805970149252, |
| "loss": 0.3949, |
| "step": 2152 |
| }, |
| { |
| "epoch": 1.1868798235942668, |
| "grad_norm": 0.24648213386535645, |
| "learning_rate": 0.00012230514096185736, |
| "loss": 0.4229, |
| "step": 2153 |
| }, |
| { |
| "epoch": 1.187431091510474, |
| "grad_norm": 0.24580827355384827, |
| "learning_rate": 0.00012222222222222221, |
| "loss": 0.4125, |
| "step": 2154 |
| }, |
| { |
| "epoch": 1.1879823594266814, |
| "grad_norm": 0.23127946257591248, |
| "learning_rate": 0.00012213930348258705, |
| "loss": 0.3727, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.1885336273428886, |
| "grad_norm": 0.2267657071352005, |
| "learning_rate": 0.00012205638474295189, |
| "loss": 0.3951, |
| "step": 2156 |
| }, |
| { |
| "epoch": 1.189084895259096, |
| "grad_norm": 0.23497919738292694, |
| "learning_rate": 0.00012197346600331674, |
| "loss": 0.3721, |
| "step": 2157 |
| }, |
| { |
| "epoch": 1.1896361631753032, |
| "grad_norm": 0.22601653635501862, |
| "learning_rate": 0.00012189054726368157, |
| "loss": 0.3945, |
| "step": 2158 |
| }, |
| { |
| "epoch": 1.1901874310915104, |
| "grad_norm": 0.21945270895957947, |
| "learning_rate": 0.00012180762852404642, |
| "loss": 0.3574, |
| "step": 2159 |
| }, |
| { |
| "epoch": 1.1907386990077178, |
| "grad_norm": 0.2285127341747284, |
| "learning_rate": 0.00012172470978441126, |
| "loss": 0.3891, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.191289966923925, |
| "grad_norm": 0.23766474425792694, |
| "learning_rate": 0.0001216417910447761, |
| "loss": 0.3968, |
| "step": 2161 |
| }, |
| { |
| "epoch": 1.1918412348401324, |
| "grad_norm": 0.23863717913627625, |
| "learning_rate": 0.00012155887230514095, |
| "loss": 0.389, |
| "step": 2162 |
| }, |
| { |
| "epoch": 1.1923925027563396, |
| "grad_norm": 0.22550217807292938, |
| "learning_rate": 0.00012147595356550578, |
| "loss": 0.3842, |
| "step": 2163 |
| }, |
| { |
| "epoch": 1.1929437706725468, |
| "grad_norm": 0.22460085153579712, |
| "learning_rate": 0.00012139303482587063, |
| "loss": 0.3874, |
| "step": 2164 |
| }, |
| { |
| "epoch": 1.1934950385887542, |
| "grad_norm": 0.2168971300125122, |
| "learning_rate": 0.00012131011608623548, |
| "loss": 0.3783, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.1940463065049614, |
| "grad_norm": 0.2768751084804535, |
| "learning_rate": 0.00012122719734660032, |
| "loss": 0.4206, |
| "step": 2166 |
| }, |
| { |
| "epoch": 1.1945975744211688, |
| "grad_norm": 0.2357032299041748, |
| "learning_rate": 0.00012114427860696517, |
| "loss": 0.3943, |
| "step": 2167 |
| }, |
| { |
| "epoch": 1.195148842337376, |
| "grad_norm": 0.24314233660697937, |
| "learning_rate": 0.00012106135986733, |
| "loss": 0.3983, |
| "step": 2168 |
| }, |
| { |
| "epoch": 1.1957001102535831, |
| "grad_norm": 0.2605820596218109, |
| "learning_rate": 0.00012097844112769484, |
| "loss": 0.4036, |
| "step": 2169 |
| }, |
| { |
| "epoch": 1.1962513781697905, |
| "grad_norm": 0.22138415277004242, |
| "learning_rate": 0.00012089552238805969, |
| "loss": 0.3794, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.1968026460859977, |
| "grad_norm": 0.2328760325908661, |
| "learning_rate": 0.00012081260364842454, |
| "loss": 0.3948, |
| "step": 2171 |
| }, |
| { |
| "epoch": 1.1973539140022051, |
| "grad_norm": 0.22606134414672852, |
| "learning_rate": 0.00012072968490878938, |
| "loss": 0.3958, |
| "step": 2172 |
| }, |
| { |
| "epoch": 1.1979051819184123, |
| "grad_norm": 0.25683924555778503, |
| "learning_rate": 0.00012064676616915421, |
| "loss": 0.3939, |
| "step": 2173 |
| }, |
| { |
| "epoch": 1.1984564498346195, |
| "grad_norm": 0.22325700521469116, |
| "learning_rate": 0.00012056384742951906, |
| "loss": 0.3915, |
| "step": 2174 |
| }, |
| { |
| "epoch": 1.199007717750827, |
| "grad_norm": 0.21337918937206268, |
| "learning_rate": 0.0001204809286898839, |
| "loss": 0.3699, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.1995589856670341, |
| "grad_norm": 0.2343214452266693, |
| "learning_rate": 0.00012039800995024875, |
| "loss": 0.4029, |
| "step": 2176 |
| }, |
| { |
| "epoch": 1.2001102535832415, |
| "grad_norm": 0.2408185601234436, |
| "learning_rate": 0.0001203150912106136, |
| "loss": 0.3915, |
| "step": 2177 |
| }, |
| { |
| "epoch": 1.2006615214994487, |
| "grad_norm": 0.2592547535896301, |
| "learning_rate": 0.00012023217247097843, |
| "loss": 0.409, |
| "step": 2178 |
| }, |
| { |
| "epoch": 1.201212789415656, |
| "grad_norm": 0.2201685607433319, |
| "learning_rate": 0.00012014925373134327, |
| "loss": 0.381, |
| "step": 2179 |
| }, |
| { |
| "epoch": 1.2017640573318633, |
| "grad_norm": 0.23619139194488525, |
| "learning_rate": 0.00012006633499170812, |
| "loss": 0.3708, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.2023153252480705, |
| "grad_norm": 0.24719634652137756, |
| "learning_rate": 0.00011998341625207296, |
| "loss": 0.3996, |
| "step": 2181 |
| }, |
| { |
| "epoch": 1.202866593164278, |
| "grad_norm": 0.24691031873226166, |
| "learning_rate": 0.00011990049751243781, |
| "loss": 0.3897, |
| "step": 2182 |
| }, |
| { |
| "epoch": 1.203417861080485, |
| "grad_norm": 0.2518804967403412, |
| "learning_rate": 0.00011981757877280264, |
| "loss": 0.3886, |
| "step": 2183 |
| }, |
| { |
| "epoch": 1.2039691289966923, |
| "grad_norm": 0.2279016375541687, |
| "learning_rate": 0.00011973466003316749, |
| "loss": 0.3791, |
| "step": 2184 |
| }, |
| { |
| "epoch": 1.2045203969128997, |
| "grad_norm": 0.24580788612365723, |
| "learning_rate": 0.00011965174129353233, |
| "loss": 0.4013, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.2050716648291069, |
| "grad_norm": 0.2422635406255722, |
| "learning_rate": 0.00011956882255389718, |
| "loss": 0.3831, |
| "step": 2186 |
| }, |
| { |
| "epoch": 1.2056229327453143, |
| "grad_norm": 0.24743367731571198, |
| "learning_rate": 0.00011948590381426202, |
| "loss": 0.3939, |
| "step": 2187 |
| }, |
| { |
| "epoch": 1.2061742006615215, |
| "grad_norm": 0.24504512548446655, |
| "learning_rate": 0.00011940298507462686, |
| "loss": 0.3976, |
| "step": 2188 |
| }, |
| { |
| "epoch": 1.2067254685777287, |
| "grad_norm": 0.2121214121580124, |
| "learning_rate": 0.0001193200663349917, |
| "loss": 0.3692, |
| "step": 2189 |
| }, |
| { |
| "epoch": 1.207276736493936, |
| "grad_norm": 0.23639699816703796, |
| "learning_rate": 0.00011923714759535655, |
| "loss": 0.3999, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.2078280044101433, |
| "grad_norm": 0.2503402531147003, |
| "learning_rate": 0.00011915422885572139, |
| "loss": 0.3807, |
| "step": 2191 |
| }, |
| { |
| "epoch": 1.2083792723263507, |
| "grad_norm": 0.2412857562303543, |
| "learning_rate": 0.00011907131011608624, |
| "loss": 0.397, |
| "step": 2192 |
| }, |
| { |
| "epoch": 1.2089305402425579, |
| "grad_norm": 0.2293364554643631, |
| "learning_rate": 0.00011898839137645107, |
| "loss": 0.3752, |
| "step": 2193 |
| }, |
| { |
| "epoch": 1.209481808158765, |
| "grad_norm": 0.23062635958194733, |
| "learning_rate": 0.00011890547263681592, |
| "loss": 0.3779, |
| "step": 2194 |
| }, |
| { |
| "epoch": 1.2100330760749725, |
| "grad_norm": 0.23140175640583038, |
| "learning_rate": 0.00011882255389718075, |
| "loss": 0.3763, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.2105843439911796, |
| "grad_norm": 0.23366335034370422, |
| "learning_rate": 0.0001187396351575456, |
| "loss": 0.3959, |
| "step": 2196 |
| }, |
| { |
| "epoch": 1.211135611907387, |
| "grad_norm": 0.2382514774799347, |
| "learning_rate": 0.00011865671641791043, |
| "loss": 0.3876, |
| "step": 2197 |
| }, |
| { |
| "epoch": 1.2116868798235942, |
| "grad_norm": 0.23558002710342407, |
| "learning_rate": 0.00011857379767827527, |
| "loss": 0.4032, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.2122381477398014, |
| "grad_norm": 0.23793788254261017, |
| "learning_rate": 0.00011849087893864012, |
| "loss": 0.3909, |
| "step": 2199 |
| }, |
| { |
| "epoch": 1.2127894156560088, |
| "grad_norm": 0.2181142121553421, |
| "learning_rate": 0.00011840796019900496, |
| "loss": 0.3923, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.213340683572216, |
| "grad_norm": 0.21802657842636108, |
| "learning_rate": 0.00011832504145936981, |
| "loss": 0.3795, |
| "step": 2201 |
| }, |
| { |
| "epoch": 1.2138919514884234, |
| "grad_norm": 0.2436913102865219, |
| "learning_rate": 0.00011824212271973464, |
| "loss": 0.3985, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.2144432194046306, |
| "grad_norm": 0.22913113236427307, |
| "learning_rate": 0.00011815920398009949, |
| "loss": 0.3872, |
| "step": 2203 |
| }, |
| { |
| "epoch": 1.2149944873208378, |
| "grad_norm": 0.2223367691040039, |
| "learning_rate": 0.00011807628524046433, |
| "loss": 0.3905, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.2155457552370452, |
| "grad_norm": 0.23263731598854065, |
| "learning_rate": 0.00011799336650082918, |
| "loss": 0.4048, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.2160970231532524, |
| "grad_norm": 0.2505498230457306, |
| "learning_rate": 0.00011791044776119402, |
| "loss": 0.395, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.2166482910694598, |
| "grad_norm": 0.2553291916847229, |
| "learning_rate": 0.00011782752902155885, |
| "loss": 0.3935, |
| "step": 2207 |
| }, |
| { |
| "epoch": 1.217199558985667, |
| "grad_norm": 0.22239425778388977, |
| "learning_rate": 0.0001177446102819237, |
| "loss": 0.381, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.2177508269018742, |
| "grad_norm": 0.21807150542736053, |
| "learning_rate": 0.00011766169154228855, |
| "loss": 0.3878, |
| "step": 2209 |
| }, |
| { |
| "epoch": 1.2183020948180816, |
| "grad_norm": 0.23478740453720093, |
| "learning_rate": 0.00011757877280265339, |
| "loss": 0.3815, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.2188533627342888, |
| "grad_norm": 0.23702913522720337, |
| "learning_rate": 0.00011749585406301822, |
| "loss": 0.4001, |
| "step": 2211 |
| }, |
| { |
| "epoch": 1.2194046306504962, |
| "grad_norm": 0.23261341452598572, |
| "learning_rate": 0.00011741293532338307, |
| "loss": 0.3935, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.2199558985667034, |
| "grad_norm": 0.22314967215061188, |
| "learning_rate": 0.00011733001658374791, |
| "loss": 0.4048, |
| "step": 2213 |
| }, |
| { |
| "epoch": 1.2205071664829106, |
| "grad_norm": 0.23277883231639862, |
| "learning_rate": 0.00011724709784411276, |
| "loss": 0.3739, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.221058434399118, |
| "grad_norm": 0.24505817890167236, |
| "learning_rate": 0.0001171641791044776, |
| "loss": 0.3922, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.2216097023153252, |
| "grad_norm": 0.24386508762836456, |
| "learning_rate": 0.00011708126036484244, |
| "loss": 0.3872, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.2221609702315326, |
| "grad_norm": 0.2437102198600769, |
| "learning_rate": 0.00011699834162520728, |
| "loss": 0.4048, |
| "step": 2217 |
| }, |
| { |
| "epoch": 1.2227122381477398, |
| "grad_norm": 0.22707347571849823, |
| "learning_rate": 0.00011691542288557213, |
| "loss": 0.3996, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.223263506063947, |
| "grad_norm": 0.23951935768127441, |
| "learning_rate": 0.00011683250414593697, |
| "loss": 0.399, |
| "step": 2219 |
| }, |
| { |
| "epoch": 1.2238147739801544, |
| "grad_norm": 0.27458345890045166, |
| "learning_rate": 0.00011674958540630182, |
| "loss": 0.4093, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.2243660418963616, |
| "grad_norm": 0.23940932750701904, |
| "learning_rate": 0.00011666666666666665, |
| "loss": 0.3915, |
| "step": 2221 |
| }, |
| { |
| "epoch": 1.224917309812569, |
| "grad_norm": 0.24100755155086517, |
| "learning_rate": 0.0001165837479270315, |
| "loss": 0.3915, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.2254685777287762, |
| "grad_norm": 0.2423773556947708, |
| "learning_rate": 0.00011650082918739634, |
| "loss": 0.4061, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.2260198456449833, |
| "grad_norm": 0.2552812099456787, |
| "learning_rate": 0.00011641791044776119, |
| "loss": 0.3922, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.2265711135611908, |
| "grad_norm": 0.24121615290641785, |
| "learning_rate": 0.00011633499170812603, |
| "loss": 0.3949, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.227122381477398, |
| "grad_norm": 0.24254634976387024, |
| "learning_rate": 0.00011625207296849087, |
| "loss": 0.3776, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.2276736493936053, |
| "grad_norm": 0.2757539451122284, |
| "learning_rate": 0.00011616915422885571, |
| "loss": 0.4181, |
| "step": 2227 |
| }, |
| { |
| "epoch": 1.2282249173098125, |
| "grad_norm": 0.25508221983909607, |
| "learning_rate": 0.00011608623548922056, |
| "loss": 0.4069, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.2287761852260197, |
| "grad_norm": 0.24166013300418854, |
| "learning_rate": 0.0001160033167495854, |
| "loss": 0.3848, |
| "step": 2229 |
| }, |
| { |
| "epoch": 1.2293274531422271, |
| "grad_norm": 0.23408280313014984, |
| "learning_rate": 0.00011592039800995025, |
| "loss": 0.3867, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.2298787210584343, |
| "grad_norm": 0.2366735339164734, |
| "learning_rate": 0.00011583747927031508, |
| "loss": 0.407, |
| "step": 2231 |
| }, |
| { |
| "epoch": 1.2304299889746417, |
| "grad_norm": 0.247688889503479, |
| "learning_rate": 0.00011575456053067993, |
| "loss": 0.3898, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.230981256890849, |
| "grad_norm": 0.23416852951049805, |
| "learning_rate": 0.00011567164179104477, |
| "loss": 0.3871, |
| "step": 2233 |
| }, |
| { |
| "epoch": 1.231532524807056, |
| "grad_norm": 0.243104949593544, |
| "learning_rate": 0.00011558872305140962, |
| "loss": 0.4209, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.2320837927232635, |
| "grad_norm": 0.23723013699054718, |
| "learning_rate": 0.00011550580431177446, |
| "loss": 0.3867, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.2326350606394707, |
| "grad_norm": 0.2383720874786377, |
| "learning_rate": 0.0001154228855721393, |
| "loss": 0.3861, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.2331863285556781, |
| "grad_norm": 0.25127896666526794, |
| "learning_rate": 0.00011533996683250414, |
| "loss": 0.4039, |
| "step": 2237 |
| }, |
| { |
| "epoch": 1.2337375964718853, |
| "grad_norm": 0.23529255390167236, |
| "learning_rate": 0.00011525704809286899, |
| "loss": 0.3838, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.2342888643880925, |
| "grad_norm": 0.2100450098514557, |
| "learning_rate": 0.00011517412935323382, |
| "loss": 0.3639, |
| "step": 2239 |
| }, |
| { |
| "epoch": 1.2348401323043, |
| "grad_norm": 0.24556870758533478, |
| "learning_rate": 0.00011509121061359865, |
| "loss": 0.3901, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.235391400220507, |
| "grad_norm": 0.2549160420894623, |
| "learning_rate": 0.0001150082918739635, |
| "loss": 0.3871, |
| "step": 2241 |
| }, |
| { |
| "epoch": 1.2359426681367145, |
| "grad_norm": 0.23175586760044098, |
| "learning_rate": 0.00011492537313432834, |
| "loss": 0.3886, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.2364939360529217, |
| "grad_norm": 0.2296617478132248, |
| "learning_rate": 0.00011484245439469319, |
| "loss": 0.406, |
| "step": 2243 |
| }, |
| { |
| "epoch": 1.237045203969129, |
| "grad_norm": 0.2378944754600525, |
| "learning_rate": 0.00011475953565505803, |
| "loss": 0.3949, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.2375964718853363, |
| "grad_norm": 0.23094962537288666, |
| "learning_rate": 0.00011467661691542286, |
| "loss": 0.3875, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.2381477398015435, |
| "grad_norm": 0.22399038076400757, |
| "learning_rate": 0.00011459369817578771, |
| "loss": 0.4009, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.2386990077177509, |
| "grad_norm": 0.24871258437633514, |
| "learning_rate": 0.00011451077943615256, |
| "loss": 0.3926, |
| "step": 2247 |
| }, |
| { |
| "epoch": 1.239250275633958, |
| "grad_norm": 0.23597979545593262, |
| "learning_rate": 0.0001144278606965174, |
| "loss": 0.3803, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.2398015435501655, |
| "grad_norm": 0.23361554741859436, |
| "learning_rate": 0.00011434494195688225, |
| "loss": 0.3994, |
| "step": 2249 |
| }, |
| { |
| "epoch": 1.2403528114663727, |
| "grad_norm": 0.2614096999168396, |
| "learning_rate": 0.00011426202321724708, |
| "loss": 0.3946, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.2409040793825798, |
| "grad_norm": 0.23481406271457672, |
| "learning_rate": 0.00011417910447761192, |
| "loss": 0.3981, |
| "step": 2251 |
| }, |
| { |
| "epoch": 1.2414553472987873, |
| "grad_norm": 0.21524877846240997, |
| "learning_rate": 0.00011409618573797677, |
| "loss": 0.3725, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.2420066152149944, |
| "grad_norm": 0.2307668924331665, |
| "learning_rate": 0.00011401326699834162, |
| "loss": 0.3829, |
| "step": 2253 |
| }, |
| { |
| "epoch": 1.2425578831312019, |
| "grad_norm": 0.2581194341182709, |
| "learning_rate": 0.00011393034825870646, |
| "loss": 0.3901, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.243109151047409, |
| "grad_norm": 0.235372856259346, |
| "learning_rate": 0.0001138474295190713, |
| "loss": 0.3922, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.2436604189636162, |
| "grad_norm": 0.23432569205760956, |
| "learning_rate": 0.00011376451077943614, |
| "loss": 0.3767, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.2442116868798236, |
| "grad_norm": 0.2407122552394867, |
| "learning_rate": 0.00011368159203980098, |
| "loss": 0.4207, |
| "step": 2257 |
| }, |
| { |
| "epoch": 1.2447629547960308, |
| "grad_norm": 0.25739043951034546, |
| "learning_rate": 0.00011359867330016583, |
| "loss": 0.3838, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.2453142227122382, |
| "grad_norm": 0.25240135192871094, |
| "learning_rate": 0.00011351575456053068, |
| "loss": 0.3989, |
| "step": 2259 |
| }, |
| { |
| "epoch": 1.2458654906284454, |
| "grad_norm": 0.22552815079689026, |
| "learning_rate": 0.00011343283582089551, |
| "loss": 0.3848, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.2464167585446526, |
| "grad_norm": 0.2320718765258789, |
| "learning_rate": 0.00011334991708126035, |
| "loss": 0.382, |
| "step": 2261 |
| }, |
| { |
| "epoch": 1.24696802646086, |
| "grad_norm": 0.23423726856708527, |
| "learning_rate": 0.0001132669983416252, |
| "loss": 0.3817, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.2475192943770672, |
| "grad_norm": 0.22892701625823975, |
| "learning_rate": 0.00011318407960199004, |
| "loss": 0.3858, |
| "step": 2263 |
| }, |
| { |
| "epoch": 1.2480705622932746, |
| "grad_norm": 0.23635762929916382, |
| "learning_rate": 0.00011310116086235489, |
| "loss": 0.3946, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.2486218302094818, |
| "grad_norm": 0.23909956216812134, |
| "learning_rate": 0.00011301824212271972, |
| "loss": 0.3826, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.249173098125689, |
| "grad_norm": 0.23733805119991302, |
| "learning_rate": 0.00011293532338308457, |
| "loss": 0.4215, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.2497243660418964, |
| "grad_norm": 0.2257446050643921, |
| "learning_rate": 0.00011285240464344941, |
| "loss": 0.3959, |
| "step": 2267 |
| }, |
| { |
| "epoch": 1.2502756339581036, |
| "grad_norm": 0.2394627183675766, |
| "learning_rate": 0.00011276948590381426, |
| "loss": 0.398, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.2508269018743108, |
| "grad_norm": 0.22113938629627228, |
| "learning_rate": 0.0001126865671641791, |
| "loss": 0.3837, |
| "step": 2269 |
| }, |
| { |
| "epoch": 1.2513781697905182, |
| "grad_norm": 0.22951479256153107, |
| "learning_rate": 0.00011260364842454394, |
| "loss": 0.391, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.2519294377067256, |
| "grad_norm": 0.22468437254428864, |
| "learning_rate": 0.00011252072968490878, |
| "loss": 0.3788, |
| "step": 2271 |
| }, |
| { |
| "epoch": 1.2524807056229328, |
| "grad_norm": 0.21054887771606445, |
| "learning_rate": 0.00011243781094527363, |
| "loss": 0.3891, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.25303197353914, |
| "grad_norm": 0.2274617701768875, |
| "learning_rate": 0.00011235489220563847, |
| "loss": 0.3883, |
| "step": 2273 |
| }, |
| { |
| "epoch": 1.2535832414553472, |
| "grad_norm": 0.22995011508464813, |
| "learning_rate": 0.0001122719734660033, |
| "loss": 0.3847, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.2541345093715546, |
| "grad_norm": 0.22627364099025726, |
| "learning_rate": 0.00011218905472636815, |
| "loss": 0.3924, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.254685777287762, |
| "grad_norm": 0.23559615015983582, |
| "learning_rate": 0.000112106135986733, |
| "loss": 0.3966, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.2552370452039692, |
| "grad_norm": 0.21304303407669067, |
| "learning_rate": 0.00011202321724709784, |
| "loss": 0.3624, |
| "step": 2277 |
| }, |
| { |
| "epoch": 1.2557883131201764, |
| "grad_norm": 0.241587296128273, |
| "learning_rate": 0.00011194029850746269, |
| "loss": 0.3719, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.2563395810363835, |
| "grad_norm": 0.22992491722106934, |
| "learning_rate": 0.00011185737976782752, |
| "loss": 0.4019, |
| "step": 2279 |
| }, |
| { |
| "epoch": 1.256890848952591, |
| "grad_norm": 0.2323186844587326, |
| "learning_rate": 0.00011177446102819237, |
| "loss": 0.3725, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.2574421168687984, |
| "grad_norm": 0.23510509729385376, |
| "learning_rate": 0.00011169154228855721, |
| "loss": 0.4176, |
| "step": 2281 |
| }, |
| { |
| "epoch": 1.2579933847850056, |
| "grad_norm": 0.23601877689361572, |
| "learning_rate": 0.00011160862354892206, |
| "loss": 0.4036, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.2585446527012127, |
| "grad_norm": 0.23654739558696747, |
| "learning_rate": 0.00011152570480928687, |
| "loss": 0.403, |
| "step": 2283 |
| }, |
| { |
| "epoch": 1.25909592061742, |
| "grad_norm": 0.2428976446390152, |
| "learning_rate": 0.00011144278606965172, |
| "loss": 0.3703, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.2596471885336273, |
| "grad_norm": 0.23753516376018524, |
| "learning_rate": 0.00011135986733001657, |
| "loss": 0.3979, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.2601984564498347, |
| "grad_norm": 0.2367447316646576, |
| "learning_rate": 0.00011127694859038141, |
| "loss": 0.3822, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.260749724366042, |
| "grad_norm": 0.2365788072347641, |
| "learning_rate": 0.00011119402985074626, |
| "loss": 0.389, |
| "step": 2287 |
| }, |
| { |
| "epoch": 1.2613009922822491, |
| "grad_norm": 0.22868278622627258, |
| "learning_rate": 0.00011111111111111109, |
| "loss": 0.391, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.2618522601984565, |
| "grad_norm": 0.23099401593208313, |
| "learning_rate": 0.00011102819237147593, |
| "loss": 0.3947, |
| "step": 2289 |
| }, |
| { |
| "epoch": 1.2624035281146637, |
| "grad_norm": 0.24031782150268555, |
| "learning_rate": 0.00011094527363184078, |
| "loss": 0.3839, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.2629547960308711, |
| "grad_norm": 0.2490132451057434, |
| "learning_rate": 0.00011086235489220563, |
| "loss": 0.3896, |
| "step": 2291 |
| }, |
| { |
| "epoch": 1.2635060639470783, |
| "grad_norm": 0.2366219013929367, |
| "learning_rate": 0.00011077943615257047, |
| "loss": 0.3933, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.2640573318632855, |
| "grad_norm": 0.22578656673431396, |
| "learning_rate": 0.0001106965174129353, |
| "loss": 0.3723, |
| "step": 2293 |
| }, |
| { |
| "epoch": 1.264608599779493, |
| "grad_norm": 0.23483921587467194, |
| "learning_rate": 0.00011061359867330015, |
| "loss": 0.3895, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.2651598676957, |
| "grad_norm": 0.2586977481842041, |
| "learning_rate": 0.000110530679933665, |
| "loss": 0.4042, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.2657111356119075, |
| "grad_norm": 0.23051442205905914, |
| "learning_rate": 0.00011044776119402984, |
| "loss": 0.3862, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.2662624035281147, |
| "grad_norm": 0.2358439564704895, |
| "learning_rate": 0.00011036484245439469, |
| "loss": 0.3798, |
| "step": 2297 |
| }, |
| { |
| "epoch": 1.2668136714443219, |
| "grad_norm": 0.23679201304912567, |
| "learning_rate": 0.00011028192371475952, |
| "loss": 0.4037, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.2673649393605293, |
| "grad_norm": 0.23940104246139526, |
| "learning_rate": 0.00011019900497512436, |
| "loss": 0.3898, |
| "step": 2299 |
| }, |
| { |
| "epoch": 1.2679162072767365, |
| "grad_norm": 0.23662586510181427, |
| "learning_rate": 0.00011011608623548921, |
| "loss": 0.4001, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.268467475192944, |
| "grad_norm": 0.23159541189670563, |
| "learning_rate": 0.00011003316749585405, |
| "loss": 0.3919, |
| "step": 2301 |
| }, |
| { |
| "epoch": 1.269018743109151, |
| "grad_norm": 0.21939191222190857, |
| "learning_rate": 0.0001099502487562189, |
| "loss": 0.3902, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.2695700110253583, |
| "grad_norm": 0.24052447080612183, |
| "learning_rate": 0.00010986733001658373, |
| "loss": 0.391, |
| "step": 2303 |
| }, |
| { |
| "epoch": 1.2701212789415657, |
| "grad_norm": 0.22359569370746613, |
| "learning_rate": 0.00010978441127694858, |
| "loss": 0.3813, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.2706725468577729, |
| "grad_norm": 0.22367626428604126, |
| "learning_rate": 0.00010970149253731342, |
| "loss": 0.3873, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.2712238147739803, |
| "grad_norm": 0.24156810343265533, |
| "learning_rate": 0.00010961857379767827, |
| "loss": 0.3996, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.2717750826901875, |
| "grad_norm": 0.23700320720672607, |
| "learning_rate": 0.00010953565505804311, |
| "loss": 0.3901, |
| "step": 2307 |
| }, |
| { |
| "epoch": 1.2723263506063947, |
| "grad_norm": 0.2303237020969391, |
| "learning_rate": 0.00010945273631840795, |
| "loss": 0.4031, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.272877618522602, |
| "grad_norm": 0.2249428927898407, |
| "learning_rate": 0.00010936981757877279, |
| "loss": 0.3942, |
| "step": 2309 |
| }, |
| { |
| "epoch": 1.2734288864388092, |
| "grad_norm": 0.2448328137397766, |
| "learning_rate": 0.00010928689883913764, |
| "loss": 0.3941, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.2739801543550167, |
| "grad_norm": 0.23278410732746124, |
| "learning_rate": 0.00010920398009950248, |
| "loss": 0.395, |
| "step": 2311 |
| }, |
| { |
| "epoch": 1.2745314222712238, |
| "grad_norm": 0.24542638659477234, |
| "learning_rate": 0.00010912106135986733, |
| "loss": 0.4278, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.275082690187431, |
| "grad_norm": 0.22305360436439514, |
| "learning_rate": 0.00010903814262023216, |
| "loss": 0.3932, |
| "step": 2313 |
| }, |
| { |
| "epoch": 1.2756339581036384, |
| "grad_norm": 0.24365827441215515, |
| "learning_rate": 0.00010895522388059701, |
| "loss": 0.3963, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.2761852260198456, |
| "grad_norm": 0.24421466886997223, |
| "learning_rate": 0.00010887230514096185, |
| "loss": 0.3956, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.276736493936053, |
| "grad_norm": 0.24353346228599548, |
| "learning_rate": 0.0001087893864013267, |
| "loss": 0.3837, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.2772877618522602, |
| "grad_norm": 0.24044160544872284, |
| "learning_rate": 0.00010870646766169154, |
| "loss": 0.3964, |
| "step": 2317 |
| }, |
| { |
| "epoch": 1.2778390297684674, |
| "grad_norm": 0.2651362717151642, |
| "learning_rate": 0.00010862354892205638, |
| "loss": 0.388, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.2783902976846748, |
| "grad_norm": 0.23700033128261566, |
| "learning_rate": 0.00010854063018242122, |
| "loss": 0.38, |
| "step": 2319 |
| }, |
| { |
| "epoch": 1.278941565600882, |
| "grad_norm": 0.23535655438899994, |
| "learning_rate": 0.00010845771144278607, |
| "loss": 0.3934, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.2794928335170894, |
| "grad_norm": 0.26524481177330017, |
| "learning_rate": 0.00010837479270315091, |
| "loss": 0.3875, |
| "step": 2321 |
| }, |
| { |
| "epoch": 1.2800441014332966, |
| "grad_norm": 0.24175146222114563, |
| "learning_rate": 0.00010829187396351576, |
| "loss": 0.3634, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.2805953693495038, |
| "grad_norm": 0.231819286942482, |
| "learning_rate": 0.00010820895522388059, |
| "loss": 0.388, |
| "step": 2323 |
| }, |
| { |
| "epoch": 1.2811466372657112, |
| "grad_norm": 0.21814289689064026, |
| "learning_rate": 0.00010812603648424544, |
| "loss": 0.3711, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.2816979051819184, |
| "grad_norm": 0.23096728324890137, |
| "learning_rate": 0.00010804311774461028, |
| "loss": 0.3974, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.2822491730981258, |
| "grad_norm": 0.24553930759429932, |
| "learning_rate": 0.00010796019900497513, |
| "loss": 0.3897, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.282800441014333, |
| "grad_norm": 0.23141168057918549, |
| "learning_rate": 0.00010787728026533995, |
| "loss": 0.3898, |
| "step": 2327 |
| }, |
| { |
| "epoch": 1.2833517089305402, |
| "grad_norm": 0.23394468426704407, |
| "learning_rate": 0.00010779436152570479, |
| "loss": 0.4049, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.2839029768467476, |
| "grad_norm": 0.2231445461511612, |
| "learning_rate": 0.00010771144278606964, |
| "loss": 0.3911, |
| "step": 2329 |
| }, |
| { |
| "epoch": 1.2844542447629548, |
| "grad_norm": 0.2506980299949646, |
| "learning_rate": 0.00010762852404643448, |
| "loss": 0.423, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.2850055126791622, |
| "grad_norm": 0.23698961734771729, |
| "learning_rate": 0.00010754560530679931, |
| "loss": 0.4046, |
| "step": 2331 |
| }, |
| { |
| "epoch": 1.2855567805953694, |
| "grad_norm": 0.24735629558563232, |
| "learning_rate": 0.00010746268656716416, |
| "loss": 0.4078, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.2861080485115766, |
| "grad_norm": 0.25394487380981445, |
| "learning_rate": 0.000107379767827529, |
| "loss": 0.4027, |
| "step": 2333 |
| }, |
| { |
| "epoch": 1.286659316427784, |
| "grad_norm": 0.24036946892738342, |
| "learning_rate": 0.00010729684908789385, |
| "loss": 0.4042, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.2872105843439912, |
| "grad_norm": 0.24319007992744446, |
| "learning_rate": 0.0001072139303482587, |
| "loss": 0.3901, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.2877618522601986, |
| "grad_norm": 0.23505842685699463, |
| "learning_rate": 0.00010713101160862353, |
| "loss": 0.3914, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.2883131201764058, |
| "grad_norm": 0.24473319947719574, |
| "learning_rate": 0.00010704809286898837, |
| "loss": 0.4098, |
| "step": 2337 |
| }, |
| { |
| "epoch": 1.288864388092613, |
| "grad_norm": 0.24411208927631378, |
| "learning_rate": 0.00010696517412935322, |
| "loss": 0.4158, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.2894156560088204, |
| "grad_norm": 0.2365306317806244, |
| "learning_rate": 0.00010688225538971807, |
| "loss": 0.3955, |
| "step": 2339 |
| }, |
| { |
| "epoch": 1.2899669239250275, |
| "grad_norm": 0.23471403121948242, |
| "learning_rate": 0.00010679933665008291, |
| "loss": 0.3796, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.290518191841235, |
| "grad_norm": 0.22727487981319427, |
| "learning_rate": 0.00010671641791044774, |
| "loss": 0.4044, |
| "step": 2341 |
| }, |
| { |
| "epoch": 1.2910694597574421, |
| "grad_norm": 0.22571586072444916, |
| "learning_rate": 0.00010663349917081259, |
| "loss": 0.3551, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.2916207276736493, |
| "grad_norm": 0.24545998871326447, |
| "learning_rate": 0.00010655058043117743, |
| "loss": 0.4144, |
| "step": 2343 |
| }, |
| { |
| "epoch": 1.2921719955898567, |
| "grad_norm": 0.2357962727546692, |
| "learning_rate": 0.00010646766169154228, |
| "loss": 0.391, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.292723263506064, |
| "grad_norm": 0.23277200758457184, |
| "learning_rate": 0.00010638474295190713, |
| "loss": 0.4027, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.2932745314222713, |
| "grad_norm": 0.2385130524635315, |
| "learning_rate": 0.00010630182421227196, |
| "loss": 0.4039, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.2938257993384785, |
| "grad_norm": 0.21902373433113098, |
| "learning_rate": 0.0001062189054726368, |
| "loss": 0.3699, |
| "step": 2347 |
| }, |
| { |
| "epoch": 1.2943770672546857, |
| "grad_norm": 0.23025818169116974, |
| "learning_rate": 0.00010613598673300165, |
| "loss": 0.3822, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.2949283351708931, |
| "grad_norm": 0.2286684513092041, |
| "learning_rate": 0.0001060530679933665, |
| "loss": 0.401, |
| "step": 2349 |
| }, |
| { |
| "epoch": 1.2954796030871003, |
| "grad_norm": 0.23381029069423676, |
| "learning_rate": 0.00010597014925373134, |
| "loss": 0.3991, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.2960308710033077, |
| "grad_norm": 0.23572219908237457, |
| "learning_rate": 0.00010588723051409617, |
| "loss": 0.3993, |
| "step": 2351 |
| }, |
| { |
| "epoch": 1.296582138919515, |
| "grad_norm": 0.22969138622283936, |
| "learning_rate": 0.00010580431177446102, |
| "loss": 0.3859, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.297133406835722, |
| "grad_norm": 0.24054940044879913, |
| "learning_rate": 0.00010572139303482586, |
| "loss": 0.4137, |
| "step": 2353 |
| }, |
| { |
| "epoch": 1.2976846747519295, |
| "grad_norm": 0.235767662525177, |
| "learning_rate": 0.00010563847429519071, |
| "loss": 0.377, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.2982359426681367, |
| "grad_norm": 0.22807767987251282, |
| "learning_rate": 0.00010555555555555555, |
| "loss": 0.3974, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.298787210584344, |
| "grad_norm": 0.22131551802158356, |
| "learning_rate": 0.00010547263681592039, |
| "loss": 0.4002, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.2993384785005513, |
| "grad_norm": 0.24462686479091644, |
| "learning_rate": 0.00010538971807628523, |
| "loss": 0.4169, |
| "step": 2357 |
| }, |
| { |
| "epoch": 1.2998897464167585, |
| "grad_norm": 0.24126161634922028, |
| "learning_rate": 0.00010530679933665008, |
| "loss": 0.3846, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.3004410143329659, |
| "grad_norm": 0.2536928951740265, |
| "learning_rate": 0.00010522388059701492, |
| "loss": 0.3883, |
| "step": 2359 |
| }, |
| { |
| "epoch": 1.300992282249173, |
| "grad_norm": 0.23638053238391876, |
| "learning_rate": 0.00010514096185737977, |
| "loss": 0.3916, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.3015435501653805, |
| "grad_norm": 0.21713566780090332, |
| "learning_rate": 0.0001050580431177446, |
| "loss": 0.382, |
| "step": 2361 |
| }, |
| { |
| "epoch": 1.3020948180815877, |
| "grad_norm": 0.23291055858135223, |
| "learning_rate": 0.00010497512437810945, |
| "loss": 0.3831, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.3026460859977949, |
| "grad_norm": 0.2169044464826584, |
| "learning_rate": 0.00010489220563847429, |
| "loss": 0.3705, |
| "step": 2363 |
| }, |
| { |
| "epoch": 1.3031973539140023, |
| "grad_norm": 0.23216962814331055, |
| "learning_rate": 0.00010480928689883914, |
| "loss": 0.3691, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.3037486218302095, |
| "grad_norm": 0.2367962896823883, |
| "learning_rate": 0.00010472636815920398, |
| "loss": 0.4011, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.3042998897464169, |
| "grad_norm": 0.22988784313201904, |
| "learning_rate": 0.00010464344941956881, |
| "loss": 0.3904, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.304851157662624, |
| "grad_norm": 0.21731241047382355, |
| "learning_rate": 0.00010456053067993366, |
| "loss": 0.3815, |
| "step": 2367 |
| }, |
| { |
| "epoch": 1.3054024255788312, |
| "grad_norm": 0.25733426213264465, |
| "learning_rate": 0.0001044776119402985, |
| "loss": 0.4253, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.3059536934950386, |
| "grad_norm": 0.23438294231891632, |
| "learning_rate": 0.00010439469320066335, |
| "loss": 0.4041, |
| "step": 2369 |
| }, |
| { |
| "epoch": 1.3065049614112458, |
| "grad_norm": 0.22011101245880127, |
| "learning_rate": 0.0001043117744610282, |
| "loss": 0.3948, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.3070562293274532, |
| "grad_norm": 0.2404097616672516, |
| "learning_rate": 0.00010422885572139302, |
| "loss": 0.3996, |
| "step": 2371 |
| }, |
| { |
| "epoch": 1.3076074972436604, |
| "grad_norm": 0.23479090631008148, |
| "learning_rate": 0.00010414593698175786, |
| "loss": 0.4048, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.3081587651598676, |
| "grad_norm": 0.22892162203788757, |
| "learning_rate": 0.0001040630182421227, |
| "loss": 0.3751, |
| "step": 2373 |
| }, |
| { |
| "epoch": 1.308710033076075, |
| "grad_norm": 0.22712910175323486, |
| "learning_rate": 0.00010398009950248755, |
| "loss": 0.3777, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.3092613009922822, |
| "grad_norm": 0.22894370555877686, |
| "learning_rate": 0.00010389718076285238, |
| "loss": 0.3936, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.3098125689084896, |
| "grad_norm": 0.24097605049610138, |
| "learning_rate": 0.00010381426202321723, |
| "loss": 0.3693, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.3103638368246968, |
| "grad_norm": 0.23055890202522278, |
| "learning_rate": 0.00010373134328358208, |
| "loss": 0.3777, |
| "step": 2377 |
| }, |
| { |
| "epoch": 1.310915104740904, |
| "grad_norm": 0.23357531428337097, |
| "learning_rate": 0.00010364842454394692, |
| "loss": 0.3945, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.3114663726571114, |
| "grad_norm": 0.2378157526254654, |
| "learning_rate": 0.00010356550580431177, |
| "loss": 0.4077, |
| "step": 2379 |
| }, |
| { |
| "epoch": 1.3120176405733186, |
| "grad_norm": 0.2348390370607376, |
| "learning_rate": 0.0001034825870646766, |
| "loss": 0.3905, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.312568908489526, |
| "grad_norm": 0.24251805245876312, |
| "learning_rate": 0.00010339966832504144, |
| "loss": 0.4174, |
| "step": 2381 |
| }, |
| { |
| "epoch": 1.3131201764057332, |
| "grad_norm": 0.23102574050426483, |
| "learning_rate": 0.00010331674958540629, |
| "loss": 0.3856, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.3136714443219404, |
| "grad_norm": 0.2383720427751541, |
| "learning_rate": 0.00010323383084577114, |
| "loss": 0.3932, |
| "step": 2383 |
| }, |
| { |
| "epoch": 1.3142227122381478, |
| "grad_norm": 0.22161129117012024, |
| "learning_rate": 0.00010315091210613598, |
| "loss": 0.396, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.314773980154355, |
| "grad_norm": 0.2228018343448639, |
| "learning_rate": 0.00010306799336650081, |
| "loss": 0.3862, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.3153252480705624, |
| "grad_norm": 0.22873203456401825, |
| "learning_rate": 0.00010298507462686566, |
| "loss": 0.3513, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.3158765159867696, |
| "grad_norm": 0.23780828714370728, |
| "learning_rate": 0.0001029021558872305, |
| "loss": 0.3888, |
| "step": 2387 |
| }, |
| { |
| "epoch": 1.3164277839029768, |
| "grad_norm": 0.2447124868631363, |
| "learning_rate": 0.00010281923714759535, |
| "loss": 0.4046, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.3169790518191842, |
| "grad_norm": 0.24726513028144836, |
| "learning_rate": 0.0001027363184079602, |
| "loss": 0.4086, |
| "step": 2389 |
| }, |
| { |
| "epoch": 1.3175303197353914, |
| "grad_norm": 0.2359735518693924, |
| "learning_rate": 0.00010265339966832503, |
| "loss": 0.4015, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.3180815876515988, |
| "grad_norm": 0.23657964169979095, |
| "learning_rate": 0.00010257048092868987, |
| "loss": 0.3859, |
| "step": 2391 |
| }, |
| { |
| "epoch": 1.318632855567806, |
| "grad_norm": 0.23830877244472504, |
| "learning_rate": 0.00010248756218905472, |
| "loss": 0.3864, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.3191841234840131, |
| "grad_norm": 0.2303212434053421, |
| "learning_rate": 0.00010240464344941956, |
| "loss": 0.4036, |
| "step": 2393 |
| }, |
| { |
| "epoch": 1.3197353914002206, |
| "grad_norm": 0.2221781462430954, |
| "learning_rate": 0.0001023217247097844, |
| "loss": 0.3712, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.3202866593164277, |
| "grad_norm": 0.22085942327976227, |
| "learning_rate": 0.00010223880597014924, |
| "loss": 0.3708, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.3208379272326352, |
| "grad_norm": 0.24135445058345795, |
| "learning_rate": 0.00010215588723051409, |
| "loss": 0.3896, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.3213891951488423, |
| "grad_norm": 0.24116064608097076, |
| "learning_rate": 0.00010207296849087893, |
| "loss": 0.3866, |
| "step": 2397 |
| }, |
| { |
| "epoch": 1.3219404630650495, |
| "grad_norm": 0.26890698075294495, |
| "learning_rate": 0.00010199004975124378, |
| "loss": 0.3795, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.322491730981257, |
| "grad_norm": 0.2322501391172409, |
| "learning_rate": 0.00010190713101160861, |
| "loss": 0.3837, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.3230429988974641, |
| "grad_norm": 0.24631264805793762, |
| "learning_rate": 0.00010182421227197346, |
| "loss": 0.3954, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.3235942668136715, |
| "grad_norm": 0.2258647084236145, |
| "learning_rate": 0.0001017412935323383, |
| "loss": 0.3705, |
| "step": 2401 |
| }, |
| { |
| "epoch": 1.3241455347298787, |
| "grad_norm": 0.2519420087337494, |
| "learning_rate": 0.00010165837479270315, |
| "loss": 0.3921, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.324696802646086, |
| "grad_norm": 0.23400020599365234, |
| "learning_rate": 0.00010157545605306799, |
| "loss": 0.3702, |
| "step": 2403 |
| }, |
| { |
| "epoch": 1.3252480705622933, |
| "grad_norm": 0.22752946615219116, |
| "learning_rate": 0.00010149253731343282, |
| "loss": 0.3756, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.3257993384785005, |
| "grad_norm": 0.24144931137561798, |
| "learning_rate": 0.00010140961857379767, |
| "loss": 0.41, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.326350606394708, |
| "grad_norm": 0.24649466574192047, |
| "learning_rate": 0.00010132669983416252, |
| "loss": 0.4227, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.326901874310915, |
| "grad_norm": 0.22007010877132416, |
| "learning_rate": 0.00010124378109452736, |
| "loss": 0.3802, |
| "step": 2407 |
| }, |
| { |
| "epoch": 1.3274531422271223, |
| "grad_norm": 0.2177124321460724, |
| "learning_rate": 0.00010116086235489221, |
| "loss": 0.3733, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.3280044101433297, |
| "grad_norm": 0.23224158585071564, |
| "learning_rate": 0.00010107794361525704, |
| "loss": 0.3774, |
| "step": 2409 |
| }, |
| { |
| "epoch": 1.328555678059537, |
| "grad_norm": 0.24728813767433167, |
| "learning_rate": 0.00010099502487562188, |
| "loss": 0.3926, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.3291069459757443, |
| "grad_norm": 0.22190050780773163, |
| "learning_rate": 0.00010091210613598673, |
| "loss": 0.3826, |
| "step": 2411 |
| }, |
| { |
| "epoch": 1.3296582138919515, |
| "grad_norm": 0.23956191539764404, |
| "learning_rate": 0.00010082918739635158, |
| "loss": 0.3982, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.3302094818081587, |
| "grad_norm": 0.23789376020431519, |
| "learning_rate": 0.00010074626865671642, |
| "loss": 0.4032, |
| "step": 2413 |
| }, |
| { |
| "epoch": 1.330760749724366, |
| "grad_norm": 0.24080632627010345, |
| "learning_rate": 0.00010066334991708125, |
| "loss": 0.3974, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.3313120176405733, |
| "grad_norm": 0.22118644416332245, |
| "learning_rate": 0.00010058043117744609, |
| "loss": 0.3848, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.3318632855567807, |
| "grad_norm": 0.24440258741378784, |
| "learning_rate": 0.00010049751243781093, |
| "loss": 0.3801, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.3324145534729879, |
| "grad_norm": 0.23864087462425232, |
| "learning_rate": 0.00010041459369817578, |
| "loss": 0.4019, |
| "step": 2417 |
| }, |
| { |
| "epoch": 1.332965821389195, |
| "grad_norm": 0.2365901917219162, |
| "learning_rate": 0.00010033167495854061, |
| "loss": 0.3827, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.3335170893054025, |
| "grad_norm": 0.22480501234531403, |
| "learning_rate": 0.00010024875621890545, |
| "loss": 0.3696, |
| "step": 2419 |
| }, |
| { |
| "epoch": 1.3340683572216097, |
| "grad_norm": 0.23156774044036865, |
| "learning_rate": 0.0001001658374792703, |
| "loss": 0.3803, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.334619625137817, |
| "grad_norm": 0.22590211033821106, |
| "learning_rate": 0.00010008291873963515, |
| "loss": 0.387, |
| "step": 2421 |
| }, |
| { |
| "epoch": 1.3351708930540243, |
| "grad_norm": 0.2270091325044632, |
| "learning_rate": 9.999999999999999e-05, |
| "loss": 0.381, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.3357221609702314, |
| "grad_norm": 0.22601434588432312, |
| "learning_rate": 9.991708126036482e-05, |
| "loss": 0.3907, |
| "step": 2423 |
| }, |
| { |
| "epoch": 1.3362734288864389, |
| "grad_norm": 0.2249268740415573, |
| "learning_rate": 9.983416252072967e-05, |
| "loss": 0.3794, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.336824696802646, |
| "grad_norm": 0.2406623363494873, |
| "learning_rate": 9.975124378109451e-05, |
| "loss": 0.3912, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.3373759647188534, |
| "grad_norm": 0.24089276790618896, |
| "learning_rate": 9.966832504145936e-05, |
| "loss": 0.3997, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.3379272326350606, |
| "grad_norm": 0.2207108587026596, |
| "learning_rate": 9.95854063018242e-05, |
| "loss": 0.3804, |
| "step": 2427 |
| }, |
| { |
| "epoch": 1.3384785005512678, |
| "grad_norm": 0.21747317910194397, |
| "learning_rate": 9.950248756218904e-05, |
| "loss": 0.3808, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.3390297684674752, |
| "grad_norm": 0.2578473687171936, |
| "learning_rate": 9.941956882255388e-05, |
| "loss": 0.4195, |
| "step": 2429 |
| }, |
| { |
| "epoch": 1.3395810363836824, |
| "grad_norm": 0.22663085162639618, |
| "learning_rate": 9.933665008291873e-05, |
| "loss": 0.3877, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.3401323042998898, |
| "grad_norm": 0.24075528979301453, |
| "learning_rate": 9.925373134328357e-05, |
| "loss": 0.405, |
| "step": 2431 |
| }, |
| { |
| "epoch": 1.340683572216097, |
| "grad_norm": 0.22877177596092224, |
| "learning_rate": 9.917081260364842e-05, |
| "loss": 0.382, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.3412348401323042, |
| "grad_norm": 0.22892452776432037, |
| "learning_rate": 9.908789386401325e-05, |
| "loss": 0.3812, |
| "step": 2433 |
| }, |
| { |
| "epoch": 1.3417861080485116, |
| "grad_norm": 0.24187688529491425, |
| "learning_rate": 9.90049751243781e-05, |
| "loss": 0.3825, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.3423373759647188, |
| "grad_norm": 0.22903688251972198, |
| "learning_rate": 9.892205638474294e-05, |
| "loss": 0.3878, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.3428886438809262, |
| "grad_norm": 0.22924572229385376, |
| "learning_rate": 9.883913764510779e-05, |
| "loss": 0.388, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.3434399117971334, |
| "grad_norm": 0.24021534621715546, |
| "learning_rate": 9.875621890547263e-05, |
| "loss": 0.4031, |
| "step": 2437 |
| }, |
| { |
| "epoch": 1.3439911797133406, |
| "grad_norm": 0.23757272958755493, |
| "learning_rate": 9.867330016583747e-05, |
| "loss": 0.3934, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.344542447629548, |
| "grad_norm": 0.2555783987045288, |
| "learning_rate": 9.859038142620231e-05, |
| "loss": 0.3988, |
| "step": 2439 |
| }, |
| { |
| "epoch": 1.3450937155457552, |
| "grad_norm": 0.23108243942260742, |
| "learning_rate": 9.850746268656716e-05, |
| "loss": 0.379, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.3456449834619626, |
| "grad_norm": 0.24363455176353455, |
| "learning_rate": 9.8424543946932e-05, |
| "loss": 0.3939, |
| "step": 2441 |
| }, |
| { |
| "epoch": 1.3461962513781698, |
| "grad_norm": 0.2295197993516922, |
| "learning_rate": 9.834162520729685e-05, |
| "loss": 0.3799, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.346747519294377, |
| "grad_norm": 0.23563653230667114, |
| "learning_rate": 9.825870646766168e-05, |
| "loss": 0.3755, |
| "step": 2443 |
| }, |
| { |
| "epoch": 1.3472987872105844, |
| "grad_norm": 0.2241990715265274, |
| "learning_rate": 9.817578772802653e-05, |
| "loss": 0.3794, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.3478500551267916, |
| "grad_norm": 0.2593122124671936, |
| "learning_rate": 9.809286898839137e-05, |
| "loss": 0.3766, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.348401323042999, |
| "grad_norm": 0.22955520451068878, |
| "learning_rate": 9.800995024875622e-05, |
| "loss": 0.3787, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.3489525909592062, |
| "grad_norm": 0.23866330087184906, |
| "learning_rate": 9.792703150912106e-05, |
| "loss": 0.3955, |
| "step": 2447 |
| }, |
| { |
| "epoch": 1.3495038588754134, |
| "grad_norm": 0.24115972220897675, |
| "learning_rate": 9.78441127694859e-05, |
| "loss": 0.3811, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.3500551267916208, |
| "grad_norm": 0.23597833514213562, |
| "learning_rate": 9.776119402985074e-05, |
| "loss": 0.3831, |
| "step": 2449 |
| }, |
| { |
| "epoch": 1.350606394707828, |
| "grad_norm": 0.2415011078119278, |
| "learning_rate": 9.767827529021559e-05, |
| "loss": 0.3896, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3511576626240354, |
| "grad_norm": 0.2416457235813141, |
| "learning_rate": 9.759535655058043e-05, |
| "loss": 0.3888, |
| "step": 2451 |
| }, |
| { |
| "epoch": 1.3517089305402425, |
| "grad_norm": 0.23950545489788055, |
| "learning_rate": 9.751243781094528e-05, |
| "loss": 0.3942, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.3522601984564497, |
| "grad_norm": 0.24059046804904938, |
| "learning_rate": 9.742951907131011e-05, |
| "loss": 0.4005, |
| "step": 2453 |
| }, |
| { |
| "epoch": 1.3528114663726571, |
| "grad_norm": 0.2414311021566391, |
| "learning_rate": 9.734660033167496e-05, |
| "loss": 0.3795, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.3533627342888643, |
| "grad_norm": 0.23370300233364105, |
| "learning_rate": 9.72636815920398e-05, |
| "loss": 0.3728, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.3539140022050717, |
| "grad_norm": 0.23373939096927643, |
| "learning_rate": 9.718076285240465e-05, |
| "loss": 0.3925, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.354465270121279, |
| "grad_norm": 0.22576579451560974, |
| "learning_rate": 9.709784411276948e-05, |
| "loss": 0.3787, |
| "step": 2457 |
| }, |
| { |
| "epoch": 1.3550165380374861, |
| "grad_norm": 0.22904476523399353, |
| "learning_rate": 9.701492537313432e-05, |
| "loss": 0.3939, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.3555678059536935, |
| "grad_norm": 0.24833030998706818, |
| "learning_rate": 9.693200663349916e-05, |
| "loss": 0.394, |
| "step": 2459 |
| }, |
| { |
| "epoch": 1.3561190738699007, |
| "grad_norm": 0.22664152085781097, |
| "learning_rate": 9.6849087893864e-05, |
| "loss": 0.363, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.3566703417861081, |
| "grad_norm": 0.23569191992282867, |
| "learning_rate": 9.676616915422883e-05, |
| "loss": 0.3823, |
| "step": 2461 |
| }, |
| { |
| "epoch": 1.3572216097023153, |
| "grad_norm": 0.23659692704677582, |
| "learning_rate": 9.668325041459368e-05, |
| "loss": 0.3879, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.3577728776185225, |
| "grad_norm": 0.22711534798145294, |
| "learning_rate": 9.660033167495852e-05, |
| "loss": 0.3761, |
| "step": 2463 |
| }, |
| { |
| "epoch": 1.35832414553473, |
| "grad_norm": 0.23172332346439362, |
| "learning_rate": 9.651741293532337e-05, |
| "loss": 0.3774, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.358875413450937, |
| "grad_norm": 0.23141370713710785, |
| "learning_rate": 9.643449419568822e-05, |
| "loss": 0.3976, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.3594266813671445, |
| "grad_norm": 0.24368800222873688, |
| "learning_rate": 9.635157545605305e-05, |
| "loss": 0.3843, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.3599779492833517, |
| "grad_norm": 0.22588768601417542, |
| "learning_rate": 9.62686567164179e-05, |
| "loss": 0.3798, |
| "step": 2467 |
| }, |
| { |
| "epoch": 1.3605292171995589, |
| "grad_norm": 0.2269313633441925, |
| "learning_rate": 9.618573797678274e-05, |
| "loss": 0.3874, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.3610804851157663, |
| "grad_norm": 0.23487702012062073, |
| "learning_rate": 9.610281923714758e-05, |
| "loss": 0.3888, |
| "step": 2469 |
| }, |
| { |
| "epoch": 1.3616317530319735, |
| "grad_norm": 0.2513071894645691, |
| "learning_rate": 9.601990049751243e-05, |
| "loss": 0.4122, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.362183020948181, |
| "grad_norm": 0.21708211302757263, |
| "learning_rate": 9.593698175787726e-05, |
| "loss": 0.3597, |
| "step": 2471 |
| }, |
| { |
| "epoch": 1.362734288864388, |
| "grad_norm": 0.2279457300901413, |
| "learning_rate": 9.585406301824211e-05, |
| "loss": 0.3834, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.3632855567805953, |
| "grad_norm": 0.22766946256160736, |
| "learning_rate": 9.577114427860695e-05, |
| "loss": 0.3682, |
| "step": 2473 |
| }, |
| { |
| "epoch": 1.3638368246968027, |
| "grad_norm": 0.22673630714416504, |
| "learning_rate": 9.56882255389718e-05, |
| "loss": 0.3823, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.3643880926130099, |
| "grad_norm": 0.23767007887363434, |
| "learning_rate": 9.560530679933664e-05, |
| "loss": 0.3991, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.3649393605292173, |
| "grad_norm": 0.2326952964067459, |
| "learning_rate": 9.552238805970148e-05, |
| "loss": 0.39, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.3654906284454245, |
| "grad_norm": 0.2336025983095169, |
| "learning_rate": 9.543946932006632e-05, |
| "loss": 0.3748, |
| "step": 2477 |
| }, |
| { |
| "epoch": 1.3660418963616316, |
| "grad_norm": 0.23857955634593964, |
| "learning_rate": 9.535655058043117e-05, |
| "loss": 0.4077, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.366593164277839, |
| "grad_norm": 0.22810246050357819, |
| "learning_rate": 9.527363184079601e-05, |
| "loss": 0.406, |
| "step": 2479 |
| }, |
| { |
| "epoch": 1.3671444321940462, |
| "grad_norm": 0.23381425440311432, |
| "learning_rate": 9.519071310116086e-05, |
| "loss": 0.395, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.3676957001102537, |
| "grad_norm": 0.21443428099155426, |
| "learning_rate": 9.510779436152569e-05, |
| "loss": 0.3772, |
| "step": 2481 |
| }, |
| { |
| "epoch": 1.3682469680264608, |
| "grad_norm": 0.23185119032859802, |
| "learning_rate": 9.502487562189054e-05, |
| "loss": 0.3892, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.368798235942668, |
| "grad_norm": 0.2298753708600998, |
| "learning_rate": 9.494195688225538e-05, |
| "loss": 0.3891, |
| "step": 2483 |
| }, |
| { |
| "epoch": 1.3693495038588754, |
| "grad_norm": 0.216232031583786, |
| "learning_rate": 9.485903814262023e-05, |
| "loss": 0.382, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.3699007717750826, |
| "grad_norm": 0.23376402258872986, |
| "learning_rate": 9.477611940298507e-05, |
| "loss": 0.3992, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.37045203969129, |
| "grad_norm": 0.2535459101200104, |
| "learning_rate": 9.46932006633499e-05, |
| "loss": 0.3957, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.3710033076074972, |
| "grad_norm": 0.22214862704277039, |
| "learning_rate": 9.461028192371475e-05, |
| "loss": 0.3713, |
| "step": 2487 |
| }, |
| { |
| "epoch": 1.3715545755237044, |
| "grad_norm": 0.23064962029457092, |
| "learning_rate": 9.45273631840796e-05, |
| "loss": 0.3821, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.3721058434399118, |
| "grad_norm": 0.249479740858078, |
| "learning_rate": 9.444444444444444e-05, |
| "loss": 0.3837, |
| "step": 2489 |
| }, |
| { |
| "epoch": 1.372657111356119, |
| "grad_norm": 0.22704121470451355, |
| "learning_rate": 9.436152570480929e-05, |
| "loss": 0.3931, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.3732083792723264, |
| "grad_norm": 0.23015405237674713, |
| "learning_rate": 9.427860696517412e-05, |
| "loss": 0.4049, |
| "step": 2491 |
| }, |
| { |
| "epoch": 1.3737596471885336, |
| "grad_norm": 0.23387496173381805, |
| "learning_rate": 9.419568822553897e-05, |
| "loss": 0.3727, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.3743109151047408, |
| "grad_norm": 0.21825988590717316, |
| "learning_rate": 9.411276948590381e-05, |
| "loss": 0.382, |
| "step": 2493 |
| }, |
| { |
| "epoch": 1.3748621830209482, |
| "grad_norm": 0.2230725735425949, |
| "learning_rate": 9.402985074626866e-05, |
| "loss": 0.3935, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.3754134509371554, |
| "grad_norm": 0.22703075408935547, |
| "learning_rate": 9.39469320066335e-05, |
| "loss": 0.3848, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.3759647188533628, |
| "grad_norm": 0.2219892293214798, |
| "learning_rate": 9.386401326699833e-05, |
| "loss": 0.3898, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.37651598676957, |
| "grad_norm": 0.23172403872013092, |
| "learning_rate": 9.378109452736318e-05, |
| "loss": 0.3861, |
| "step": 2497 |
| }, |
| { |
| "epoch": 1.3770672546857772, |
| "grad_norm": 0.23237434029579163, |
| "learning_rate": 9.369817578772803e-05, |
| "loss": 0.3705, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.3776185226019846, |
| "grad_norm": 0.2246798872947693, |
| "learning_rate": 9.361525704809287e-05, |
| "loss": 0.3679, |
| "step": 2499 |
| }, |
| { |
| "epoch": 1.3781697905181918, |
| "grad_norm": 0.2427067756652832, |
| "learning_rate": 9.353233830845772e-05, |
| "loss": 0.4212, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3781697905181918, |
| "eval_loss": 0.4513299763202667, |
| "eval_runtime": 311.7925, |
| "eval_samples_per_second": 3.736, |
| "eval_steps_per_second": 0.468, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3787210584343992, |
| "grad_norm": 0.2319420874118805, |
| "learning_rate": 9.344941956882255e-05, |
| "loss": 0.3887, |
| "step": 2501 |
| }, |
| { |
| "epoch": 1.3792723263506064, |
| "grad_norm": 0.23304283618927002, |
| "learning_rate": 9.33665008291874e-05, |
| "loss": 0.396, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.3798235942668136, |
| "grad_norm": 0.2571066617965698, |
| "learning_rate": 9.328358208955223e-05, |
| "loss": 0.3862, |
| "step": 2503 |
| }, |
| { |
| "epoch": 1.380374862183021, |
| "grad_norm": 0.22332634031772614, |
| "learning_rate": 9.320066334991707e-05, |
| "loss": 0.3608, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.3809261300992282, |
| "grad_norm": 0.2485717236995697, |
| "learning_rate": 9.31177446102819e-05, |
| "loss": 0.4238, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.3814773980154356, |
| "grad_norm": 0.230104461312294, |
| "learning_rate": 9.303482587064675e-05, |
| "loss": 0.4036, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.3820286659316428, |
| "grad_norm": 0.2558598816394806, |
| "learning_rate": 9.29519071310116e-05, |
| "loss": 0.3958, |
| "step": 2507 |
| }, |
| { |
| "epoch": 1.38257993384785, |
| "grad_norm": 0.23400071263313293, |
| "learning_rate": 9.286898839137644e-05, |
| "loss": 0.3862, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.3831312017640573, |
| "grad_norm": 0.23237945139408112, |
| "learning_rate": 9.278606965174129e-05, |
| "loss": 0.3753, |
| "step": 2509 |
| }, |
| { |
| "epoch": 1.3836824696802645, |
| "grad_norm": 0.2357659935951233, |
| "learning_rate": 9.270315091210612e-05, |
| "loss": 0.3826, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.384233737596472, |
| "grad_norm": 0.2599101960659027, |
| "learning_rate": 9.262023217247096e-05, |
| "loss": 0.4028, |
| "step": 2511 |
| }, |
| { |
| "epoch": 1.3847850055126791, |
| "grad_norm": 0.2372962385416031, |
| "learning_rate": 9.253731343283581e-05, |
| "loss": 0.4181, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.3853362734288863, |
| "grad_norm": 0.27277928590774536, |
| "learning_rate": 9.245439469320065e-05, |
| "loss": 0.4025, |
| "step": 2513 |
| }, |
| { |
| "epoch": 1.3858875413450937, |
| "grad_norm": 0.22424361109733582, |
| "learning_rate": 9.237147595356549e-05, |
| "loss": 0.3735, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.386438809261301, |
| "grad_norm": 0.2312849462032318, |
| "learning_rate": 9.228855721393033e-05, |
| "loss": 0.4009, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.3869900771775083, |
| "grad_norm": 0.24405118823051453, |
| "learning_rate": 9.220563847429518e-05, |
| "loss": 0.4026, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.3875413450937155, |
| "grad_norm": 0.25049299001693726, |
| "learning_rate": 9.212271973466002e-05, |
| "loss": 0.3878, |
| "step": 2517 |
| }, |
| { |
| "epoch": 1.3880926130099227, |
| "grad_norm": 0.23999334871768951, |
| "learning_rate": 9.203980099502487e-05, |
| "loss": 0.3758, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.3886438809261301, |
| "grad_norm": 0.23169536888599396, |
| "learning_rate": 9.19568822553897e-05, |
| "loss": 0.3758, |
| "step": 2519 |
| }, |
| { |
| "epoch": 1.3891951488423373, |
| "grad_norm": 0.228010356426239, |
| "learning_rate": 9.187396351575455e-05, |
| "loss": 0.3731, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.3897464167585447, |
| "grad_norm": 0.2497485876083374, |
| "learning_rate": 9.179104477611939e-05, |
| "loss": 0.3995, |
| "step": 2521 |
| }, |
| { |
| "epoch": 1.390297684674752, |
| "grad_norm": 0.257614403963089, |
| "learning_rate": 9.170812603648424e-05, |
| "loss": 0.3873, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.390848952590959, |
| "grad_norm": 0.22421546280384064, |
| "learning_rate": 9.162520729684908e-05, |
| "loss": 0.3746, |
| "step": 2523 |
| }, |
| { |
| "epoch": 1.3914002205071665, |
| "grad_norm": 0.22990712523460388, |
| "learning_rate": 9.154228855721392e-05, |
| "loss": 0.3916, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.3919514884233737, |
| "grad_norm": 0.24670518934726715, |
| "learning_rate": 9.145936981757876e-05, |
| "loss": 0.3983, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.392502756339581, |
| "grad_norm": 0.23636974394321442, |
| "learning_rate": 9.137645107794361e-05, |
| "loss": 0.3776, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.3930540242557883, |
| "grad_norm": 0.2319977879524231, |
| "learning_rate": 9.129353233830845e-05, |
| "loss": 0.3809, |
| "step": 2527 |
| }, |
| { |
| "epoch": 1.3936052921719955, |
| "grad_norm": 0.22971488535404205, |
| "learning_rate": 9.12106135986733e-05, |
| "loss": 0.3643, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.3941565600882029, |
| "grad_norm": 0.24024169147014618, |
| "learning_rate": 9.112769485903813e-05, |
| "loss": 0.3915, |
| "step": 2529 |
| }, |
| { |
| "epoch": 1.39470782800441, |
| "grad_norm": 0.22295120358467102, |
| "learning_rate": 9.104477611940298e-05, |
| "loss": 0.3702, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.3952590959206175, |
| "grad_norm": 0.23186278343200684, |
| "learning_rate": 9.096185737976782e-05, |
| "loss": 0.3733, |
| "step": 2531 |
| }, |
| { |
| "epoch": 1.3958103638368247, |
| "grad_norm": 0.25662240386009216, |
| "learning_rate": 9.087893864013267e-05, |
| "loss": 0.3843, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.3963616317530319, |
| "grad_norm": 0.24374930560588837, |
| "learning_rate": 9.079601990049751e-05, |
| "loss": 0.4025, |
| "step": 2533 |
| }, |
| { |
| "epoch": 1.3969128996692393, |
| "grad_norm": 0.22312727570533752, |
| "learning_rate": 9.071310116086234e-05, |
| "loss": 0.3794, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.3974641675854464, |
| "grad_norm": 0.21616993844509125, |
| "learning_rate": 9.063018242122719e-05, |
| "loss": 0.3771, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.3980154355016539, |
| "grad_norm": 0.24162566661834717, |
| "learning_rate": 9.054726368159204e-05, |
| "loss": 0.3797, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.398566703417861, |
| "grad_norm": 0.24157093465328217, |
| "learning_rate": 9.046434494195688e-05, |
| "loss": 0.3815, |
| "step": 2537 |
| }, |
| { |
| "epoch": 1.3991179713340682, |
| "grad_norm": 0.2437802404165268, |
| "learning_rate": 9.038142620232173e-05, |
| "loss": 0.3944, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.3996692392502756, |
| "grad_norm": 0.24138353765010834, |
| "learning_rate": 9.029850746268656e-05, |
| "loss": 0.392, |
| "step": 2539 |
| }, |
| { |
| "epoch": 1.4002205071664828, |
| "grad_norm": 0.25548362731933594, |
| "learning_rate": 9.02155887230514e-05, |
| "loss": 0.408, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.4007717750826902, |
| "grad_norm": 0.24517594277858734, |
| "learning_rate": 9.013266998341625e-05, |
| "loss": 0.3979, |
| "step": 2541 |
| }, |
| { |
| "epoch": 1.4013230429988974, |
| "grad_norm": 0.24252092838287354, |
| "learning_rate": 9.00497512437811e-05, |
| "loss": 0.4122, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.4018743109151046, |
| "grad_norm": 0.23663447797298431, |
| "learning_rate": 8.996683250414594e-05, |
| "loss": 0.3936, |
| "step": 2543 |
| }, |
| { |
| "epoch": 1.402425578831312, |
| "grad_norm": 0.2445666640996933, |
| "learning_rate": 8.988391376451077e-05, |
| "loss": 0.3863, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.4029768467475192, |
| "grad_norm": 0.24747510254383087, |
| "learning_rate": 8.980099502487562e-05, |
| "loss": 0.4024, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.4035281146637266, |
| "grad_norm": 0.22010785341262817, |
| "learning_rate": 8.971807628524046e-05, |
| "loss": 0.3765, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.4040793825799338, |
| "grad_norm": 0.24189656972885132, |
| "learning_rate": 8.963515754560531e-05, |
| "loss": 0.3735, |
| "step": 2547 |
| }, |
| { |
| "epoch": 1.404630650496141, |
| "grad_norm": 0.23379263281822205, |
| "learning_rate": 8.955223880597013e-05, |
| "loss": 0.3886, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.4051819184123484, |
| "grad_norm": 0.2319820672273636, |
| "learning_rate": 8.946932006633497e-05, |
| "loss": 0.3932, |
| "step": 2549 |
| }, |
| { |
| "epoch": 1.4057331863285556, |
| "grad_norm": 0.2426556944847107, |
| "learning_rate": 8.938640132669982e-05, |
| "loss": 0.3579, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.406284454244763, |
| "grad_norm": 0.23170387744903564, |
| "learning_rate": 8.930348258706467e-05, |
| "loss": 0.3657, |
| "step": 2551 |
| }, |
| { |
| "epoch": 1.4068357221609702, |
| "grad_norm": 0.24107246100902557, |
| "learning_rate": 8.922056384742951e-05, |
| "loss": 0.4121, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.4073869900771774, |
| "grad_norm": 0.23268483579158783, |
| "learning_rate": 8.913764510779434e-05, |
| "loss": 0.3964, |
| "step": 2553 |
| }, |
| { |
| "epoch": 1.4079382579933848, |
| "grad_norm": 0.24437369406223297, |
| "learning_rate": 8.905472636815919e-05, |
| "loss": 0.3886, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.4084895259095922, |
| "grad_norm": 0.2408677190542221, |
| "learning_rate": 8.897180762852403e-05, |
| "loss": 0.4128, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.4090407938257994, |
| "grad_norm": 0.24828049540519714, |
| "learning_rate": 8.888888888888888e-05, |
| "loss": 0.3968, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.4095920617420066, |
| "grad_norm": 0.25326454639434814, |
| "learning_rate": 8.880597014925373e-05, |
| "loss": 0.4163, |
| "step": 2557 |
| }, |
| { |
| "epoch": 1.4101433296582138, |
| "grad_norm": 0.2104220986366272, |
| "learning_rate": 8.872305140961856e-05, |
| "loss": 0.3861, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.4106945975744212, |
| "grad_norm": 0.24456249177455902, |
| "learning_rate": 8.86401326699834e-05, |
| "loss": 0.3969, |
| "step": 2559 |
| }, |
| { |
| "epoch": 1.4112458654906286, |
| "grad_norm": 0.23775126039981842, |
| "learning_rate": 8.855721393034825e-05, |
| "loss": 0.4024, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.4117971334068358, |
| "grad_norm": 0.2330765575170517, |
| "learning_rate": 8.84742951907131e-05, |
| "loss": 0.3988, |
| "step": 2561 |
| }, |
| { |
| "epoch": 1.412348401323043, |
| "grad_norm": 0.23499152064323425, |
| "learning_rate": 8.839137645107794e-05, |
| "loss": 0.4021, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.4128996692392501, |
| "grad_norm": 0.23784568905830383, |
| "learning_rate": 8.830845771144277e-05, |
| "loss": 0.4093, |
| "step": 2563 |
| }, |
| { |
| "epoch": 1.4134509371554576, |
| "grad_norm": 0.25330281257629395, |
| "learning_rate": 8.822553897180762e-05, |
| "loss": 0.3896, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.414002205071665, |
| "grad_norm": 0.2372010052204132, |
| "learning_rate": 8.814262023217246e-05, |
| "loss": 0.3887, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.4145534729878722, |
| "grad_norm": 0.227810338139534, |
| "learning_rate": 8.805970149253731e-05, |
| "loss": 0.3727, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.4151047409040793, |
| "grad_norm": 0.23357363045215607, |
| "learning_rate": 8.797678275290215e-05, |
| "loss": 0.3735, |
| "step": 2567 |
| }, |
| { |
| "epoch": 1.4156560088202865, |
| "grad_norm": 0.23767000436782837, |
| "learning_rate": 8.789386401326699e-05, |
| "loss": 0.3906, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.416207276736494, |
| "grad_norm": 0.22021612524986267, |
| "learning_rate": 8.781094527363183e-05, |
| "loss": 0.3907, |
| "step": 2569 |
| }, |
| { |
| "epoch": 1.4167585446527013, |
| "grad_norm": 0.22677011787891388, |
| "learning_rate": 8.772802653399668e-05, |
| "loss": 0.3568, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.4173098125689085, |
| "grad_norm": 0.23188649117946625, |
| "learning_rate": 8.764510779436152e-05, |
| "loss": 0.3872, |
| "step": 2571 |
| }, |
| { |
| "epoch": 1.4178610804851157, |
| "grad_norm": 0.24772998690605164, |
| "learning_rate": 8.756218905472637e-05, |
| "loss": 0.4013, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.418412348401323, |
| "grad_norm": 0.23278258740901947, |
| "learning_rate": 8.74792703150912e-05, |
| "loss": 0.3783, |
| "step": 2573 |
| }, |
| { |
| "epoch": 1.4189636163175303, |
| "grad_norm": 0.24379077553749084, |
| "learning_rate": 8.739635157545605e-05, |
| "loss": 0.3929, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.4195148842337377, |
| "grad_norm": 0.23344534635543823, |
| "learning_rate": 8.731343283582089e-05, |
| "loss": 0.3709, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.420066152149945, |
| "grad_norm": 0.23678019642829895, |
| "learning_rate": 8.723051409618574e-05, |
| "loss": 0.3973, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.420617420066152, |
| "grad_norm": 0.23193979263305664, |
| "learning_rate": 8.714759535655057e-05, |
| "loss": 0.3778, |
| "step": 2577 |
| }, |
| { |
| "epoch": 1.4211686879823593, |
| "grad_norm": 0.24555335938930511, |
| "learning_rate": 8.706467661691541e-05, |
| "loss": 0.4252, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.4217199558985667, |
| "grad_norm": 0.22985686361789703, |
| "learning_rate": 8.698175787728026e-05, |
| "loss": 0.3896, |
| "step": 2579 |
| }, |
| { |
| "epoch": 1.422271223814774, |
| "grad_norm": 0.24446120858192444, |
| "learning_rate": 8.68988391376451e-05, |
| "loss": 0.3969, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.4228224917309813, |
| "grad_norm": 0.22781571745872498, |
| "learning_rate": 8.681592039800995e-05, |
| "loss": 0.3836, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.4233737596471885, |
| "grad_norm": 0.2543814778327942, |
| "learning_rate": 8.673300165837478e-05, |
| "loss": 0.3934, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.4239250275633957, |
| "grad_norm": 0.2298593968153, |
| "learning_rate": 8.665008291873963e-05, |
| "loss": 0.3894, |
| "step": 2583 |
| }, |
| { |
| "epoch": 1.424476295479603, |
| "grad_norm": 0.24680182337760925, |
| "learning_rate": 8.656716417910447e-05, |
| "loss": 0.3928, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.4250275633958105, |
| "grad_norm": 0.2492562234401703, |
| "learning_rate": 8.648424543946932e-05, |
| "loss": 0.3793, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.4255788313120177, |
| "grad_norm": 0.24546745419502258, |
| "learning_rate": 8.640132669983417e-05, |
| "loss": 0.3671, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.4261300992282249, |
| "grad_norm": 0.24431215226650238, |
| "learning_rate": 8.6318407960199e-05, |
| "loss": 0.3613, |
| "step": 2587 |
| }, |
| { |
| "epoch": 1.426681367144432, |
| "grad_norm": 0.24530234932899475, |
| "learning_rate": 8.623548922056384e-05, |
| "loss": 0.3894, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.4272326350606395, |
| "grad_norm": 0.2521824240684509, |
| "learning_rate": 8.615257048092869e-05, |
| "loss": 0.3938, |
| "step": 2589 |
| }, |
| { |
| "epoch": 1.4277839029768469, |
| "grad_norm": 0.23589465022087097, |
| "learning_rate": 8.606965174129353e-05, |
| "loss": 0.377, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.428335170893054, |
| "grad_norm": 0.22879983484745026, |
| "learning_rate": 8.598673300165838e-05, |
| "loss": 0.387, |
| "step": 2591 |
| }, |
| { |
| "epoch": 1.4288864388092613, |
| "grad_norm": 0.2426953762769699, |
| "learning_rate": 8.59038142620232e-05, |
| "loss": 0.3921, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.4294377067254687, |
| "grad_norm": 0.2464035451412201, |
| "learning_rate": 8.582089552238804e-05, |
| "loss": 0.3842, |
| "step": 2593 |
| }, |
| { |
| "epoch": 1.4299889746416758, |
| "grad_norm": 0.24871256947517395, |
| "learning_rate": 8.573797678275289e-05, |
| "loss": 0.4075, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.4305402425578833, |
| "grad_norm": 0.22682443261146545, |
| "learning_rate": 8.565505804311774e-05, |
| "loss": 0.3538, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.4310915104740904, |
| "grad_norm": 0.23264093697071075, |
| "learning_rate": 8.557213930348257e-05, |
| "loss": 0.3802, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.4316427783902976, |
| "grad_norm": 0.2368372529745102, |
| "learning_rate": 8.548922056384741e-05, |
| "loss": 0.3897, |
| "step": 2597 |
| }, |
| { |
| "epoch": 1.432194046306505, |
| "grad_norm": 0.23906560242176056, |
| "learning_rate": 8.540630182421226e-05, |
| "loss": 0.3691, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.4327453142227122, |
| "grad_norm": 0.22911648452281952, |
| "learning_rate": 8.53233830845771e-05, |
| "loss": 0.3829, |
| "step": 2599 |
| }, |
| { |
| "epoch": 1.4332965821389196, |
| "grad_norm": 0.23407630622386932, |
| "learning_rate": 8.524046434494195e-05, |
| "loss": 0.3841, |
| "step": 2600 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3628, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.918731552725244e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|