| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2069, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00048344210780759005, |
| "grad_norm": 2.4757904153326913, |
| "learning_rate": 0.0, |
| "loss": 0.9924, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009668842156151801, |
| "grad_norm": 2.3793618820940923, |
| "learning_rate": 9.615384615384617e-08, |
| "loss": 0.9738, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0014503263234227702, |
| "grad_norm": 2.375691201697703, |
| "learning_rate": 1.9230769230769234e-07, |
| "loss": 0.9588, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0019337684312303602, |
| "grad_norm": 2.3403619553808497, |
| "learning_rate": 2.884615384615385e-07, |
| "loss": 0.9862, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00241721053903795, |
| "grad_norm": 2.3613475552419394, |
| "learning_rate": 3.846153846153847e-07, |
| "loss": 0.9758, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0029006526468455403, |
| "grad_norm": 2.374422358129782, |
| "learning_rate": 4.807692307692308e-07, |
| "loss": 0.9716, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00338409475465313, |
| "grad_norm": 2.478706471115894, |
| "learning_rate": 5.76923076923077e-07, |
| "loss": 0.976, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0038675368624607204, |
| "grad_norm": 2.3811968693026198, |
| "learning_rate": 6.730769230769231e-07, |
| "loss": 0.9873, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00435097897026831, |
| "grad_norm": 2.2147341913024956, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 0.9286, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0048344210780759, |
| "grad_norm": 2.335255162349414, |
| "learning_rate": 8.653846153846154e-07, |
| "loss": 0.9845, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005317863185883491, |
| "grad_norm": 2.218894644037587, |
| "learning_rate": 9.615384615384617e-07, |
| "loss": 0.95, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.005801305293691081, |
| "grad_norm": 2.2403773948516226, |
| "learning_rate": 1.0576923076923078e-06, |
| "loss": 0.9715, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0062847474014986705, |
| "grad_norm": 2.164785866254398, |
| "learning_rate": 1.153846153846154e-06, |
| "loss": 0.9422, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00676818950930626, |
| "grad_norm": 2.2075110374685947, |
| "learning_rate": 1.25e-06, |
| "loss": 0.965, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.007251631617113851, |
| "grad_norm": 1.9308798071113116, |
| "learning_rate": 1.3461538461538462e-06, |
| "loss": 0.9372, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007735073724921441, |
| "grad_norm": 1.8705357350667309, |
| "learning_rate": 1.4423076923076922e-06, |
| "loss": 0.9443, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00821851583272903, |
| "grad_norm": 1.775691766227149, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 0.9362, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00870195794053662, |
| "grad_norm": 1.7290053738093054, |
| "learning_rate": 1.6346153846153848e-06, |
| "loss": 0.9298, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.00918540004834421, |
| "grad_norm": 1.6541389612298973, |
| "learning_rate": 1.7307692307692308e-06, |
| "loss": 0.9336, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0096688421561518, |
| "grad_norm": 1.2338607620225968, |
| "learning_rate": 1.826923076923077e-06, |
| "loss": 0.9055, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01015228426395939, |
| "grad_norm": 1.1808086522456918, |
| "learning_rate": 1.9230769230769234e-06, |
| "loss": 0.8962, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010635726371766982, |
| "grad_norm": 1.090531117286559, |
| "learning_rate": 2.0192307692307692e-06, |
| "loss": 0.8702, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.011119168479574571, |
| "grad_norm": 1.095517820053717, |
| "learning_rate": 2.1153846153846155e-06, |
| "loss": 0.8816, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.011602610587382161, |
| "grad_norm": 1.0208393908518454, |
| "learning_rate": 2.211538461538462e-06, |
| "loss": 0.8699, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012086052695189751, |
| "grad_norm": 1.004109121666044, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 0.8669, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012569494802997341, |
| "grad_norm": 0.98169412760157, |
| "learning_rate": 2.403846153846154e-06, |
| "loss": 0.8371, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01305293691080493, |
| "grad_norm": 0.9209444270757048, |
| "learning_rate": 2.5e-06, |
| "loss": 0.8388, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01353637901861252, |
| "grad_norm": 0.8619822284316448, |
| "learning_rate": 2.5961538461538465e-06, |
| "loss": 0.8041, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01401982112642011, |
| "grad_norm": 0.9241232197315488, |
| "learning_rate": 2.6923076923076923e-06, |
| "loss": 0.8091, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.014503263234227702, |
| "grad_norm": 0.917429451582305, |
| "learning_rate": 2.7884615384615386e-06, |
| "loss": 0.7749, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014986705342035292, |
| "grad_norm": 0.9043786370452085, |
| "learning_rate": 2.8846153846153845e-06, |
| "loss": 0.8144, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.015470147449842882, |
| "grad_norm": 0.820031258272968, |
| "learning_rate": 2.980769230769231e-06, |
| "loss": 0.7931, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01595358955765047, |
| "grad_norm": 0.7726701862119408, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 0.7903, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01643703166545806, |
| "grad_norm": 0.6871638945331215, |
| "learning_rate": 3.1730769230769233e-06, |
| "loss": 0.7256, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01692047377326565, |
| "grad_norm": 0.7302580243312591, |
| "learning_rate": 3.2692307692307696e-06, |
| "loss": 0.7956, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01740391588107324, |
| "grad_norm": 0.6737498928543134, |
| "learning_rate": 3.365384615384616e-06, |
| "loss": 0.7478, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01788735798888083, |
| "grad_norm": 0.6914440787905148, |
| "learning_rate": 3.4615384615384617e-06, |
| "loss": 0.7621, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01837080009668842, |
| "grad_norm": 0.6869329802424697, |
| "learning_rate": 3.557692307692308e-06, |
| "loss": 0.7706, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01885424220449601, |
| "grad_norm": 0.6549740713679569, |
| "learning_rate": 3.653846153846154e-06, |
| "loss": 0.7237, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0193376843123036, |
| "grad_norm": 0.6922145753217636, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.7537, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01982112642011119, |
| "grad_norm": 0.665714225887781, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 0.7656, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02030456852791878, |
| "grad_norm": 0.6390200477155564, |
| "learning_rate": 3.942307692307692e-06, |
| "loss": 0.7558, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.020788010635726373, |
| "grad_norm": 0.6558268717213803, |
| "learning_rate": 4.0384615384615385e-06, |
| "loss": 0.7408, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.021271452743533963, |
| "grad_norm": 0.6191043915893901, |
| "learning_rate": 4.134615384615385e-06, |
| "loss": 0.7482, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.021754894851341553, |
| "grad_norm": 0.6193196066166552, |
| "learning_rate": 4.230769230769231e-06, |
| "loss": 0.7424, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.022238336959149143, |
| "grad_norm": 0.6141046550876093, |
| "learning_rate": 4.326923076923077e-06, |
| "loss": 0.7372, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.022721779066956733, |
| "grad_norm": 0.6265315399192994, |
| "learning_rate": 4.423076923076924e-06, |
| "loss": 0.7362, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.023205221174764323, |
| "grad_norm": 0.6704953048927751, |
| "learning_rate": 4.51923076923077e-06, |
| "loss": 0.7326, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.023688663282571912, |
| "grad_norm": 0.6544522629648533, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 0.7275, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.024172105390379502, |
| "grad_norm": 0.6277879949612973, |
| "learning_rate": 4.711538461538462e-06, |
| "loss": 0.7311, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024655547498187092, |
| "grad_norm": 0.5924725867824154, |
| "learning_rate": 4.807692307692308e-06, |
| "loss": 0.7261, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.025138989605994682, |
| "grad_norm": 0.591545350722231, |
| "learning_rate": 4.903846153846154e-06, |
| "loss": 0.7092, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.025622431713802272, |
| "grad_norm": 0.5698079528908845, |
| "learning_rate": 5e-06, |
| "loss": 0.7093, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02610587382160986, |
| "grad_norm": 0.6162650749995418, |
| "learning_rate": 5.096153846153846e-06, |
| "loss": 0.687, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02658931592941745, |
| "grad_norm": 0.5577930499697958, |
| "learning_rate": 5.192307692307693e-06, |
| "loss": 0.7143, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02707275803722504, |
| "grad_norm": 0.5640209708881836, |
| "learning_rate": 5.288461538461539e-06, |
| "loss": 0.7059, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02755620014503263, |
| "grad_norm": 0.5430126891143467, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 0.704, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02803964225284022, |
| "grad_norm": 0.6009517822786309, |
| "learning_rate": 5.480769230769232e-06, |
| "loss": 0.7169, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02852308436064781, |
| "grad_norm": 0.6167733540891279, |
| "learning_rate": 5.576923076923077e-06, |
| "loss": 0.7065, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.029006526468455404, |
| "grad_norm": 0.5731458421774205, |
| "learning_rate": 5.6730769230769235e-06, |
| "loss": 0.6373, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.029489968576262994, |
| "grad_norm": 0.5489403473955915, |
| "learning_rate": 5.769230769230769e-06, |
| "loss": 0.7018, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.029973410684070584, |
| "grad_norm": 0.5325599545502842, |
| "learning_rate": 5.865384615384616e-06, |
| "loss": 0.6959, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.030456852791878174, |
| "grad_norm": 0.5478537783639954, |
| "learning_rate": 5.961538461538462e-06, |
| "loss": 0.6896, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.030940294899685764, |
| "grad_norm": 0.5363515063211778, |
| "learning_rate": 6.057692307692308e-06, |
| "loss": 0.7014, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03142373700749335, |
| "grad_norm": 0.5641946867306303, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.6903, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03190717911530094, |
| "grad_norm": 0.5481835775113026, |
| "learning_rate": 6.25e-06, |
| "loss": 0.6893, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03239062122310853, |
| "grad_norm": 0.5143476489389097, |
| "learning_rate": 6.3461538461538466e-06, |
| "loss": 0.6946, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03287406333091612, |
| "grad_norm": 0.590656354467126, |
| "learning_rate": 6.442307692307693e-06, |
| "loss": 0.6788, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03335750543872371, |
| "grad_norm": 0.5203563663337313, |
| "learning_rate": 6.538461538461539e-06, |
| "loss": 0.6847, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0338409475465313, |
| "grad_norm": 0.5084500426939229, |
| "learning_rate": 6.6346153846153846e-06, |
| "loss": 0.7086, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03432438965433889, |
| "grad_norm": 0.5032784264719405, |
| "learning_rate": 6.730769230769232e-06, |
| "loss": 0.6724, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03480783176214648, |
| "grad_norm": 0.5205048813341548, |
| "learning_rate": 6.826923076923078e-06, |
| "loss": 0.6592, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03529127386995407, |
| "grad_norm": 0.5066251849073853, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 0.6674, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03577471597776166, |
| "grad_norm": 0.5305760257061701, |
| "learning_rate": 7.01923076923077e-06, |
| "loss": 0.6665, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.03625815808556925, |
| "grad_norm": 0.5583636863825877, |
| "learning_rate": 7.115384615384616e-06, |
| "loss": 0.6685, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03674160019337684, |
| "grad_norm": 0.6055857508188283, |
| "learning_rate": 7.211538461538462e-06, |
| "loss": 0.6826, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03722504230118443, |
| "grad_norm": 0.5576393446552599, |
| "learning_rate": 7.307692307692308e-06, |
| "loss": 0.6811, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03770848440899202, |
| "grad_norm": 0.5550469150359895, |
| "learning_rate": 7.403846153846155e-06, |
| "loss": 0.6752, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03819192651679961, |
| "grad_norm": 0.5095358853416947, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.657, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0386753686246072, |
| "grad_norm": 0.519449515803278, |
| "learning_rate": 7.5961538461538465e-06, |
| "loss": 0.6326, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03915881073241479, |
| "grad_norm": 0.5360371671954463, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.6577, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03964225284022238, |
| "grad_norm": 0.5220981103197152, |
| "learning_rate": 7.78846153846154e-06, |
| "loss": 0.6803, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04012569494802997, |
| "grad_norm": 0.5357442529489778, |
| "learning_rate": 7.884615384615384e-06, |
| "loss": 0.662, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04060913705583756, |
| "grad_norm": 0.5922567510802571, |
| "learning_rate": 7.980769230769232e-06, |
| "loss": 0.6784, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04109257916364515, |
| "grad_norm": 0.5471555288958341, |
| "learning_rate": 8.076923076923077e-06, |
| "loss": 0.663, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04157602127145275, |
| "grad_norm": 0.5234614832210157, |
| "learning_rate": 8.173076923076923e-06, |
| "loss": 0.6633, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04205946337926034, |
| "grad_norm": 0.5014680527453607, |
| "learning_rate": 8.26923076923077e-06, |
| "loss": 0.6345, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04254290548706793, |
| "grad_norm": 0.5541925854592269, |
| "learning_rate": 8.365384615384616e-06, |
| "loss": 0.661, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.043026347594875516, |
| "grad_norm": 0.5556737957241218, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 0.6476, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.043509789702683106, |
| "grad_norm": 0.5358812925229628, |
| "learning_rate": 8.557692307692308e-06, |
| "loss": 0.6667, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.043993231810490696, |
| "grad_norm": 0.5285944473021625, |
| "learning_rate": 8.653846153846155e-06, |
| "loss": 0.6558, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.044476673918298286, |
| "grad_norm": 0.6130129115794695, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 0.6662, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.044960116026105876, |
| "grad_norm": 0.6086871477606206, |
| "learning_rate": 8.846153846153847e-06, |
| "loss": 0.6768, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.045443558133913466, |
| "grad_norm": 0.534737794998822, |
| "learning_rate": 8.942307692307693e-06, |
| "loss": 0.633, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.045927000241721055, |
| "grad_norm": 0.5048674854153722, |
| "learning_rate": 9.03846153846154e-06, |
| "loss": 0.6075, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.046410442349528645, |
| "grad_norm": 0.5516912026027078, |
| "learning_rate": 9.134615384615384e-06, |
| "loss": 0.623, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.046893884457336235, |
| "grad_norm": 0.6083291149980872, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.6556, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.047377326565143825, |
| "grad_norm": 0.5460750932826393, |
| "learning_rate": 9.326923076923079e-06, |
| "loss": 0.6524, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.047860768672951415, |
| "grad_norm": 0.5459534721301705, |
| "learning_rate": 9.423076923076923e-06, |
| "loss": 0.6449, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.048344210780759005, |
| "grad_norm": 0.5622412415254093, |
| "learning_rate": 9.51923076923077e-06, |
| "loss": 0.6517, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.048827652888566594, |
| "grad_norm": 0.6148179967646931, |
| "learning_rate": 9.615384615384616e-06, |
| "loss": 0.636, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.049311094996374184, |
| "grad_norm": 0.5377477077942675, |
| "learning_rate": 9.711538461538462e-06, |
| "loss": 0.6569, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.049794537104181774, |
| "grad_norm": 0.535881794576154, |
| "learning_rate": 9.807692307692308e-06, |
| "loss": 0.6515, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.050277979211989364, |
| "grad_norm": 0.5554528998874018, |
| "learning_rate": 9.903846153846155e-06, |
| "loss": 0.6471, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.050761421319796954, |
| "grad_norm": 0.5472055318440415, |
| "learning_rate": 1e-05, |
| "loss": 0.6212, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.051244863427604544, |
| "grad_norm": 0.5562423079812571, |
| "learning_rate": 9.99999360979851e-06, |
| "loss": 0.6483, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.051728305535412133, |
| "grad_norm": 0.6203972023036308, |
| "learning_rate": 9.999974439210376e-06, |
| "loss": 0.6474, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05221174764321972, |
| "grad_norm": 0.5879110259866966, |
| "learning_rate": 9.999942488284598e-06, |
| "loss": 0.6506, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05269518975102731, |
| "grad_norm": 0.5415023727684817, |
| "learning_rate": 9.999897757102843e-06, |
| "loss": 0.641, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0531786318588349, |
| "grad_norm": 0.5661080832571289, |
| "learning_rate": 9.99984024577945e-06, |
| "loss": 0.6561, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05366207396664249, |
| "grad_norm": 0.6384080684659277, |
| "learning_rate": 9.999769954461425e-06, |
| "loss": 0.6181, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05414551607445008, |
| "grad_norm": 0.5321086465207798, |
| "learning_rate": 9.999686883328433e-06, |
| "loss": 0.6269, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05462895818225767, |
| "grad_norm": 0.5658443861351884, |
| "learning_rate": 9.999591032592813e-06, |
| "loss": 0.6317, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.05511240029006526, |
| "grad_norm": 0.5759014415066968, |
| "learning_rate": 9.999482402499569e-06, |
| "loss": 0.6468, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05559584239787285, |
| "grad_norm": 0.5480587710988183, |
| "learning_rate": 9.999360993326366e-06, |
| "loss": 0.6359, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05607928450568044, |
| "grad_norm": 0.6380718424826206, |
| "learning_rate": 9.999226805383534e-06, |
| "loss": 0.6349, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.05656272661348803, |
| "grad_norm": 0.5246861209498886, |
| "learning_rate": 9.999079839014074e-06, |
| "loss": 0.6399, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05704616872129562, |
| "grad_norm": 0.515686613549135, |
| "learning_rate": 9.998920094593637e-06, |
| "loss": 0.5984, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.05752961082910321, |
| "grad_norm": 0.5607127828178857, |
| "learning_rate": 9.998747572530548e-06, |
| "loss": 0.6398, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.05801305293691081, |
| "grad_norm": 0.5459763409466101, |
| "learning_rate": 9.998562273265786e-06, |
| "loss": 0.626, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0584964950447184, |
| "grad_norm": 0.5525418327052581, |
| "learning_rate": 9.998364197272988e-06, |
| "loss": 0.6537, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.05897993715252599, |
| "grad_norm": 6.74083445541264, |
| "learning_rate": 9.998153345058454e-06, |
| "loss": 0.9475, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05946337926033358, |
| "grad_norm": 0.6141628359508349, |
| "learning_rate": 9.997929717161142e-06, |
| "loss": 0.6473, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.05994682136814117, |
| "grad_norm": 0.6652958169663876, |
| "learning_rate": 9.997693314152658e-06, |
| "loss": 0.6342, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06043026347594876, |
| "grad_norm": 0.606711498986106, |
| "learning_rate": 9.99744413663727e-06, |
| "loss": 0.623, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06091370558375635, |
| "grad_norm": 0.5175309840849823, |
| "learning_rate": 9.997182185251896e-06, |
| "loss": 0.6221, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.06139714769156394, |
| "grad_norm": 0.5341566674562975, |
| "learning_rate": 9.996907460666104e-06, |
| "loss": 0.6357, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.06188058979937153, |
| "grad_norm": 0.5922792510376619, |
| "learning_rate": 9.996619963582113e-06, |
| "loss": 0.6043, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.06236403190717912, |
| "grad_norm": 0.5694036510960461, |
| "learning_rate": 9.996319694734787e-06, |
| "loss": 0.6311, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0628474740149867, |
| "grad_norm": 0.5540844850790518, |
| "learning_rate": 9.99600665489164e-06, |
| "loss": 0.6411, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06333091612279429, |
| "grad_norm": 0.5371960793753483, |
| "learning_rate": 9.995680844852824e-06, |
| "loss": 0.6403, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06381435823060189, |
| "grad_norm": 0.5225384791967033, |
| "learning_rate": 9.995342265451138e-06, |
| "loss": 0.6269, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06429780033840947, |
| "grad_norm": 0.6035451474536077, |
| "learning_rate": 9.994990917552017e-06, |
| "loss": 0.6321, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06478124244621707, |
| "grad_norm": 0.6507380493478006, |
| "learning_rate": 9.994626802053536e-06, |
| "loss": 0.6236, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.06526468455402465, |
| "grad_norm": 0.5456651842881993, |
| "learning_rate": 9.994249919886402e-06, |
| "loss": 0.6258, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06574812666183225, |
| "grad_norm": 0.5172506944070536, |
| "learning_rate": 9.993860272013958e-06, |
| "loss": 0.6162, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06623156876963984, |
| "grad_norm": 0.6233262394445207, |
| "learning_rate": 9.993457859432172e-06, |
| "loss": 0.6261, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06671501087744743, |
| "grad_norm": 0.6073445562745826, |
| "learning_rate": 9.993042683169647e-06, |
| "loss": 0.6371, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.06719845298525502, |
| "grad_norm": 0.5857241687958673, |
| "learning_rate": 9.992614744287605e-06, |
| "loss": 0.6275, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0676818950930626, |
| "grad_norm": 0.5304150460003405, |
| "learning_rate": 9.992174043879893e-06, |
| "loss": 0.6175, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0681653372008702, |
| "grad_norm": 0.5933722892089892, |
| "learning_rate": 9.991720583072975e-06, |
| "loss": 0.6255, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.06864877930867778, |
| "grad_norm": 0.561723953482763, |
| "learning_rate": 9.991254363025935e-06, |
| "loss": 0.6257, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06913222141648538, |
| "grad_norm": 0.532228224452236, |
| "learning_rate": 9.99077538493047e-06, |
| "loss": 0.6301, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06961566352429296, |
| "grad_norm": 0.541783938730816, |
| "learning_rate": 9.990283650010883e-06, |
| "loss": 0.619, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07009910563210056, |
| "grad_norm": 0.5606995950440783, |
| "learning_rate": 9.989779159524091e-06, |
| "loss": 0.5818, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07058254773990814, |
| "grad_norm": 0.5286741282148979, |
| "learning_rate": 9.989261914759612e-06, |
| "loss": 0.6105, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07106598984771574, |
| "grad_norm": 0.525375741245272, |
| "learning_rate": 9.988731917039564e-06, |
| "loss": 0.6154, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07154943195552332, |
| "grad_norm": 0.5132546936158348, |
| "learning_rate": 9.988189167718665e-06, |
| "loss": 0.5533, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07203287406333092, |
| "grad_norm": 0.5330232205089095, |
| "learning_rate": 9.987633668184227e-06, |
| "loss": 0.6281, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0725163161711385, |
| "grad_norm": 0.5459730729112252, |
| "learning_rate": 9.98706541985615e-06, |
| "loss": 0.5836, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0729997582789461, |
| "grad_norm": 0.5818263727750432, |
| "learning_rate": 9.986484424186922e-06, |
| "loss": 0.6246, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07348320038675368, |
| "grad_norm": 0.5754133435232375, |
| "learning_rate": 9.985890682661616e-06, |
| "loss": 0.6038, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07396664249456128, |
| "grad_norm": 0.5528911744587542, |
| "learning_rate": 9.985284196797884e-06, |
| "loss": 0.6246, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07445008460236886, |
| "grad_norm": 0.5484687585797547, |
| "learning_rate": 9.984664968145953e-06, |
| "loss": 0.6318, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.07493352671017646, |
| "grad_norm": 0.5351986552762329, |
| "learning_rate": 9.984032998288617e-06, |
| "loss": 0.6184, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07541696881798404, |
| "grad_norm": 0.519416066205614, |
| "learning_rate": 9.983388288841246e-06, |
| "loss": 0.6185, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07590041092579164, |
| "grad_norm": 0.5470449402548487, |
| "learning_rate": 9.982730841451768e-06, |
| "loss": 0.625, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.07638385303359922, |
| "grad_norm": 0.5887016805140373, |
| "learning_rate": 9.982060657800672e-06, |
| "loss": 0.6183, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07686729514140682, |
| "grad_norm": 0.5522566946881194, |
| "learning_rate": 9.981377739601002e-06, |
| "loss": 0.6137, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0773507372492144, |
| "grad_norm": 0.5411997809451911, |
| "learning_rate": 9.980682088598349e-06, |
| "loss": 0.6229, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.077834179357022, |
| "grad_norm": 0.52840707851752, |
| "learning_rate": 9.979973706570856e-06, |
| "loss": 0.614, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07831762146482958, |
| "grad_norm": 0.6047062373713257, |
| "learning_rate": 9.979252595329204e-06, |
| "loss": 0.6222, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07880106357263718, |
| "grad_norm": 0.5420471794760692, |
| "learning_rate": 9.978518756716611e-06, |
| "loss": 0.5856, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.07928450568044476, |
| "grad_norm": 0.5857386315586672, |
| "learning_rate": 9.977772192608827e-06, |
| "loss": 0.6291, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07976794778825236, |
| "grad_norm": 0.5691356356316107, |
| "learning_rate": 9.977012904914133e-06, |
| "loss": 0.6149, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.08025138989605994, |
| "grad_norm": 0.5823273363045892, |
| "learning_rate": 9.976240895573326e-06, |
| "loss": 0.6147, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08073483200386754, |
| "grad_norm": 0.538212010864403, |
| "learning_rate": 9.975456166559725e-06, |
| "loss": 0.6002, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08121827411167512, |
| "grad_norm": 0.601371610274862, |
| "learning_rate": 9.974658719879163e-06, |
| "loss": 0.606, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.08170171621948272, |
| "grad_norm": 0.588104162701253, |
| "learning_rate": 9.973848557569974e-06, |
| "loss": 0.6226, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0821851583272903, |
| "grad_norm": 0.5316828963553285, |
| "learning_rate": 9.973025681703e-06, |
| "loss": 0.6144, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0826686004350979, |
| "grad_norm": 0.5405916050680715, |
| "learning_rate": 9.972190094381578e-06, |
| "loss": 0.6148, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0831520425429055, |
| "grad_norm": 0.5102891757426009, |
| "learning_rate": 9.971341797741538e-06, |
| "loss": 0.616, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.08363548465071308, |
| "grad_norm": 0.5551757535954606, |
| "learning_rate": 9.970480793951194e-06, |
| "loss": 0.6196, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08411892675852067, |
| "grad_norm": 0.5349760515746151, |
| "learning_rate": 9.96960708521134e-06, |
| "loss": 0.5902, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.08460236886632826, |
| "grad_norm": 0.5713299053870873, |
| "learning_rate": 9.968720673755246e-06, |
| "loss": 0.6039, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08508581097413585, |
| "grad_norm": 0.5886201187493544, |
| "learning_rate": 9.96782156184865e-06, |
| "loss": 0.6128, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08556925308194344, |
| "grad_norm": 0.52487297166769, |
| "learning_rate": 9.966909751789758e-06, |
| "loss": 0.6201, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08605269518975103, |
| "grad_norm": 0.47488673856360863, |
| "learning_rate": 9.965985245909226e-06, |
| "loss": 0.581, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08653613729755862, |
| "grad_norm": 0.5390345004627665, |
| "learning_rate": 9.96504804657017e-06, |
| "loss": 0.5748, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.08701957940536621, |
| "grad_norm": 0.5030595297893009, |
| "learning_rate": 9.964098156168143e-06, |
| "loss": 0.6025, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0875030215131738, |
| "grad_norm": 0.5468598312459072, |
| "learning_rate": 9.963135577131144e-06, |
| "loss": 0.6086, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08798646362098139, |
| "grad_norm": 0.48113219800404783, |
| "learning_rate": 9.962160311919601e-06, |
| "loss": 0.5759, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.08846990572878898, |
| "grad_norm": 0.5498772940672643, |
| "learning_rate": 9.96117236302637e-06, |
| "loss": 0.6009, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08895334783659657, |
| "grad_norm": 0.572150853367621, |
| "learning_rate": 9.960171732976731e-06, |
| "loss": 0.5891, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08943678994440415, |
| "grad_norm": 0.5440182913032069, |
| "learning_rate": 9.959158424328373e-06, |
| "loss": 0.6126, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08992023205221175, |
| "grad_norm": 0.5124606491120447, |
| "learning_rate": 9.958132439671392e-06, |
| "loss": 0.6113, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09040367416001933, |
| "grad_norm": 0.5122426086233111, |
| "learning_rate": 9.957093781628294e-06, |
| "loss": 0.5585, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09088711626782693, |
| "grad_norm": 0.5466339032920954, |
| "learning_rate": 9.956042452853967e-06, |
| "loss": 0.5829, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09137055837563451, |
| "grad_norm": 0.5319185267267565, |
| "learning_rate": 9.954978456035695e-06, |
| "loss": 0.6014, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09185400048344211, |
| "grad_norm": 0.5439360347029544, |
| "learning_rate": 9.953901793893137e-06, |
| "loss": 0.6135, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0923374425912497, |
| "grad_norm": 0.5572467498872743, |
| "learning_rate": 9.95281246917833e-06, |
| "loss": 0.6126, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09282088469905729, |
| "grad_norm": 0.5541110285684123, |
| "learning_rate": 9.951710484675677e-06, |
| "loss": 0.6077, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09330432680686487, |
| "grad_norm": 0.4850481807152515, |
| "learning_rate": 9.950595843201936e-06, |
| "loss": 0.6052, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.09378776891467247, |
| "grad_norm": 0.4982494369774088, |
| "learning_rate": 9.949468547606222e-06, |
| "loss": 0.608, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.09427121102248005, |
| "grad_norm": 0.5222210926075901, |
| "learning_rate": 9.948328600769996e-06, |
| "loss": 0.5725, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09475465313028765, |
| "grad_norm": 0.5156665548407187, |
| "learning_rate": 9.94717600560705e-06, |
| "loss": 0.5981, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.4789398218595176, |
| "learning_rate": 9.946010765063512e-06, |
| "loss": 0.6163, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09572153734590283, |
| "grad_norm": 0.5066106303118647, |
| "learning_rate": 9.94483288211783e-06, |
| "loss": 0.6049, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09620497945371041, |
| "grad_norm": 0.519086410125638, |
| "learning_rate": 9.943642359780767e-06, |
| "loss": 0.6034, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09668842156151801, |
| "grad_norm": 0.5726309849663989, |
| "learning_rate": 9.942439201095398e-06, |
| "loss": 0.5977, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09717186366932559, |
| "grad_norm": 0.5149014744932526, |
| "learning_rate": 9.941223409137088e-06, |
| "loss": 0.6147, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09765530577713319, |
| "grad_norm": 0.5009166664227639, |
| "learning_rate": 9.939994987013505e-06, |
| "loss": 0.595, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09813874788494077, |
| "grad_norm": 0.5677576117209191, |
| "learning_rate": 9.93875393786459e-06, |
| "loss": 0.5825, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09862218999274837, |
| "grad_norm": 0.5705628060741978, |
| "learning_rate": 9.937500264862567e-06, |
| "loss": 0.6106, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09910563210055595, |
| "grad_norm": 0.5166084751955315, |
| "learning_rate": 9.936233971211926e-06, |
| "loss": 0.5724, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09958907420836355, |
| "grad_norm": 0.4998369485071646, |
| "learning_rate": 9.934955060149413e-06, |
| "loss": 0.5702, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10007251631617115, |
| "grad_norm": 0.4773861112208611, |
| "learning_rate": 9.933663534944029e-06, |
| "loss": 0.5976, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10055595842397873, |
| "grad_norm": 0.5142399648385931, |
| "learning_rate": 9.932359398897018e-06, |
| "loss": 0.5662, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10103940053178632, |
| "grad_norm": 0.5152331134346968, |
| "learning_rate": 9.931042655341856e-06, |
| "loss": 0.5987, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.10152284263959391, |
| "grad_norm": 0.5697107336495173, |
| "learning_rate": 9.929713307644245e-06, |
| "loss": 0.5956, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1020062847474015, |
| "grad_norm": 0.5437386464507225, |
| "learning_rate": 9.928371359202103e-06, |
| "loss": 0.6023, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10248972685520909, |
| "grad_norm": 0.48930400532530816, |
| "learning_rate": 9.927016813445562e-06, |
| "loss": 0.5941, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.10297316896301668, |
| "grad_norm": 0.5691895002113943, |
| "learning_rate": 9.925649673836949e-06, |
| "loss": 0.5977, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.10345661107082427, |
| "grad_norm": 0.518358121778254, |
| "learning_rate": 9.924269943870781e-06, |
| "loss": 0.599, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.10394005317863186, |
| "grad_norm": 0.5179203447080591, |
| "learning_rate": 9.922877627073763e-06, |
| "loss": 0.565, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10442349528643945, |
| "grad_norm": 0.5348210146349037, |
| "learning_rate": 9.921472727004765e-06, |
| "loss": 0.6038, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10490693739424704, |
| "grad_norm": 0.5011388091471438, |
| "learning_rate": 9.920055247254827e-06, |
| "loss": 0.5951, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10539037950205463, |
| "grad_norm": 0.5706178448892886, |
| "learning_rate": 9.91862519144714e-06, |
| "loss": 0.604, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10587382160986222, |
| "grad_norm": 0.5667257328777994, |
| "learning_rate": 9.917182563237045e-06, |
| "loss": 0.6006, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1063572637176698, |
| "grad_norm": 0.5402529870671051, |
| "learning_rate": 9.915727366312012e-06, |
| "loss": 0.591, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1068407058254774, |
| "grad_norm": 0.5123066262170495, |
| "learning_rate": 9.914259604391642e-06, |
| "loss": 0.5818, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10732414793328499, |
| "grad_norm": 0.5104812232878251, |
| "learning_rate": 9.912779281227656e-06, |
| "loss": 0.5991, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10780759004109258, |
| "grad_norm": 0.5073553912253322, |
| "learning_rate": 9.911286400603878e-06, |
| "loss": 0.5783, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10829103214890017, |
| "grad_norm": 0.5516103650201469, |
| "learning_rate": 9.90978096633623e-06, |
| "loss": 0.6007, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10877447425670776, |
| "grad_norm": 0.5241670992889956, |
| "learning_rate": 9.908262982272724e-06, |
| "loss": 0.5865, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10925791636451535, |
| "grad_norm": 0.4894067875331202, |
| "learning_rate": 9.906732452293448e-06, |
| "loss": 0.5635, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10974135847232294, |
| "grad_norm": 0.5079732216995924, |
| "learning_rate": 9.905189380310564e-06, |
| "loss": 0.5982, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.11022480058013052, |
| "grad_norm": 0.47288266380376864, |
| "learning_rate": 9.903633770268286e-06, |
| "loss": 0.5734, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11070824268793812, |
| "grad_norm": 0.5195973051222883, |
| "learning_rate": 9.902065626142876e-06, |
| "loss": 0.6021, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1111916847957457, |
| "grad_norm": 0.5159734590151601, |
| "learning_rate": 9.900484951942642e-06, |
| "loss": 0.5847, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1116751269035533, |
| "grad_norm": 0.5157347113387764, |
| "learning_rate": 9.89889175170791e-06, |
| "loss": 0.5946, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.11215856901136088, |
| "grad_norm": 0.49409523170190334, |
| "learning_rate": 9.89728602951103e-06, |
| "loss": 0.5941, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11264201111916848, |
| "grad_norm": 0.5699641967141135, |
| "learning_rate": 9.89566778945636e-06, |
| "loss": 0.5965, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.11312545322697606, |
| "grad_norm": 0.5565932357020583, |
| "learning_rate": 9.894037035680246e-06, |
| "loss": 0.6076, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.11360889533478366, |
| "grad_norm": 0.4762368359891958, |
| "learning_rate": 9.892393772351033e-06, |
| "loss": 0.5749, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11409233744259124, |
| "grad_norm": 0.5226269336653058, |
| "learning_rate": 9.890738003669029e-06, |
| "loss": 0.5882, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.11457577955039884, |
| "grad_norm": 0.5893232226185929, |
| "learning_rate": 9.889069733866515e-06, |
| "loss": 0.5978, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11505922165820642, |
| "grad_norm": 0.5556325697280562, |
| "learning_rate": 9.887388967207722e-06, |
| "loss": 0.6, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.11554266376601402, |
| "grad_norm": 0.48160661753964396, |
| "learning_rate": 9.885695707988825e-06, |
| "loss": 0.5977, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.11602610587382162, |
| "grad_norm": 0.5122405505133801, |
| "learning_rate": 9.883989960537934e-06, |
| "loss": 0.6044, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1165095479816292, |
| "grad_norm": 0.5812889541684825, |
| "learning_rate": 9.882271729215071e-06, |
| "loss": 0.5849, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.1169929900894368, |
| "grad_norm": 0.4906401332764143, |
| "learning_rate": 9.880541018412179e-06, |
| "loss": 0.5986, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.11747643219724438, |
| "grad_norm": 0.48951055967126716, |
| "learning_rate": 9.878797832553093e-06, |
| "loss": 0.5646, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11795987430505198, |
| "grad_norm": 0.4836474446158179, |
| "learning_rate": 9.877042176093537e-06, |
| "loss": 0.5998, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11844331641285956, |
| "grad_norm": 0.4962973453940785, |
| "learning_rate": 9.875274053521107e-06, |
| "loss": 0.5846, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11892675852066716, |
| "grad_norm": 0.45261755838242107, |
| "learning_rate": 9.873493469355271e-06, |
| "loss": 0.5912, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11941020062847474, |
| "grad_norm": 0.49934013758424506, |
| "learning_rate": 9.871700428147342e-06, |
| "loss": 0.5836, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11989364273628234, |
| "grad_norm": 0.48318779237357384, |
| "learning_rate": 9.86989493448048e-06, |
| "loss": 0.5898, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.12037708484408992, |
| "grad_norm": 0.4877998807669757, |
| "learning_rate": 9.868076992969672e-06, |
| "loss": 0.5933, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.12086052695189752, |
| "grad_norm": 0.4697579805390032, |
| "learning_rate": 9.866246608261725e-06, |
| "loss": 0.5855, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1213439690597051, |
| "grad_norm": 0.512552573820198, |
| "learning_rate": 9.864403785035246e-06, |
| "loss": 0.5989, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1218274111675127, |
| "grad_norm": 0.4865753568683563, |
| "learning_rate": 9.862548528000644e-06, |
| "loss": 0.5722, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.12231085327532028, |
| "grad_norm": 0.5276925045930954, |
| "learning_rate": 9.860680841900101e-06, |
| "loss": 0.5879, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12279429538312787, |
| "grad_norm": 0.5213718677505005, |
| "learning_rate": 9.858800731507575e-06, |
| "loss": 0.5999, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.12327773749093546, |
| "grad_norm": 0.5180845494091726, |
| "learning_rate": 9.85690820162878e-06, |
| "loss": 0.586, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.12376117959874305, |
| "grad_norm": 0.5698025401421347, |
| "learning_rate": 9.855003257101177e-06, |
| "loss": 0.6011, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12424462170655064, |
| "grad_norm": 0.562343589994959, |
| "learning_rate": 9.853085902793952e-06, |
| "loss": 0.5894, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.12472806381435823, |
| "grad_norm": 0.5160827286882833, |
| "learning_rate": 9.851156143608025e-06, |
| "loss": 0.5897, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.12521150592216582, |
| "grad_norm": 0.5407107287832078, |
| "learning_rate": 9.84921398447601e-06, |
| "loss": 0.59, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1256949480299734, |
| "grad_norm": 0.4828245059112851, |
| "learning_rate": 9.847259430362222e-06, |
| "loss": 0.5642, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.126178390137781, |
| "grad_norm": 0.5766667340207283, |
| "learning_rate": 9.845292486262664e-06, |
| "loss": 0.6016, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.12666183224558858, |
| "grad_norm": 0.5818866932241936, |
| "learning_rate": 9.843313157204999e-06, |
| "loss": 0.5807, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.12714527435339618, |
| "grad_norm": 0.5140923007570054, |
| "learning_rate": 9.841321448248552e-06, |
| "loss": 0.5858, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12762871646120377, |
| "grad_norm": 0.513399510660716, |
| "learning_rate": 9.839317364484295e-06, |
| "loss": 0.5847, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.12811215856901137, |
| "grad_norm": 0.5227642580781724, |
| "learning_rate": 9.837300911034824e-06, |
| "loss": 0.5888, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12859560067681894, |
| "grad_norm": 0.5579358896097371, |
| "learning_rate": 9.83527209305436e-06, |
| "loss": 0.5928, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12907904278462654, |
| "grad_norm": 0.5145348442577231, |
| "learning_rate": 9.83323091572872e-06, |
| "loss": 0.5872, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12956248489243413, |
| "grad_norm": 0.5112821410236051, |
| "learning_rate": 9.831177384275323e-06, |
| "loss": 0.5805, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.13004592700024173, |
| "grad_norm": 0.5497912960403669, |
| "learning_rate": 9.829111503943159e-06, |
| "loss": 0.5837, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1305293691080493, |
| "grad_norm": 0.5226743950335115, |
| "learning_rate": 9.827033280012783e-06, |
| "loss": 0.5539, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1310128112158569, |
| "grad_norm": 0.5713921241049837, |
| "learning_rate": 9.824942717796304e-06, |
| "loss": 0.5881, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.1314962533236645, |
| "grad_norm": 0.5241764388189555, |
| "learning_rate": 9.822839822637369e-06, |
| "loss": 0.6032, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.1319796954314721, |
| "grad_norm": 0.5162440352522167, |
| "learning_rate": 9.820724599911147e-06, |
| "loss": 0.5842, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.13246313753927969, |
| "grad_norm": 0.5431692492650363, |
| "learning_rate": 9.818597055024315e-06, |
| "loss": 0.585, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.13294657964708725, |
| "grad_norm": 0.5124783198553914, |
| "learning_rate": 9.816457193415055e-06, |
| "loss": 0.5779, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.13343002175489485, |
| "grad_norm": 0.5257695390265421, |
| "learning_rate": 9.81430502055302e-06, |
| "loss": 0.5798, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.13391346386270245, |
| "grad_norm": 0.49781008962990064, |
| "learning_rate": 9.812140541939338e-06, |
| "loss": 0.5836, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.13439690597051004, |
| "grad_norm": 0.5327804269781539, |
| "learning_rate": 9.809963763106593e-06, |
| "loss": 0.5733, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1348803480783176, |
| "grad_norm": 0.5167258655366103, |
| "learning_rate": 9.807774689618806e-06, |
| "loss": 0.58, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1353637901861252, |
| "grad_norm": 0.5321175943512093, |
| "learning_rate": 9.805573327071428e-06, |
| "loss": 0.5911, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1358472322939328, |
| "grad_norm": 0.49961117510050285, |
| "learning_rate": 9.803359681091313e-06, |
| "loss": 0.5737, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1363306744017404, |
| "grad_norm": 0.5314962622355859, |
| "learning_rate": 9.801133757336726e-06, |
| "loss": 0.593, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.13681411650954797, |
| "grad_norm": 0.48173417582091976, |
| "learning_rate": 9.798895561497299e-06, |
| "loss": 0.5818, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.13729755861735557, |
| "grad_norm": 0.5127693228983886, |
| "learning_rate": 9.796645099294049e-06, |
| "loss": 0.6024, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.13778100072516317, |
| "grad_norm": 0.5128313174228813, |
| "learning_rate": 9.794382376479334e-06, |
| "loss": 0.5837, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13826444283297076, |
| "grad_norm": 0.502862882638082, |
| "learning_rate": 9.792107398836859e-06, |
| "loss": 0.5781, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.13874788494077833, |
| "grad_norm": 0.5169656633134686, |
| "learning_rate": 9.789820172181648e-06, |
| "loss": 0.5821, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13923132704858593, |
| "grad_norm": 6.7246508188992, |
| "learning_rate": 9.787520702360035e-06, |
| "loss": 1.0972, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13971476915639353, |
| "grad_norm": 0.6005251051430991, |
| "learning_rate": 9.785208995249655e-06, |
| "loss": 0.5803, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.14019821126420112, |
| "grad_norm": 0.5531574758650235, |
| "learning_rate": 9.782885056759413e-06, |
| "loss": 0.563, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1406816533720087, |
| "grad_norm": 0.5273779406180227, |
| "learning_rate": 9.780548892829486e-06, |
| "loss": 0.5872, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1411650954798163, |
| "grad_norm": 0.5063770192301159, |
| "learning_rate": 9.778200509431297e-06, |
| "loss": 0.5782, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.14164853758762389, |
| "grad_norm": 0.5401099132225082, |
| "learning_rate": 9.775839912567502e-06, |
| "loss": 0.5804, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.14213197969543148, |
| "grad_norm": 0.607784811294971, |
| "learning_rate": 9.773467108271978e-06, |
| "loss": 0.5831, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.14261542180323905, |
| "grad_norm": 0.5051370116219928, |
| "learning_rate": 9.771082102609803e-06, |
| "loss": 0.5597, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.14309886391104665, |
| "grad_norm": 0.5723810352863865, |
| "learning_rate": 9.768684901677245e-06, |
| "loss": 0.5779, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14358230601885424, |
| "grad_norm": 0.529491415132923, |
| "learning_rate": 9.766275511601742e-06, |
| "loss": 0.5849, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.14406574812666184, |
| "grad_norm": 0.6275998382003428, |
| "learning_rate": 9.763853938541887e-06, |
| "loss": 0.5915, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1445491902344694, |
| "grad_norm": 0.5906428033404255, |
| "learning_rate": 9.76142018868742e-06, |
| "loss": 0.5816, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.145032632342277, |
| "grad_norm": 0.597638837356143, |
| "learning_rate": 9.7589742682592e-06, |
| "loss": 0.5578, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1455160744500846, |
| "grad_norm": 0.5365546900890564, |
| "learning_rate": 9.756516183509198e-06, |
| "loss": 0.5833, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1459995165578922, |
| "grad_norm": 0.554155920273677, |
| "learning_rate": 9.754045940720471e-06, |
| "loss": 0.581, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.14648295866569977, |
| "grad_norm": 0.5290449152773149, |
| "learning_rate": 9.751563546207167e-06, |
| "loss": 0.5879, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.14696640077350737, |
| "grad_norm": 0.5303051981230842, |
| "learning_rate": 9.749069006314481e-06, |
| "loss": 0.557, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.14744984288131496, |
| "grad_norm": 0.4750712434505446, |
| "learning_rate": 9.74656232741866e-06, |
| "loss": 0.5236, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.14793328498912256, |
| "grad_norm": 0.515780571537496, |
| "learning_rate": 9.744043515926975e-06, |
| "loss": 0.5827, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.14841672709693013, |
| "grad_norm": 0.5886066507830542, |
| "learning_rate": 9.741512578277715e-06, |
| "loss": 0.5741, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14890016920473773, |
| "grad_norm": 0.5712616310834069, |
| "learning_rate": 9.738969520940158e-06, |
| "loss": 0.587, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14938361131254532, |
| "grad_norm": 0.5883909446108012, |
| "learning_rate": 9.736414350414564e-06, |
| "loss": 0.5836, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.14986705342035292, |
| "grad_norm": 0.49300111186175044, |
| "learning_rate": 9.733847073232156e-06, |
| "loss": 0.583, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.15035049552816052, |
| "grad_norm": 0.47057695692490953, |
| "learning_rate": 9.7312676959551e-06, |
| "loss": 0.5433, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.15083393763596809, |
| "grad_norm": 0.5647156070035382, |
| "learning_rate": 9.72867622517649e-06, |
| "loss": 0.5859, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.15131737974377568, |
| "grad_norm": 0.5698749374107666, |
| "learning_rate": 9.726072667520338e-06, |
| "loss": 0.5759, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.15180082185158328, |
| "grad_norm": 0.4935935341959304, |
| "learning_rate": 9.723457029641547e-06, |
| "loss": 0.5883, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.15228426395939088, |
| "grad_norm": 0.5040267732247843, |
| "learning_rate": 9.720829318225897e-06, |
| "loss": 0.5723, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.15276770606719844, |
| "grad_norm": 0.5390674583456238, |
| "learning_rate": 9.718189539990029e-06, |
| "loss": 0.5748, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.15325114817500604, |
| "grad_norm": 0.5449958057788811, |
| "learning_rate": 9.715537701681431e-06, |
| "loss": 0.5831, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.15373459028281364, |
| "grad_norm": 0.48895966772949706, |
| "learning_rate": 9.712873810078415e-06, |
| "loss": 0.5505, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.15421803239062123, |
| "grad_norm": 0.5694877152526486, |
| "learning_rate": 9.710197871990101e-06, |
| "loss": 0.5789, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1547014744984288, |
| "grad_norm": 0.5390854150150773, |
| "learning_rate": 9.707509894256406e-06, |
| "loss": 0.5699, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1551849166062364, |
| "grad_norm": 0.5339825765060972, |
| "learning_rate": 9.704809883748012e-06, |
| "loss": 0.5841, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.155668358714044, |
| "grad_norm": 0.5691147363910026, |
| "learning_rate": 9.70209784736637e-06, |
| "loss": 0.5791, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1561518008218516, |
| "grad_norm": 0.5098897525025804, |
| "learning_rate": 9.699373792043658e-06, |
| "loss": 0.5789, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.15663524292965916, |
| "grad_norm": 0.5233093422091403, |
| "learning_rate": 9.696637724742785e-06, |
| "loss": 0.5791, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.15711868503746676, |
| "grad_norm": 0.4951608627676522, |
| "learning_rate": 9.693889652457359e-06, |
| "loss": 0.5664, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.15760212714527436, |
| "grad_norm": 0.5085606430384619, |
| "learning_rate": 9.691129582211671e-06, |
| "loss": 0.5777, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.15808556925308195, |
| "grad_norm": 0.5137102450781047, |
| "learning_rate": 9.688357521060685e-06, |
| "loss": 0.5843, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.15856901136088952, |
| "grad_norm": 0.4769071854330559, |
| "learning_rate": 9.685573476090015e-06, |
| "loss": 0.578, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.15905245346869712, |
| "grad_norm": 0.542975418114207, |
| "learning_rate": 9.6827774544159e-06, |
| "loss": 0.5859, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15953589557650472, |
| "grad_norm": 0.4926718305346952, |
| "learning_rate": 9.6799694631852e-06, |
| "loss": 0.5871, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1600193376843123, |
| "grad_norm": 0.5010989320404932, |
| "learning_rate": 9.677149509575365e-06, |
| "loss": 0.5841, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.16050277979211988, |
| "grad_norm": 0.5446382005351177, |
| "learning_rate": 9.674317600794426e-06, |
| "loss": 0.5762, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.16098622189992748, |
| "grad_norm": 0.5406240370145704, |
| "learning_rate": 9.67147374408097e-06, |
| "loss": 0.5685, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.16146966400773508, |
| "grad_norm": 0.5171074604025283, |
| "learning_rate": 9.66861794670412e-06, |
| "loss": 0.5856, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.16195310611554267, |
| "grad_norm": 0.5545080974369176, |
| "learning_rate": 9.665750215963528e-06, |
| "loss": 0.5789, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.16243654822335024, |
| "grad_norm": 0.49939805294647144, |
| "learning_rate": 9.662870559189344e-06, |
| "loss": 0.5702, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.16291999033115784, |
| "grad_norm": 0.49295646596373777, |
| "learning_rate": 9.6599789837422e-06, |
| "loss": 0.5742, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.16340343243896543, |
| "grad_norm": 0.5522231456414357, |
| "learning_rate": 9.657075497013202e-06, |
| "loss": 0.5752, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.16388687454677303, |
| "grad_norm": 0.5606395929711875, |
| "learning_rate": 9.654160106423891e-06, |
| "loss": 0.5854, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1643703166545806, |
| "grad_norm": 0.5086990809592122, |
| "learning_rate": 9.651232819426242e-06, |
| "loss": 0.5764, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1648537587623882, |
| "grad_norm": 0.4984930367771814, |
| "learning_rate": 9.648293643502636e-06, |
| "loss": 0.5619, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1653372008701958, |
| "grad_norm": 0.5217470426797576, |
| "learning_rate": 9.645342586165845e-06, |
| "loss": 0.5833, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1658206429780034, |
| "grad_norm": 0.546389261380125, |
| "learning_rate": 9.642379654959006e-06, |
| "loss": 0.5381, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.166304085085811, |
| "grad_norm": 0.5439151860872452, |
| "learning_rate": 9.639404857455614e-06, |
| "loss": 0.5674, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.16678752719361856, |
| "grad_norm": 0.5469688158149608, |
| "learning_rate": 9.63641820125949e-06, |
| "loss": 0.5705, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.16727096930142615, |
| "grad_norm": 0.4994352161741759, |
| "learning_rate": 9.633419694004767e-06, |
| "loss": 0.555, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.16775441140923375, |
| "grad_norm": 0.5270157823994652, |
| "learning_rate": 9.63040934335587e-06, |
| "loss": 0.5741, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.16823785351704135, |
| "grad_norm": 0.5302701119307424, |
| "learning_rate": 9.627387157007502e-06, |
| "loss": 0.5775, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.16872129562484892, |
| "grad_norm": 0.5005904286760833, |
| "learning_rate": 9.624353142684611e-06, |
| "loss": 0.5724, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1692047377326565, |
| "grad_norm": 0.5035595085634601, |
| "learning_rate": 9.621307308142385e-06, |
| "loss": 0.5794, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1696881798404641, |
| "grad_norm": 0.521381746170865, |
| "learning_rate": 9.618249661166218e-06, |
| "loss": 0.5764, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1701716219482717, |
| "grad_norm": 0.48214165657815927, |
| "learning_rate": 9.615180209571709e-06, |
| "loss": 0.5804, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.17065506405607928, |
| "grad_norm": 0.47552991671065514, |
| "learning_rate": 9.612098961204617e-06, |
| "loss": 0.5581, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.17113850616388687, |
| "grad_norm": 0.46097880469562935, |
| "learning_rate": 9.609005923940865e-06, |
| "loss": 0.5618, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.17162194827169447, |
| "grad_norm": 0.5629931104502605, |
| "learning_rate": 9.605901105686503e-06, |
| "loss": 0.5694, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.17210539037950207, |
| "grad_norm": 0.5179757776717347, |
| "learning_rate": 9.602784514377701e-06, |
| "loss": 0.5897, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.17258883248730963, |
| "grad_norm": 0.5355839686571028, |
| "learning_rate": 9.599656157980715e-06, |
| "loss": 0.5724, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.17307227459511723, |
| "grad_norm": 0.6350286695754506, |
| "learning_rate": 9.596516044491873e-06, |
| "loss": 0.577, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.17355571670292483, |
| "grad_norm": 0.5295601313068036, |
| "learning_rate": 9.593364181937563e-06, |
| "loss": 0.5834, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.17403915881073242, |
| "grad_norm": 0.5016272467409, |
| "learning_rate": 9.590200578374198e-06, |
| "loss": 0.5848, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17452260091854, |
| "grad_norm": 0.4734403734457174, |
| "learning_rate": 9.587025241888202e-06, |
| "loss": 0.5629, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.1750060430263476, |
| "grad_norm": 0.5345541955737336, |
| "learning_rate": 9.583838180595993e-06, |
| "loss": 0.5619, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1754894851341552, |
| "grad_norm": 0.5159159294276754, |
| "learning_rate": 9.580639402643957e-06, |
| "loss": 0.5788, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.17597292724196278, |
| "grad_norm": 0.5475730953848408, |
| "learning_rate": 9.577428916208426e-06, |
| "loss": 0.5758, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.17645636934977035, |
| "grad_norm": 0.5065491502971655, |
| "learning_rate": 9.574206729495662e-06, |
| "loss": 0.5739, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.17693981145757795, |
| "grad_norm": 0.5385122338140608, |
| "learning_rate": 9.570972850741839e-06, |
| "loss": 0.5646, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.17742325356538555, |
| "grad_norm": 0.5282114345918013, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.5809, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.17790669567319314, |
| "grad_norm": 0.5183724179001736, |
| "learning_rate": 9.564470050205084e-06, |
| "loss": 0.5745, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1783901377810007, |
| "grad_norm": 0.501228022506401, |
| "learning_rate": 9.561201145043835e-06, |
| "loss": 0.5759, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1788735798888083, |
| "grad_norm": 0.5161478035704796, |
| "learning_rate": 9.557920581084848e-06, |
| "loss": 0.5716, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1793570219966159, |
| "grad_norm": 0.5508440640900468, |
| "learning_rate": 9.554628366713506e-06, |
| "loss": 0.5681, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1798404641044235, |
| "grad_norm": 0.4958022642187558, |
| "learning_rate": 9.551324510344972e-06, |
| "loss": 0.5674, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.18032390621223107, |
| "grad_norm": 0.5211800045547449, |
| "learning_rate": 9.548009020424172e-06, |
| "loss": 0.5759, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.18080734832003867, |
| "grad_norm": 0.5234346072417955, |
| "learning_rate": 9.544681905425767e-06, |
| "loss": 0.5761, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.18129079042784627, |
| "grad_norm": 0.5277623761050696, |
| "learning_rate": 9.541343173854128e-06, |
| "loss": 0.5846, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.18177423253565386, |
| "grad_norm": 0.5159488960453931, |
| "learning_rate": 9.537992834243323e-06, |
| "loss": 0.5655, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.18225767464346146, |
| "grad_norm": 0.5036893425002033, |
| "learning_rate": 9.53463089515709e-06, |
| "loss": 0.578, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.18274111675126903, |
| "grad_norm": 0.5500694186101432, |
| "learning_rate": 9.531257365188818e-06, |
| "loss": 0.5683, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.18322455885907662, |
| "grad_norm": 0.4446123327167339, |
| "learning_rate": 9.527872252961518e-06, |
| "loss": 0.5112, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.18370800096688422, |
| "grad_norm": 0.49646226307611685, |
| "learning_rate": 9.524475567127813e-06, |
| "loss": 0.5799, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18419144307469182, |
| "grad_norm": 0.5455620647014985, |
| "learning_rate": 9.521067316369903e-06, |
| "loss": 0.5601, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1846748851824994, |
| "grad_norm": 0.5073331374598753, |
| "learning_rate": 9.517647509399555e-06, |
| "loss": 0.5399, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.18515832729030698, |
| "grad_norm": 0.5171824333562809, |
| "learning_rate": 9.514216154958067e-06, |
| "loss": 0.5754, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.18564176939811458, |
| "grad_norm": 0.5085818096253197, |
| "learning_rate": 9.510773261816261e-06, |
| "loss": 0.5623, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.18612521150592218, |
| "grad_norm": 0.50056273177622, |
| "learning_rate": 9.507318838774448e-06, |
| "loss": 0.5774, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.18660865361372975, |
| "grad_norm": 0.5493241761943409, |
| "learning_rate": 9.50385289466241e-06, |
| "loss": 0.5698, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.18709209572153734, |
| "grad_norm": 0.48083872272472233, |
| "learning_rate": 9.500375438339384e-06, |
| "loss": 0.5634, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.18757553782934494, |
| "grad_norm": 0.48598643847981954, |
| "learning_rate": 9.496886478694025e-06, |
| "loss": 0.5642, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.18805897993715254, |
| "grad_norm": 0.4945695421669264, |
| "learning_rate": 9.493386024644396e-06, |
| "loss": 0.5763, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.1885424220449601, |
| "grad_norm": 0.4630609260733735, |
| "learning_rate": 9.48987408513794e-06, |
| "loss": 0.5667, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1890258641527677, |
| "grad_norm": 0.5178132025025237, |
| "learning_rate": 9.486350669151455e-06, |
| "loss": 0.5633, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.1895093062605753, |
| "grad_norm": 0.4855261545618926, |
| "learning_rate": 9.482815785691082e-06, |
| "loss": 0.5705, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1899927483683829, |
| "grad_norm": 0.48580056178653924, |
| "learning_rate": 9.47926944379226e-06, |
| "loss": 0.5703, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.5308237684959329, |
| "learning_rate": 9.475711652519732e-06, |
| "loss": 0.5583, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.19095963258399806, |
| "grad_norm": 0.5127712618313278, |
| "learning_rate": 9.472142420967496e-06, |
| "loss": 0.5674, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.19144307469180566, |
| "grad_norm": 0.4833488281294125, |
| "learning_rate": 9.468561758258795e-06, |
| "loss": 0.578, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.19192651679961326, |
| "grad_norm": 0.5644191416840888, |
| "learning_rate": 9.464969673546092e-06, |
| "loss": 0.582, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.19240995890742082, |
| "grad_norm": 0.5561428050479044, |
| "learning_rate": 9.461366176011047e-06, |
| "loss": 0.5762, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.19289340101522842, |
| "grad_norm": 0.49800634280761286, |
| "learning_rate": 9.457751274864486e-06, |
| "loss": 0.5786, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.19337684312303602, |
| "grad_norm": 0.464098426014889, |
| "learning_rate": 9.454124979346392e-06, |
| "loss": 0.531, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19386028523084362, |
| "grad_norm": 0.5317711530861378, |
| "learning_rate": 9.450487298725866e-06, |
| "loss": 0.5735, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.19434372733865118, |
| "grad_norm": 0.5700860255634325, |
| "learning_rate": 9.446838242301113e-06, |
| "loss": 0.5736, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.19482716944645878, |
| "grad_norm": 0.5415575586047788, |
| "learning_rate": 9.443177819399416e-06, |
| "loss": 0.5682, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.19531061155426638, |
| "grad_norm": 0.45162964809703743, |
| "learning_rate": 9.439506039377111e-06, |
| "loss": 0.5457, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.19579405366207397, |
| "grad_norm": 0.48073200361222107, |
| "learning_rate": 9.435822911619564e-06, |
| "loss": 0.5452, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.19627749576988154, |
| "grad_norm": 0.5218011226870963, |
| "learning_rate": 9.432128445541147e-06, |
| "loss": 0.5569, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.19676093787768914, |
| "grad_norm": 0.5241766492312198, |
| "learning_rate": 9.42842265058521e-06, |
| "loss": 0.5791, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.19724437998549674, |
| "grad_norm": 0.4747479232641684, |
| "learning_rate": 9.424705536224065e-06, |
| "loss": 0.572, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.19772782209330433, |
| "grad_norm": 0.4892195750767198, |
| "learning_rate": 9.420977111958957e-06, |
| "loss": 0.577, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1982112642011119, |
| "grad_norm": 0.49625147154018395, |
| "learning_rate": 9.41723738732004e-06, |
| "loss": 0.5673, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1986947063089195, |
| "grad_norm": 0.553969116933997, |
| "learning_rate": 9.41348637186635e-06, |
| "loss": 0.5805, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.1991781484167271, |
| "grad_norm": 0.5271833056864474, |
| "learning_rate": 9.409724075185782e-06, |
| "loss": 0.5811, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1996615905245347, |
| "grad_norm": 0.541152410560869, |
| "learning_rate": 9.405950506895074e-06, |
| "loss": 0.5539, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2001450326323423, |
| "grad_norm": 0.4827367980584999, |
| "learning_rate": 9.40216567663977e-06, |
| "loss": 0.5754, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.20062847474014986, |
| "grad_norm": 0.49177545628835745, |
| "learning_rate": 9.398369594094198e-06, |
| "loss": 0.508, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.20111191684795746, |
| "grad_norm": 0.50467312755319, |
| "learning_rate": 9.394562268961454e-06, |
| "loss": 0.5681, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.20159535895576505, |
| "grad_norm": 0.4916777572033636, |
| "learning_rate": 9.390743710973366e-06, |
| "loss": 0.575, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.20207880106357265, |
| "grad_norm": 0.5183550927798377, |
| "learning_rate": 9.386913929890478e-06, |
| "loss": 0.57, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.20256224317138022, |
| "grad_norm": 0.47362092706218123, |
| "learning_rate": 9.383072935502018e-06, |
| "loss": 0.5644, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.20304568527918782, |
| "grad_norm": 0.49530019201729136, |
| "learning_rate": 9.379220737625877e-06, |
| "loss": 0.564, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2035291273869954, |
| "grad_norm": 0.5108751966700111, |
| "learning_rate": 9.375357346108583e-06, |
| "loss": 0.5602, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.204012569494803, |
| "grad_norm": 0.5230318233484302, |
| "learning_rate": 9.371482770825277e-06, |
| "loss": 0.5695, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.20449601160261058, |
| "grad_norm": 0.4802393361720882, |
| "learning_rate": 9.367597021679686e-06, |
| "loss": 0.5661, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.20497945371041817, |
| "grad_norm": 0.541773712373739, |
| "learning_rate": 9.363700108604096e-06, |
| "loss": 0.5582, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.20546289581822577, |
| "grad_norm": 0.5110126727655455, |
| "learning_rate": 9.359792041559334e-06, |
| "loss": 0.5645, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.20594633792603337, |
| "grad_norm": 0.5486480496411716, |
| "learning_rate": 9.35587283053473e-06, |
| "loss": 0.5677, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.20642978003384094, |
| "grad_norm": 0.5379779057549923, |
| "learning_rate": 9.351942485548109e-06, |
| "loss": 0.5435, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.20691322214164853, |
| "grad_norm": 0.5341397558862222, |
| "learning_rate": 9.348001016645744e-06, |
| "loss": 0.5599, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.20739666424945613, |
| "grad_norm": 0.44238086682442823, |
| "learning_rate": 9.344048433902351e-06, |
| "loss": 0.541, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.20788010635726373, |
| "grad_norm": 0.5213851954927032, |
| "learning_rate": 9.340084747421048e-06, |
| "loss": 0.5366, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2083635484650713, |
| "grad_norm": 0.5349032988779688, |
| "learning_rate": 9.336109967333337e-06, |
| "loss": 0.5571, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2088469905728789, |
| "grad_norm": 0.4554230771685569, |
| "learning_rate": 9.332124103799075e-06, |
| "loss": 0.5516, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2093304326806865, |
| "grad_norm": 0.5021585721937876, |
| "learning_rate": 9.328127167006457e-06, |
| "loss": 0.5679, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2098138747884941, |
| "grad_norm": 0.5025134126056662, |
| "learning_rate": 9.324119167171967e-06, |
| "loss": 0.5659, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.21029731689630166, |
| "grad_norm": 0.48977518403096176, |
| "learning_rate": 9.320100114540382e-06, |
| "loss": 0.5753, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.21078075900410925, |
| "grad_norm": 0.4789181842167065, |
| "learning_rate": 9.316070019384722e-06, |
| "loss": 0.558, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.21126420111191685, |
| "grad_norm": 0.48417362744631853, |
| "learning_rate": 9.312028892006233e-06, |
| "loss": 0.5637, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.21174764321972445, |
| "grad_norm": 0.5040441298097904, |
| "learning_rate": 9.307976742734366e-06, |
| "loss": 0.5603, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.21223108532753202, |
| "grad_norm": 0.5003182083782678, |
| "learning_rate": 9.30391358192674e-06, |
| "loss": 0.5583, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2127145274353396, |
| "grad_norm": 0.5188458903874932, |
| "learning_rate": 9.299839419969119e-06, |
| "loss": 0.5614, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2131979695431472, |
| "grad_norm": 0.4990120996823676, |
| "learning_rate": 9.295754267275393e-06, |
| "loss": 0.5732, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2136814116509548, |
| "grad_norm": 0.43407580533296863, |
| "learning_rate": 9.291658134287537e-06, |
| "loss": 0.5451, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.21416485375876237, |
| "grad_norm": 0.487299832131986, |
| "learning_rate": 9.287551031475604e-06, |
| "loss": 0.5486, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.21464829586656997, |
| "grad_norm": 0.4748601209022523, |
| "learning_rate": 9.283432969337672e-06, |
| "loss": 0.5568, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.21513173797437757, |
| "grad_norm": 0.5116954397180901, |
| "learning_rate": 9.279303958399846e-06, |
| "loss": 0.5561, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.21561518008218516, |
| "grad_norm": 0.5103832796562369, |
| "learning_rate": 9.275164009216205e-06, |
| "loss": 0.5653, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.21609862218999276, |
| "grad_norm": 0.453674255766726, |
| "learning_rate": 9.271013132368799e-06, |
| "loss": 0.5359, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.21658206429780033, |
| "grad_norm": 0.4865827031825044, |
| "learning_rate": 9.266851338467598e-06, |
| "loss": 0.5627, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.21706550640560793, |
| "grad_norm": 0.4474998958247519, |
| "learning_rate": 9.262678638150486e-06, |
| "loss": 0.5372, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.21754894851341552, |
| "grad_norm": 0.5312817145455567, |
| "learning_rate": 9.258495042083222e-06, |
| "loss": 0.583, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.21803239062122312, |
| "grad_norm": 0.5326646088756841, |
| "learning_rate": 9.254300560959413e-06, |
| "loss": 0.5641, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2185158327290307, |
| "grad_norm": 0.47741110714076435, |
| "learning_rate": 9.25009520550049e-06, |
| "loss": 0.5692, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2189992748368383, |
| "grad_norm": 0.4992778758439529, |
| "learning_rate": 9.245878986455684e-06, |
| "loss": 0.5732, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.21948271694464588, |
| "grad_norm": 0.5067531688765293, |
| "learning_rate": 9.241651914601986e-06, |
| "loss": 0.5684, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.21996615905245348, |
| "grad_norm": 0.5259329600281596, |
| "learning_rate": 9.237414000744134e-06, |
| "loss": 0.5728, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.22044960116026105, |
| "grad_norm": 0.4912112930780334, |
| "learning_rate": 9.23316525571458e-06, |
| "loss": 0.5543, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.22093304326806865, |
| "grad_norm": 0.4325116439857764, |
| "learning_rate": 9.228905690373456e-06, |
| "loss": 0.5109, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.22141648537587624, |
| "grad_norm": 0.5251969417490432, |
| "learning_rate": 9.224635315608554e-06, |
| "loss": 0.5613, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.22189992748368384, |
| "grad_norm": 0.5371164613513753, |
| "learning_rate": 9.2203541423353e-06, |
| "loss": 0.5758, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2223833695914914, |
| "grad_norm": 0.49879877094748626, |
| "learning_rate": 9.216062181496712e-06, |
| "loss": 0.5656, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.222866811699299, |
| "grad_norm": 0.4666739038962981, |
| "learning_rate": 9.211759444063392e-06, |
| "loss": 0.5643, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.2233502538071066, |
| "grad_norm": 0.5019702713381807, |
| "learning_rate": 9.207445941033483e-06, |
| "loss": 0.5645, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2238336959149142, |
| "grad_norm": 0.560484985437826, |
| "learning_rate": 9.203121683432646e-06, |
| "loss": 0.5622, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.22431713802272177, |
| "grad_norm": 0.501701537299382, |
| "learning_rate": 9.19878668231403e-06, |
| "loss": 0.5686, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.22480058013052936, |
| "grad_norm": 0.48640275847390047, |
| "learning_rate": 9.19444094875825e-06, |
| "loss": 0.5617, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.22528402223833696, |
| "grad_norm": 0.5066662929437282, |
| "learning_rate": 9.190084493873353e-06, |
| "loss": 0.5733, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.22576746434614456, |
| "grad_norm": 0.5297511031777309, |
| "learning_rate": 9.185717328794784e-06, |
| "loss": 0.5632, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.22625090645395213, |
| "grad_norm": 0.5778692323663056, |
| "learning_rate": 9.18133946468537e-06, |
| "loss": 0.5684, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.22673434856175972, |
| "grad_norm": 0.5148715492097395, |
| "learning_rate": 9.176950912735287e-06, |
| "loss": 0.5559, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.22721779066956732, |
| "grad_norm": 0.5157447753884506, |
| "learning_rate": 9.172551684162025e-06, |
| "loss": 0.5731, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22770123277737492, |
| "grad_norm": 0.4783319000473412, |
| "learning_rate": 9.16814179021037e-06, |
| "loss": 0.5671, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.2281846748851825, |
| "grad_norm": 0.5017422895280137, |
| "learning_rate": 9.163721242152362e-06, |
| "loss": 0.5661, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.22866811699299008, |
| "grad_norm": 0.49272124386072536, |
| "learning_rate": 9.159290051287282e-06, |
| "loss": 0.5627, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.22915155910079768, |
| "grad_norm": 0.47471736533769476, |
| "learning_rate": 9.154848228941607e-06, |
| "loss": 0.5615, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.22963500120860528, |
| "grad_norm": 0.5071884927272643, |
| "learning_rate": 9.150395786468998e-06, |
| "loss": 0.5645, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.23011844331641285, |
| "grad_norm": 0.48690399925776484, |
| "learning_rate": 9.14593273525025e-06, |
| "loss": 0.5647, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.23060188542422044, |
| "grad_norm": 0.5041235784595942, |
| "learning_rate": 9.14145908669329e-06, |
| "loss": 0.5729, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.23108532753202804, |
| "grad_norm": 0.5265161224054821, |
| "learning_rate": 9.136974852233118e-06, |
| "loss": 0.5587, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.23156876963983564, |
| "grad_norm": 0.4778337324840926, |
| "learning_rate": 9.132480043331801e-06, |
| "loss": 0.5646, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.23205221174764323, |
| "grad_norm": 0.5036800160533508, |
| "learning_rate": 9.127974671478432e-06, |
| "loss": 0.5655, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2325356538554508, |
| "grad_norm": 0.4915164507750186, |
| "learning_rate": 9.123458748189105e-06, |
| "loss": 0.5608, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2330190959632584, |
| "grad_norm": 0.4447947403953834, |
| "learning_rate": 9.118932285006886e-06, |
| "loss": 0.5254, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.233502538071066, |
| "grad_norm": 0.4936810479165672, |
| "learning_rate": 9.114395293501775e-06, |
| "loss": 0.5751, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.2339859801788736, |
| "grad_norm": 0.4933009245810686, |
| "learning_rate": 9.10984778527069e-06, |
| "loss": 0.5603, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.23446942228668116, |
| "grad_norm": 0.4720549987110232, |
| "learning_rate": 9.10528977193743e-06, |
| "loss": 0.5703, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.23495286439448876, |
| "grad_norm": 0.5362136689894559, |
| "learning_rate": 9.100721265152644e-06, |
| "loss": 0.5635, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.23543630650229636, |
| "grad_norm": 0.47602005538977166, |
| "learning_rate": 9.096142276593802e-06, |
| "loss": 0.5721, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.23591974861010395, |
| "grad_norm": 0.48887012727323886, |
| "learning_rate": 9.09155281796517e-06, |
| "loss": 0.5502, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.23640319071791152, |
| "grad_norm": 0.5468866437635687, |
| "learning_rate": 9.086952900997774e-06, |
| "loss": 0.5628, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.23688663282571912, |
| "grad_norm": 0.468285091758703, |
| "learning_rate": 9.082342537449369e-06, |
| "loss": 0.5649, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23737007493352671, |
| "grad_norm": 0.49449575173177474, |
| "learning_rate": 9.07772173910442e-06, |
| "loss": 0.5363, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2378535170413343, |
| "grad_norm": 0.5665277859908898, |
| "learning_rate": 9.073090517774057e-06, |
| "loss": 0.5679, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.23833695914914188, |
| "grad_norm": 0.559218042712036, |
| "learning_rate": 9.068448885296057e-06, |
| "loss": 0.5598, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.23882040125694948, |
| "grad_norm": 0.5572180299965971, |
| "learning_rate": 9.063796853534808e-06, |
| "loss": 0.5606, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.23930384336475707, |
| "grad_norm": 0.4852501650353095, |
| "learning_rate": 9.059134434381274e-06, |
| "loss": 0.5614, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.23978728547256467, |
| "grad_norm": 0.5235782249928449, |
| "learning_rate": 9.054461639752976e-06, |
| "loss": 0.5637, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.24027072758037224, |
| "grad_norm": 0.5028533022976227, |
| "learning_rate": 9.049778481593954e-06, |
| "loss": 0.5718, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.24075416968817984, |
| "grad_norm": 0.508045864936268, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.5651, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.24123761179598743, |
| "grad_norm": 0.5575870011120908, |
| "learning_rate": 9.040381122592317e-06, |
| "loss": 0.565, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.24172105390379503, |
| "grad_norm": 0.5201685839473924, |
| "learning_rate": 9.035666945770107e-06, |
| "loss": 0.5593, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2422044960116026, |
| "grad_norm": 0.48179233555943923, |
| "learning_rate": 9.030942453457928e-06, |
| "loss": 0.5199, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2426879381194102, |
| "grad_norm": 0.4867208952029737, |
| "learning_rate": 9.02620765773196e-06, |
| "loss": 0.5548, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2431713802272178, |
| "grad_norm": 0.5240394440690106, |
| "learning_rate": 9.02146257069472e-06, |
| "loss": 0.5611, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.2436548223350254, |
| "grad_norm": 0.48307750050965703, |
| "learning_rate": 9.01670720447504e-06, |
| "loss": 0.5577, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.24413826444283296, |
| "grad_norm": 0.5034030614527921, |
| "learning_rate": 9.011941571228015e-06, |
| "loss": 0.5608, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.24462170655064056, |
| "grad_norm": 0.46379490536223517, |
| "learning_rate": 9.007165683134986e-06, |
| "loss": 0.5315, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.24510514865844815, |
| "grad_norm": 0.5103811282689319, |
| "learning_rate": 9.00237955240351e-06, |
| "loss": 0.5613, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.24558859076625575, |
| "grad_norm": 0.47564392120255755, |
| "learning_rate": 8.997583191267326e-06, |
| "loss": 0.5764, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.24607203287406332, |
| "grad_norm": 0.4811799201923712, |
| "learning_rate": 8.992776611986313e-06, |
| "loss": 0.5704, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.24655547498187091, |
| "grad_norm": 0.4799439081762819, |
| "learning_rate": 8.987959826846479e-06, |
| "loss": 0.5573, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2470389170896785, |
| "grad_norm": 0.5219349618857427, |
| "learning_rate": 8.983132848159916e-06, |
| "loss": 0.5583, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2475223591974861, |
| "grad_norm": 0.5010818591918965, |
| "learning_rate": 8.978295688264768e-06, |
| "loss": 0.5699, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2480058013052937, |
| "grad_norm": 0.5282819201955711, |
| "learning_rate": 8.973448359525207e-06, |
| "loss": 0.5641, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.24848924341310127, |
| "grad_norm": 0.5025819972323563, |
| "learning_rate": 8.968590874331395e-06, |
| "loss": 0.5649, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.24897268552090887, |
| "grad_norm": 0.4880024154213522, |
| "learning_rate": 8.963723245099456e-06, |
| "loss": 0.5533, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.24945612762871647, |
| "grad_norm": 0.4844265343558768, |
| "learning_rate": 8.958845484271443e-06, |
| "loss": 0.5571, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.24993956973652406, |
| "grad_norm": 0.4918270286134992, |
| "learning_rate": 8.953957604315306e-06, |
| "loss": 0.5612, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.25042301184433163, |
| "grad_norm": 0.4532098318099568, |
| "learning_rate": 8.949059617724859e-06, |
| "loss": 0.5532, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.25090645395213923, |
| "grad_norm": 0.4784777680132966, |
| "learning_rate": 8.944151537019752e-06, |
| "loss": 0.5314, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2513898960599468, |
| "grad_norm": 0.49834032614411844, |
| "learning_rate": 8.939233374745432e-06, |
| "loss": 0.561, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2518733381677544, |
| "grad_norm": 0.43922831313439964, |
| "learning_rate": 8.934305143473123e-06, |
| "loss": 0.5229, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.252356780275562, |
| "grad_norm": 0.4774051999235377, |
| "learning_rate": 8.929366855799777e-06, |
| "loss": 0.5584, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2528402223833696, |
| "grad_norm": 0.4860585540987837, |
| "learning_rate": 8.924418524348058e-06, |
| "loss": 0.5722, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.25332366449117716, |
| "grad_norm": 0.476115105724116, |
| "learning_rate": 8.919460161766299e-06, |
| "loss": 0.5527, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.25380710659898476, |
| "grad_norm": 0.49670836036646415, |
| "learning_rate": 8.914491780728471e-06, |
| "loss": 0.565, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.25429054870679235, |
| "grad_norm": 0.49705890206049747, |
| "learning_rate": 8.909513393934162e-06, |
| "loss": 0.5562, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.25477399081459995, |
| "grad_norm": 0.5118474736649574, |
| "learning_rate": 8.904525014108529e-06, |
| "loss": 0.5536, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.25525743292240755, |
| "grad_norm": 0.5301718242423505, |
| "learning_rate": 8.899526654002268e-06, |
| "loss": 0.5612, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.25574087503021514, |
| "grad_norm": 0.4796891269551852, |
| "learning_rate": 8.894518326391595e-06, |
| "loss": 0.5578, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.25622431713802274, |
| "grad_norm": 0.4825310469483714, |
| "learning_rate": 8.889500044078199e-06, |
| "loss": 0.5554, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.25670775924583034, |
| "grad_norm": 0.474416307358851, |
| "learning_rate": 8.88447181988921e-06, |
| "loss": 0.5466, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2571912013536379, |
| "grad_norm": 0.4754427571901456, |
| "learning_rate": 8.87943366667718e-06, |
| "loss": 0.5232, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2576746434614455, |
| "grad_norm": 0.5283380707149146, |
| "learning_rate": 8.87438559732003e-06, |
| "loss": 0.5575, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.25815808556925307, |
| "grad_norm": 0.49022668890084664, |
| "learning_rate": 8.869327624721033e-06, |
| "loss": 0.5584, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.25864152767706067, |
| "grad_norm": 0.45329648879294543, |
| "learning_rate": 8.864259761808778e-06, |
| "loss": 0.5557, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.25912496978486826, |
| "grad_norm": 0.5150923796193744, |
| "learning_rate": 8.859182021537126e-06, |
| "loss": 0.5672, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.25960841189267586, |
| "grad_norm": 0.49475203737919254, |
| "learning_rate": 8.854094416885192e-06, |
| "loss": 0.5513, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.26009185400048346, |
| "grad_norm": 0.48640723658571816, |
| "learning_rate": 8.848996960857308e-06, |
| "loss": 0.5542, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.26057529610829105, |
| "grad_norm": 0.5011403090647114, |
| "learning_rate": 8.843889666482977e-06, |
| "loss": 0.5503, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2610587382160986, |
| "grad_norm": 0.45868293065964316, |
| "learning_rate": 8.838772546816857e-06, |
| "loss": 0.5245, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2615421803239062, |
| "grad_norm": 0.5028670832415251, |
| "learning_rate": 8.833645614938716e-06, |
| "loss": 0.563, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2620256224317138, |
| "grad_norm": 0.4652958998559184, |
| "learning_rate": 8.82850888395341e-06, |
| "loss": 0.5214, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2625090645395214, |
| "grad_norm": 0.48814680090193757, |
| "learning_rate": 8.823362366990833e-06, |
| "loss": 0.5539, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.262992506647329, |
| "grad_norm": 0.4686742850265713, |
| "learning_rate": 8.818206077205899e-06, |
| "loss": 0.5432, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2634759487551366, |
| "grad_norm": 0.44177435010013455, |
| "learning_rate": 8.8130400277785e-06, |
| "loss": 0.5432, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2639593908629442, |
| "grad_norm": 0.48745119596264225, |
| "learning_rate": 8.807864231913475e-06, |
| "loss": 0.5609, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2644428329707518, |
| "grad_norm": 0.5387031701921053, |
| "learning_rate": 8.802678702840575e-06, |
| "loss": 0.5608, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.26492627507855937, |
| "grad_norm": 0.47706550642594997, |
| "learning_rate": 8.79748345381443e-06, |
| "loss": 0.5487, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.2654097171863669, |
| "grad_norm": 0.4694250929319588, |
| "learning_rate": 8.792278498114517e-06, |
| "loss": 0.549, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2658931592941745, |
| "grad_norm": 0.4937111232536657, |
| "learning_rate": 8.78706384904512e-06, |
| "loss": 0.5564, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2663766014019821, |
| "grad_norm": 0.468357648344204, |
| "learning_rate": 8.7818395199353e-06, |
| "loss": 0.5546, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.2668600435097897, |
| "grad_norm": 0.4713572915315673, |
| "learning_rate": 8.77660552413887e-06, |
| "loss": 0.5512, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2673434856175973, |
| "grad_norm": 0.4740909938486332, |
| "learning_rate": 8.77136187503434e-06, |
| "loss": 0.5631, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2678269277254049, |
| "grad_norm": 0.4472174307551216, |
| "learning_rate": 8.766108586024904e-06, |
| "loss": 0.5222, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2683103698332125, |
| "grad_norm": 0.4548110236983466, |
| "learning_rate": 8.760845670538387e-06, |
| "loss": 0.5485, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2687938119410201, |
| "grad_norm": 0.5173119662805489, |
| "learning_rate": 8.755573142027228e-06, |
| "loss": 0.5624, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.26927725404882763, |
| "grad_norm": 0.4812632123799694, |
| "learning_rate": 8.750291013968432e-06, |
| "loss": 0.5562, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2697606961566352, |
| "grad_norm": 0.472663174890125, |
| "learning_rate": 8.744999299863549e-06, |
| "loss": 0.5669, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2702441382644428, |
| "grad_norm": 0.463122081686998, |
| "learning_rate": 8.739698013238625e-06, |
| "loss": 0.557, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2707275803722504, |
| "grad_norm": 0.5188284707009508, |
| "learning_rate": 8.734387167644171e-06, |
| "loss": 0.5202, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.271211022480058, |
| "grad_norm": 0.49659334079030504, |
| "learning_rate": 8.729066776655144e-06, |
| "loss": 0.5605, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2716944645878656, |
| "grad_norm": 0.4433765304016552, |
| "learning_rate": 8.723736853870888e-06, |
| "loss": 0.5193, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2721779066956732, |
| "grad_norm": 0.46285084832800716, |
| "learning_rate": 8.718397412915114e-06, |
| "loss": 0.5583, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2726613488034808, |
| "grad_norm": 0.4406166472711255, |
| "learning_rate": 8.713048467435865e-06, |
| "loss": 0.5365, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.27314479091128835, |
| "grad_norm": 0.49591339367367465, |
| "learning_rate": 8.707690031105478e-06, |
| "loss": 0.5638, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.27362823301909595, |
| "grad_norm": 0.47717175741546425, |
| "learning_rate": 8.702322117620547e-06, |
| "loss": 0.5375, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.27411167512690354, |
| "grad_norm": 0.49399204569955096, |
| "learning_rate": 8.696944740701891e-06, |
| "loss": 0.5502, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.27459511723471114, |
| "grad_norm": 0.462084403002843, |
| "learning_rate": 8.69155791409452e-06, |
| "loss": 0.549, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.27507855934251874, |
| "grad_norm": 0.4733870628371529, |
| "learning_rate": 8.686161651567596e-06, |
| "loss": 0.5479, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.27556200145032633, |
| "grad_norm": 0.4586305030542931, |
| "learning_rate": 8.6807559669144e-06, |
| "loss": 0.517, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.27604544355813393, |
| "grad_norm": 0.4786603573138254, |
| "learning_rate": 8.6753408739523e-06, |
| "loss": 0.5449, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2765288856659415, |
| "grad_norm": 0.4923356872720239, |
| "learning_rate": 8.669916386522708e-06, |
| "loss": 0.5516, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.27701232777374907, |
| "grad_norm": 0.47497918747290174, |
| "learning_rate": 8.664482518491053e-06, |
| "loss": 0.5527, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.27749576988155666, |
| "grad_norm": 0.5463551243922615, |
| "learning_rate": 8.659039283746738e-06, |
| "loss": 0.5528, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.27797921198936426, |
| "grad_norm": 0.5125817786426824, |
| "learning_rate": 8.653586696203111e-06, |
| "loss": 0.5428, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.27846265409717186, |
| "grad_norm": 0.46930359618316736, |
| "learning_rate": 8.648124769797424e-06, |
| "loss": 0.5566, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.27894609620497945, |
| "grad_norm": 0.48871061545968875, |
| "learning_rate": 8.6426535184908e-06, |
| "loss": 0.5517, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.27942953831278705, |
| "grad_norm": 0.49517469382405177, |
| "learning_rate": 8.637172956268203e-06, |
| "loss": 0.5537, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.27991298042059465, |
| "grad_norm": 0.46885534036424203, |
| "learning_rate": 8.631683097138386e-06, |
| "loss": 0.5455, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.28039642252840224, |
| "grad_norm": 0.4404595141316285, |
| "learning_rate": 8.626183955133876e-06, |
| "loss": 0.5216, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2808798646362098, |
| "grad_norm": 0.45805738086780906, |
| "learning_rate": 8.620675544310921e-06, |
| "loss": 0.5483, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.2813633067440174, |
| "grad_norm": 0.43283074014496, |
| "learning_rate": 8.615157878749462e-06, |
| "loss": 0.546, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.281846748851825, |
| "grad_norm": 0.45714306807295113, |
| "learning_rate": 8.609630972553098e-06, |
| "loss": 0.5521, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2823301909596326, |
| "grad_norm": 0.47311376331001226, |
| "learning_rate": 8.604094839849047e-06, |
| "loss": 0.5586, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2828136330674402, |
| "grad_norm": 0.4424955765808361, |
| "learning_rate": 8.598549494788111e-06, |
| "loss": 0.5384, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.28329707517524777, |
| "grad_norm": 0.467505480407099, |
| "learning_rate": 8.592994951544637e-06, |
| "loss": 0.5368, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.28378051728305537, |
| "grad_norm": 0.48553503600686004, |
| "learning_rate": 8.587431224316488e-06, |
| "loss": 0.5475, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.28426395939086296, |
| "grad_norm": 0.46529868946828945, |
| "learning_rate": 8.581858327324996e-06, |
| "loss": 0.5212, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.28474740149867056, |
| "grad_norm": 0.4898248932325677, |
| "learning_rate": 8.576276274814936e-06, |
| "loss": 0.553, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2852308436064781, |
| "grad_norm": 0.4455201034159363, |
| "learning_rate": 8.570685081054487e-06, |
| "loss": 0.5216, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.46516198660507346, |
| "learning_rate": 8.565084760335188e-06, |
| "loss": 0.5505, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2861977278220933, |
| "grad_norm": 0.5006509760317717, |
| "learning_rate": 8.559475326971907e-06, |
| "loss": 0.551, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2866811699299009, |
| "grad_norm": 0.4994980078510237, |
| "learning_rate": 8.553856795302815e-06, |
| "loss": 0.5421, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2871646120377085, |
| "grad_norm": 0.5029150812228765, |
| "learning_rate": 8.548229179689325e-06, |
| "loss": 0.5519, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2876480541455161, |
| "grad_norm": 0.5073270485472724, |
| "learning_rate": 8.54259249451608e-06, |
| "loss": 0.5537, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.2881314962533237, |
| "grad_norm": 0.45709172284548705, |
| "learning_rate": 8.536946754190903e-06, |
| "loss": 0.564, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2886149383611313, |
| "grad_norm": 0.47174309410425874, |
| "learning_rate": 8.531291973144755e-06, |
| "loss": 0.5452, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2890983804689388, |
| "grad_norm": 0.46639713589843634, |
| "learning_rate": 8.52562816583172e-06, |
| "loss": 0.5509, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.2895818225767464, |
| "grad_norm": 0.4508036851803557, |
| "learning_rate": 8.519955346728939e-06, |
| "loss": 0.5428, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.290065264684554, |
| "grad_norm": 0.44468353218524803, |
| "learning_rate": 8.5142735303366e-06, |
| "loss": 0.5205, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2905487067923616, |
| "grad_norm": 0.4850164433619974, |
| "learning_rate": 8.50858273117788e-06, |
| "loss": 0.5476, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2910321489001692, |
| "grad_norm": 0.49303870805000655, |
| "learning_rate": 8.502882963798923e-06, |
| "loss": 0.545, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2915155910079768, |
| "grad_norm": 0.47572858582093197, |
| "learning_rate": 8.497174242768792e-06, |
| "loss": 0.5515, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2919990331157844, |
| "grad_norm": 0.5284607359345597, |
| "learning_rate": 8.49145658267944e-06, |
| "loss": 0.5453, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.292482475223592, |
| "grad_norm": 0.47829654266425203, |
| "learning_rate": 8.485729998145665e-06, |
| "loss": 0.5452, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.29296591733139954, |
| "grad_norm": 0.4503645291799449, |
| "learning_rate": 8.479994503805079e-06, |
| "loss": 0.5536, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.29344935943920714, |
| "grad_norm": 0.4693738299713831, |
| "learning_rate": 8.474250114318066e-06, |
| "loss": 0.5216, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.29393280154701473, |
| "grad_norm": 0.4988674830387375, |
| "learning_rate": 8.468496844367752e-06, |
| "loss": 0.5582, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.29441624365482233, |
| "grad_norm": 0.47627140431869974, |
| "learning_rate": 8.462734708659959e-06, |
| "loss": 0.5511, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2948996857626299, |
| "grad_norm": 0.43233992742433075, |
| "learning_rate": 8.456963721923166e-06, |
| "loss": 0.5279, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2953831278704375, |
| "grad_norm": 0.4908070311501362, |
| "learning_rate": 8.451183898908484e-06, |
| "loss": 0.5546, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.2958665699782451, |
| "grad_norm": 0.4519643584485447, |
| "learning_rate": 8.445395254389605e-06, |
| "loss": 0.5221, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2963500120860527, |
| "grad_norm": 0.48396713453490725, |
| "learning_rate": 8.439597803162773e-06, |
| "loss": 0.5489, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.29683345419386026, |
| "grad_norm": 0.4611763742603572, |
| "learning_rate": 8.433791560046737e-06, |
| "loss": 0.5457, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.29731689630166785, |
| "grad_norm": 0.472544396347692, |
| "learning_rate": 8.427976539882725e-06, |
| "loss": 0.5553, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.29780033840947545, |
| "grad_norm": 0.5058827141310254, |
| "learning_rate": 8.422152757534395e-06, |
| "loss": 0.5435, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.29828378051728305, |
| "grad_norm": 0.4766589825937423, |
| "learning_rate": 8.416320227887805e-06, |
| "loss": 0.5526, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.29876722262509064, |
| "grad_norm": 0.47223702801719897, |
| "learning_rate": 8.410478965851371e-06, |
| "loss": 0.5542, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.29925066473289824, |
| "grad_norm": 0.4819039683875086, |
| "learning_rate": 8.404628986355832e-06, |
| "loss": 0.5546, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.29973410684070584, |
| "grad_norm": 0.49462386708237827, |
| "learning_rate": 8.398770304354203e-06, |
| "loss": 0.5566, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.30021754894851344, |
| "grad_norm": 0.4977376021667819, |
| "learning_rate": 8.39290293482175e-06, |
| "loss": 0.5508, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.30070099105632103, |
| "grad_norm": 0.506465713525892, |
| "learning_rate": 8.387026892755942e-06, |
| "loss": 0.5568, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3011844331641286, |
| "grad_norm": 0.4712688161265267, |
| "learning_rate": 8.381142193176414e-06, |
| "loss": 0.5489, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.30166787527193617, |
| "grad_norm": 0.4615215470431895, |
| "learning_rate": 8.375248851124937e-06, |
| "loss": 0.5554, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.30215131737974377, |
| "grad_norm": 0.5110895222198079, |
| "learning_rate": 8.369346881665364e-06, |
| "loss": 0.5466, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.30263475948755136, |
| "grad_norm": 0.47157470051165545, |
| "learning_rate": 8.363436299883604e-06, |
| "loss": 0.5644, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.30311820159535896, |
| "grad_norm": 0.4789841431133952, |
| "learning_rate": 8.357517120887586e-06, |
| "loss": 0.5493, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.30360164370316656, |
| "grad_norm": 0.47629409809645545, |
| "learning_rate": 8.351589359807204e-06, |
| "loss": 0.5523, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.30408508581097415, |
| "grad_norm": 0.4618925314784255, |
| "learning_rate": 8.345653031794292e-06, |
| "loss": 0.5348, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.30456852791878175, |
| "grad_norm": 0.500646417496574, |
| "learning_rate": 8.339708152022586e-06, |
| "loss": 0.554, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3050519700265893, |
| "grad_norm": 0.4553680483630167, |
| "learning_rate": 8.333754735687677e-06, |
| "loss": 0.5489, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3055354121343969, |
| "grad_norm": 0.4591000336987377, |
| "learning_rate": 8.327792798006977e-06, |
| "loss": 0.5508, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3060188542422045, |
| "grad_norm": 0.48696012413599493, |
| "learning_rate": 8.321822354219677e-06, |
| "loss": 0.5505, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3065022963500121, |
| "grad_norm": 0.4952413093498077, |
| "learning_rate": 8.315843419586717e-06, |
| "loss": 0.5574, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3069857384578197, |
| "grad_norm": 0.4716786308005616, |
| "learning_rate": 8.309856009390732e-06, |
| "loss": 0.5281, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3074691805656273, |
| "grad_norm": 0.5207738583309734, |
| "learning_rate": 8.303860138936027e-06, |
| "loss": 0.5607, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3079526226734349, |
| "grad_norm": 0.5226978234399785, |
| "learning_rate": 8.297855823548528e-06, |
| "loss": 0.5565, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.30843606478124247, |
| "grad_norm": 0.49251100209183046, |
| "learning_rate": 8.291843078575752e-06, |
| "loss": 0.5485, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.30891950688905, |
| "grad_norm": 0.4769824051475033, |
| "learning_rate": 8.285821919386758e-06, |
| "loss": 0.5456, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3094029489968576, |
| "grad_norm": 0.503019530780954, |
| "learning_rate": 8.279792361372114e-06, |
| "loss": 0.5602, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3098863911046652, |
| "grad_norm": 0.48405162661408385, |
| "learning_rate": 8.273754419943856e-06, |
| "loss": 0.5536, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3103698332124728, |
| "grad_norm": 0.4657304337869963, |
| "learning_rate": 8.267708110535449e-06, |
| "loss": 0.5477, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3108532753202804, |
| "grad_norm": 0.5106373655355231, |
| "learning_rate": 8.26165344860175e-06, |
| "loss": 0.571, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.311336717428088, |
| "grad_norm": 0.4854760780132044, |
| "learning_rate": 8.255590449618958e-06, |
| "loss": 0.546, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3118201595358956, |
| "grad_norm": 0.4817908473273075, |
| "learning_rate": 8.24951912908459e-06, |
| "loss": 0.5446, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3123036016437032, |
| "grad_norm": 0.4792564537130554, |
| "learning_rate": 8.243439502517432e-06, |
| "loss": 0.5352, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.31278704375151073, |
| "grad_norm": 0.5188490831185355, |
| "learning_rate": 8.237351585457499e-06, |
| "loss": 0.5298, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3132704858593183, |
| "grad_norm": 0.5232755336111542, |
| "learning_rate": 8.231255393465993e-06, |
| "loss": 0.5387, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3137539279671259, |
| "grad_norm": 0.48933101067554713, |
| "learning_rate": 8.225150942125278e-06, |
| "loss": 0.5156, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.3142373700749335, |
| "grad_norm": 0.47579138598403903, |
| "learning_rate": 8.21903824703882e-06, |
| "loss": 0.552, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3147208121827411, |
| "grad_norm": 0.47742859766681844, |
| "learning_rate": 8.21291732383116e-06, |
| "loss": 0.5498, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3152042542905487, |
| "grad_norm": 0.5282098462854927, |
| "learning_rate": 8.206788188147874e-06, |
| "loss": 0.5327, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3156876963983563, |
| "grad_norm": 0.4655983753785802, |
| "learning_rate": 8.200650855655525e-06, |
| "loss": 0.5523, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3161711385061639, |
| "grad_norm": 0.46598993965098007, |
| "learning_rate": 8.19450534204163e-06, |
| "loss": 0.5428, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3166545806139715, |
| "grad_norm": 0.48320040727215685, |
| "learning_rate": 8.188351663014615e-06, |
| "loss": 0.5511, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.31713802272177904, |
| "grad_norm": 0.4851268795547935, |
| "learning_rate": 8.182189834303783e-06, |
| "loss": 0.5515, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.31762146482958664, |
| "grad_norm": 0.4829311813743368, |
| "learning_rate": 8.176019871659263e-06, |
| "loss": 0.5425, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.31810490693739424, |
| "grad_norm": 0.4268110510337058, |
| "learning_rate": 8.169841790851976e-06, |
| "loss": 0.5192, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.31858834904520184, |
| "grad_norm": 0.46970357309915234, |
| "learning_rate": 8.163655607673594e-06, |
| "loss": 0.5516, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.31907179115300943, |
| "grad_norm": 0.4688205789040297, |
| "learning_rate": 8.157461337936506e-06, |
| "loss": 0.5398, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.31955523326081703, |
| "grad_norm": 0.49966496418563966, |
| "learning_rate": 8.151258997473757e-06, |
| "loss": 0.5501, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3200386753686246, |
| "grad_norm": 0.4715831395525512, |
| "learning_rate": 8.145048602139031e-06, |
| "loss": 0.5473, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3205221174764322, |
| "grad_norm": 0.46025918766438206, |
| "learning_rate": 8.138830167806601e-06, |
| "loss": 0.5481, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.32100555958423976, |
| "grad_norm": 0.49250849769551697, |
| "learning_rate": 8.132603710371287e-06, |
| "loss": 0.5563, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.32148900169204736, |
| "grad_norm": 0.46277397720994495, |
| "learning_rate": 8.126369245748413e-06, |
| "loss": 0.5418, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.32197244379985496, |
| "grad_norm": 0.44842320811529324, |
| "learning_rate": 8.120126789873775e-06, |
| "loss": 0.549, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.32245588590766255, |
| "grad_norm": 0.4487718178782243, |
| "learning_rate": 8.113876358703593e-06, |
| "loss": 0.5515, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.32293932801547015, |
| "grad_norm": 0.49737040438900676, |
| "learning_rate": 8.10761796821447e-06, |
| "loss": 0.5529, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.32342277012327775, |
| "grad_norm": 0.5088088437400782, |
| "learning_rate": 8.10135163440336e-06, |
| "loss": 0.5507, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.32390621223108534, |
| "grad_norm": 0.5221100660415426, |
| "learning_rate": 8.095077373287517e-06, |
| "loss": 0.5363, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.32438965433889294, |
| "grad_norm": 0.5098038198929602, |
| "learning_rate": 8.088795200904457e-06, |
| "loss": 0.5443, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3248730964467005, |
| "grad_norm": 0.5299548080054053, |
| "learning_rate": 8.08250513331192e-06, |
| "loss": 0.5547, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3253565385545081, |
| "grad_norm": 0.47991648628747413, |
| "learning_rate": 8.076207186587826e-06, |
| "loss": 0.552, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3258399806623157, |
| "grad_norm": 0.4928995313967277, |
| "learning_rate": 8.069901376830232e-06, |
| "loss": 0.5449, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3263234227701233, |
| "grad_norm": 0.526245201002504, |
| "learning_rate": 8.063587720157298e-06, |
| "loss": 0.5544, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.32680686487793087, |
| "grad_norm": 0.5169185895561939, |
| "learning_rate": 8.057266232707239e-06, |
| "loss": 0.5388, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.32729030698573847, |
| "grad_norm": 0.45862190884382065, |
| "learning_rate": 8.050936930638285e-06, |
| "loss": 0.5523, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.32777374909354606, |
| "grad_norm": 0.4791194354627634, |
| "learning_rate": 8.044599830128643e-06, |
| "loss": 0.5498, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.32825719120135366, |
| "grad_norm": 0.5040011739287719, |
| "learning_rate": 8.038254947376454e-06, |
| "loss": 0.5378, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3287406333091612, |
| "grad_norm": 0.42346684737245893, |
| "learning_rate": 8.03190229859975e-06, |
| "loss": 0.5541, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3292240754169688, |
| "grad_norm": 0.48225697444636256, |
| "learning_rate": 8.02554190003641e-06, |
| "loss": 0.5505, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3297075175247764, |
| "grad_norm": 0.480200233217211, |
| "learning_rate": 8.019173767944128e-06, |
| "loss": 0.5563, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.330190959632584, |
| "grad_norm": 0.4406037883552503, |
| "learning_rate": 8.012797918600363e-06, |
| "loss": 0.5241, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3306744017403916, |
| "grad_norm": 0.4838913486529156, |
| "learning_rate": 8.006414368302297e-06, |
| "loss": 0.5251, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3311578438481992, |
| "grad_norm": 0.45454190895682295, |
| "learning_rate": 8.000023133366804e-06, |
| "loss": 0.5449, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3316412859560068, |
| "grad_norm": 0.49869890620532237, |
| "learning_rate": 7.99362423013039e-06, |
| "loss": 0.5401, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3321247280638144, |
| "grad_norm": 0.4727231220514769, |
| "learning_rate": 7.98721767494917e-06, |
| "loss": 0.5381, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.332608170171622, |
| "grad_norm": 0.46944667758244535, |
| "learning_rate": 7.980803484198817e-06, |
| "loss": 0.5542, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3330916122794295, |
| "grad_norm": 0.4643616722232514, |
| "learning_rate": 7.974381674274517e-06, |
| "loss": 0.5394, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.3335750543872371, |
| "grad_norm": 0.4529493856728362, |
| "learning_rate": 7.967952261590936e-06, |
| "loss": 0.5478, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3340584964950447, |
| "grad_norm": 0.4497900124215144, |
| "learning_rate": 7.961515262582168e-06, |
| "loss": 0.5387, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3345419386028523, |
| "grad_norm": 0.4613195703294155, |
| "learning_rate": 7.955070693701704e-06, |
| "loss": 0.5488, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3350253807106599, |
| "grad_norm": 0.45208853687907335, |
| "learning_rate": 7.94861857142238e-06, |
| "loss": 0.5161, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.3355088228184675, |
| "grad_norm": 0.45338462953665065, |
| "learning_rate": 7.942158912236339e-06, |
| "loss": 0.5504, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3359922649262751, |
| "grad_norm": 0.45784135957705213, |
| "learning_rate": 7.935691732654995e-06, |
| "loss": 0.5525, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3364757070340827, |
| "grad_norm": 0.4745455134248678, |
| "learning_rate": 7.929217049208977e-06, |
| "loss": 0.5549, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.33695914914189024, |
| "grad_norm": 0.46788843343497605, |
| "learning_rate": 7.922734878448099e-06, |
| "loss": 0.5543, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.33744259124969783, |
| "grad_norm": 0.4894111106267614, |
| "learning_rate": 7.916245236941311e-06, |
| "loss": 0.5456, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.33792603335750543, |
| "grad_norm": 0.4818527781927651, |
| "learning_rate": 7.90974814127666e-06, |
| "loss": 0.5436, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.338409475465313, |
| "grad_norm": 0.48230512049955104, |
| "learning_rate": 7.903243608061246e-06, |
| "loss": 0.5569, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3388929175731206, |
| "grad_norm": 0.4651013778967097, |
| "learning_rate": 7.89673165392118e-06, |
| "loss": 0.5497, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3393763596809282, |
| "grad_norm": 0.5263037891579944, |
| "learning_rate": 7.890212295501542e-06, |
| "loss": 0.5489, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3398598017887358, |
| "grad_norm": 0.47525750483933155, |
| "learning_rate": 7.883685549466337e-06, |
| "loss": 0.5438, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3403432438965434, |
| "grad_norm": 0.48435256135519467, |
| "learning_rate": 7.877151432498456e-06, |
| "loss": 0.5506, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.34082668600435095, |
| "grad_norm": 0.49040296450298604, |
| "learning_rate": 7.870609961299627e-06, |
| "loss": 0.536, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.34131012811215855, |
| "grad_norm": 0.4437135993163076, |
| "learning_rate": 7.864061152590376e-06, |
| "loss": 0.5539, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.34179357021996615, |
| "grad_norm": 0.48585487486606105, |
| "learning_rate": 7.857505023109989e-06, |
| "loss": 0.5461, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.34227701232777374, |
| "grad_norm": 0.48202275018795376, |
| "learning_rate": 7.850941589616458e-06, |
| "loss": 0.5371, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.34276045443558134, |
| "grad_norm": 0.4716406712767161, |
| "learning_rate": 7.844370868886452e-06, |
| "loss": 0.5557, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.34324389654338894, |
| "grad_norm": 0.49083867550017374, |
| "learning_rate": 7.83779287771526e-06, |
| "loss": 0.5459, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.34372733865119653, |
| "grad_norm": 0.49576681886311147, |
| "learning_rate": 7.831207632916757e-06, |
| "loss": 0.5466, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.34421078075900413, |
| "grad_norm": 0.4533195955521626, |
| "learning_rate": 7.824615151323363e-06, |
| "loss": 0.519, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3446942228668117, |
| "grad_norm": 0.4638295505748454, |
| "learning_rate": 7.818015449785987e-06, |
| "loss": 0.5485, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.34517766497461927, |
| "grad_norm": 0.4802273717901249, |
| "learning_rate": 7.811408545174001e-06, |
| "loss": 0.5453, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.34566110708242687, |
| "grad_norm": 0.4633815927205105, |
| "learning_rate": 7.804794454375189e-06, |
| "loss": 0.5504, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.34614454919023446, |
| "grad_norm": 0.4455507327132057, |
| "learning_rate": 7.798173194295693e-06, |
| "loss": 0.5425, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.34662799129804206, |
| "grad_norm": 0.4555461305882042, |
| "learning_rate": 7.791544781859993e-06, |
| "loss": 0.5402, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.34711143340584966, |
| "grad_norm": 0.4459519768062681, |
| "learning_rate": 7.784909234010843e-06, |
| "loss": 0.5448, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.34759487551365725, |
| "grad_norm": 0.43036102684437805, |
| "learning_rate": 7.778266567709239e-06, |
| "loss": 0.5532, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.34807831762146485, |
| "grad_norm": 0.4640780423848208, |
| "learning_rate": 7.771616799934372e-06, |
| "loss": 0.5403, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.34856175972927245, |
| "grad_norm": 0.4785048230046999, |
| "learning_rate": 7.764959947683581e-06, |
| "loss": 0.5484, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.34904520183708, |
| "grad_norm": 0.5054523460781126, |
| "learning_rate": 7.758296027972324e-06, |
| "loss": 0.5367, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3495286439448876, |
| "grad_norm": 0.46292028395178175, |
| "learning_rate": 7.751625057834107e-06, |
| "loss": 0.5221, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3500120860526952, |
| "grad_norm": 0.5080355944834025, |
| "learning_rate": 7.744947054320475e-06, |
| "loss": 0.552, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.3504955281605028, |
| "grad_norm": 0.4692605361826857, |
| "learning_rate": 7.73826203450094e-06, |
| "loss": 0.5516, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3509789702683104, |
| "grad_norm": 0.4452014990295793, |
| "learning_rate": 7.731570015462953e-06, |
| "loss": 0.5385, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.35146241237611797, |
| "grad_norm": 0.4407238797603078, |
| "learning_rate": 7.724871014311853e-06, |
| "loss": 0.5512, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.35194585448392557, |
| "grad_norm": 0.4578450461185458, |
| "learning_rate": 7.718165048170827e-06, |
| "loss": 0.5436, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.35242929659173317, |
| "grad_norm": 0.47134089664050416, |
| "learning_rate": 7.711452134180865e-06, |
| "loss": 0.5439, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3529127386995407, |
| "grad_norm": 0.45807802586279717, |
| "learning_rate": 7.704732289500717e-06, |
| "loss": 0.535, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3533961808073483, |
| "grad_norm": 0.46923078006226726, |
| "learning_rate": 7.698005531306844e-06, |
| "loss": 0.5438, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3538796229151559, |
| "grad_norm": 0.47163216368627525, |
| "learning_rate": 7.691271876793387e-06, |
| "loss": 0.5412, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3543630650229635, |
| "grad_norm": 0.43982060964801745, |
| "learning_rate": 7.684531343172108e-06, |
| "loss": 0.5326, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.3548465071307711, |
| "grad_norm": 0.49898422329655673, |
| "learning_rate": 7.677783947672352e-06, |
| "loss": 0.5352, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3553299492385787, |
| "grad_norm": 0.4426035951569431, |
| "learning_rate": 7.67102970754101e-06, |
| "loss": 0.5083, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3558133913463863, |
| "grad_norm": 0.473881609856312, |
| "learning_rate": 7.664268640042459e-06, |
| "loss": 0.5493, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3562968334541939, |
| "grad_norm": 0.48477702288906854, |
| "learning_rate": 7.657500762458536e-06, |
| "loss": 0.5415, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.3567802755620014, |
| "grad_norm": 0.4673684560489235, |
| "learning_rate": 7.65072609208848e-06, |
| "loss": 0.5402, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.357263717669809, |
| "grad_norm": 0.45922228645390506, |
| "learning_rate": 7.643944646248898e-06, |
| "loss": 0.5523, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.3577471597776166, |
| "grad_norm": 0.5023203702238386, |
| "learning_rate": 7.637156442273705e-06, |
| "loss": 0.5472, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3582306018854242, |
| "grad_norm": 0.5158066743775931, |
| "learning_rate": 7.630361497514104e-06, |
| "loss": 0.5409, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3587140439932318, |
| "grad_norm": 0.4279389055361383, |
| "learning_rate": 7.6235598293385184e-06, |
| "loss": 0.548, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3591974861010394, |
| "grad_norm": 0.48124896416843527, |
| "learning_rate": 7.616751455132561e-06, |
| "loss": 0.5061, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.359680928208847, |
| "grad_norm": 0.45130820986839676, |
| "learning_rate": 7.6099363922989845e-06, |
| "loss": 0.5408, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3601643703166546, |
| "grad_norm": 0.43968032097493187, |
| "learning_rate": 7.60311465825764e-06, |
| "loss": 0.5419, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.36064781242446214, |
| "grad_norm": 0.4638052394642039, |
| "learning_rate": 7.596286270445429e-06, |
| "loss": 0.5474, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.36113125453226974, |
| "grad_norm": 0.47215533812036253, |
| "learning_rate": 7.5894512463162595e-06, |
| "loss": 0.5481, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.36161469664007734, |
| "grad_norm": 0.4910077404120728, |
| "learning_rate": 7.5826096033410056e-06, |
| "loss": 0.5483, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.36209813874788493, |
| "grad_norm": 0.49435519282302404, |
| "learning_rate": 7.575761359007459e-06, |
| "loss": 0.5375, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.36258158085569253, |
| "grad_norm": 0.4645080520487796, |
| "learning_rate": 7.568906530820281e-06, |
| "loss": 0.5406, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.36306502296350013, |
| "grad_norm": 0.48056836362105476, |
| "learning_rate": 7.562045136300969e-06, |
| "loss": 0.547, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3635484650713077, |
| "grad_norm": 0.4648855493783244, |
| "learning_rate": 7.555177192987797e-06, |
| "loss": 0.5372, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.3640319071791153, |
| "grad_norm": 0.43851001194612105, |
| "learning_rate": 7.5483027184357825e-06, |
| "loss": 0.5484, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.3645153492869229, |
| "grad_norm": 0.4234049796935857, |
| "learning_rate": 7.541421730216638e-06, |
| "loss": 0.4914, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.36499879139473046, |
| "grad_norm": 0.4886945785128111, |
| "learning_rate": 7.534534245918723e-06, |
| "loss": 0.5362, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.36548223350253806, |
| "grad_norm": 0.47490196043064764, |
| "learning_rate": 7.527640283147003e-06, |
| "loss": 0.5387, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.36596567561034565, |
| "grad_norm": 0.4422931000418374, |
| "learning_rate": 7.520739859523001e-06, |
| "loss": 0.5334, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.36644911771815325, |
| "grad_norm": 0.45103949345201827, |
| "learning_rate": 7.513832992684758e-06, |
| "loss": 0.5423, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.36693255982596085, |
| "grad_norm": 0.5023542886793314, |
| "learning_rate": 7.50691970028678e-06, |
| "loss": 0.5371, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.36741600193376844, |
| "grad_norm": 0.48961990962706975, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.5602, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.36789944404157604, |
| "grad_norm": 0.47244479352550756, |
| "learning_rate": 7.493073909511732e-06, |
| "loss": 0.5399, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.36838288614938364, |
| "grad_norm": 0.47262373426445514, |
| "learning_rate": 7.486141446525619e-06, |
| "loss": 0.5465, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3688663282571912, |
| "grad_norm": 0.47837508137309714, |
| "learning_rate": 7.479202628761597e-06, |
| "loss": 0.5412, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3693497703649988, |
| "grad_norm": 0.47771254343171743, |
| "learning_rate": 7.472257473955841e-06, |
| "loss": 0.5429, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.36983321247280637, |
| "grad_norm": 0.45892920422210776, |
| "learning_rate": 7.465305999860728e-06, |
| "loss": 0.5358, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.37031665458061397, |
| "grad_norm": 0.4636269912834914, |
| "learning_rate": 7.4583482242447856e-06, |
| "loss": 0.528, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.37080009668842157, |
| "grad_norm": 0.45196732778688614, |
| "learning_rate": 7.45138416489265e-06, |
| "loss": 0.5466, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.37128353879622916, |
| "grad_norm": 0.44240214579051484, |
| "learning_rate": 7.444413839605017e-06, |
| "loss": 0.5315, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.37176698090403676, |
| "grad_norm": 0.45295775865600874, |
| "learning_rate": 7.437437266198602e-06, |
| "loss": 0.5443, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.37225042301184436, |
| "grad_norm": 0.44966146652009026, |
| "learning_rate": 7.430454462506085e-06, |
| "loss": 0.5417, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3727338651196519, |
| "grad_norm": 0.451144144721217, |
| "learning_rate": 7.423465446376079e-06, |
| "loss": 0.5389, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3732173072274595, |
| "grad_norm": 0.45620688667035586, |
| "learning_rate": 7.416470235673069e-06, |
| "loss": 0.538, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3737007493352671, |
| "grad_norm": 0.44604596469243557, |
| "learning_rate": 7.40946884827738e-06, |
| "loss": 0.5293, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3741841914430747, |
| "grad_norm": 0.4476638425696451, |
| "learning_rate": 7.402461302085121e-06, |
| "loss": 0.5402, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3746676335508823, |
| "grad_norm": 0.46401364076227924, |
| "learning_rate": 7.395447615008147e-06, |
| "loss": 0.5377, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3751510756586899, |
| "grad_norm": 0.46250351228418424, |
| "learning_rate": 7.388427804974003e-06, |
| "loss": 0.5455, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3756345177664975, |
| "grad_norm": 0.448294668881327, |
| "learning_rate": 7.381401889925894e-06, |
| "loss": 0.5311, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.3761179598743051, |
| "grad_norm": 0.4490599419042619, |
| "learning_rate": 7.374369887822623e-06, |
| "loss": 0.5416, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3766014019821126, |
| "grad_norm": 0.44904272890455516, |
| "learning_rate": 7.367331816638554e-06, |
| "loss": 0.5464, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3770848440899202, |
| "grad_norm": 0.4731428544902919, |
| "learning_rate": 7.360287694363566e-06, |
| "loss": 0.5415, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3775682861977278, |
| "grad_norm": 0.4760085542862297, |
| "learning_rate": 7.353237539002999e-06, |
| "loss": 0.5388, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.3780517283055354, |
| "grad_norm": 0.4817431722421546, |
| "learning_rate": 7.346181368577624e-06, |
| "loss": 0.5513, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.378535170413343, |
| "grad_norm": 0.462287277146555, |
| "learning_rate": 7.3391192011235764e-06, |
| "loss": 0.5393, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.3790186125211506, |
| "grad_norm": 0.44812435609118556, |
| "learning_rate": 7.3320510546923285e-06, |
| "loss": 0.5509, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3795020546289582, |
| "grad_norm": 0.48025260306275075, |
| "learning_rate": 7.324976947350631e-06, |
| "loss": 0.5387, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3799854967367658, |
| "grad_norm": 0.4639545538957294, |
| "learning_rate": 7.317896897180472e-06, |
| "loss": 0.5298, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3804689388445734, |
| "grad_norm": 0.46088677266135386, |
| "learning_rate": 7.31081092227903e-06, |
| "loss": 0.5371, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.4446814202000039, |
| "learning_rate": 7.303719040758631e-06, |
| "loss": 0.5368, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.38143582306018853, |
| "grad_norm": 0.4628164716114684, |
| "learning_rate": 7.296621270746691e-06, |
| "loss": 0.5439, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3819192651679961, |
| "grad_norm": 0.46916898249294825, |
| "learning_rate": 7.289517630385687e-06, |
| "loss": 0.5188, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3824027072758037, |
| "grad_norm": 0.44187034218765375, |
| "learning_rate": 7.282408137833093e-06, |
| "loss": 0.5404, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3828861493836113, |
| "grad_norm": 0.47877844839355055, |
| "learning_rate": 7.275292811261346e-06, |
| "loss": 0.5377, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3833695914914189, |
| "grad_norm": 0.46034254680546527, |
| "learning_rate": 7.268171668857794e-06, |
| "loss": 0.5489, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3838530335992265, |
| "grad_norm": 0.4557903207271804, |
| "learning_rate": 7.261044728824652e-06, |
| "loss": 0.5415, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3843364757070341, |
| "grad_norm": 0.477573699663043, |
| "learning_rate": 7.253912009378953e-06, |
| "loss": 0.5526, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.38481991781484165, |
| "grad_norm": 0.5200587123977845, |
| "learning_rate": 7.246773528752501e-06, |
| "loss": 0.5452, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.38530335992264925, |
| "grad_norm": 0.4504279257869106, |
| "learning_rate": 7.239629305191828e-06, |
| "loss": 0.528, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.38578680203045684, |
| "grad_norm": 0.47332344951633437, |
| "learning_rate": 7.2324793569581474e-06, |
| "loss": 0.5413, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.38627024413826444, |
| "grad_norm": 0.4658360771399747, |
| "learning_rate": 7.2253237023273e-06, |
| "loss": 0.5111, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.38675368624607204, |
| "grad_norm": 0.4820288472202763, |
| "learning_rate": 7.21816235958972e-06, |
| "loss": 0.5472, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.38723712835387963, |
| "grad_norm": 0.4713168204495061, |
| "learning_rate": 7.210995347050372e-06, |
| "loss": 0.5441, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.38772057046168723, |
| "grad_norm": 0.45204543208032005, |
| "learning_rate": 7.203822683028721e-06, |
| "loss": 0.5403, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3882040125694948, |
| "grad_norm": 0.4577372851640406, |
| "learning_rate": 7.196644385858673e-06, |
| "loss": 0.5303, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.38868745467730237, |
| "grad_norm": 0.4458565390228467, |
| "learning_rate": 7.189460473888535e-06, |
| "loss": 0.5453, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.38917089678510997, |
| "grad_norm": 0.4735785239686837, |
| "learning_rate": 7.182270965480963e-06, |
| "loss": 0.5491, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.38965433889291756, |
| "grad_norm": 0.44995709460533084, |
| "learning_rate": 7.17507587901292e-06, |
| "loss": 0.5328, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.39013778100072516, |
| "grad_norm": 0.45714294309440695, |
| "learning_rate": 7.167875232875632e-06, |
| "loss": 0.5401, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.39062122310853276, |
| "grad_norm": 0.45625350898261685, |
| "learning_rate": 7.160669045474524e-06, |
| "loss": 0.5198, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.39110466521634035, |
| "grad_norm": 0.45109568818047574, |
| "learning_rate": 7.153457335229196e-06, |
| "loss": 0.5396, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.39158810732414795, |
| "grad_norm": 0.4362283889114229, |
| "learning_rate": 7.146240120573358e-06, |
| "loss": 0.5421, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.39207154943195555, |
| "grad_norm": 0.44454460051068084, |
| "learning_rate": 7.1390174199547945e-06, |
| "loss": 0.5495, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.3925549915397631, |
| "grad_norm": 0.47028424443890243, |
| "learning_rate": 7.131789251835309e-06, |
| "loss": 0.5528, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3930384336475707, |
| "grad_norm": 0.44600311061331627, |
| "learning_rate": 7.124555634690684e-06, |
| "loss": 0.546, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.3935218757553783, |
| "grad_norm": 0.42127806705345067, |
| "learning_rate": 7.117316587010625e-06, |
| "loss": 0.5164, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3940053178631859, |
| "grad_norm": 0.4919804442725305, |
| "learning_rate": 7.110072127298722e-06, |
| "loss": 0.5405, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3944887599709935, |
| "grad_norm": 0.433673881020081, |
| "learning_rate": 7.1028222740724e-06, |
| "loss": 0.5474, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.39497220207880107, |
| "grad_norm": 0.44154650103792475, |
| "learning_rate": 7.095567045862867e-06, |
| "loss": 0.537, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.39545564418660867, |
| "grad_norm": 0.4862239172154185, |
| "learning_rate": 7.0883064612150684e-06, |
| "loss": 0.5418, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.39593908629441626, |
| "grad_norm": 0.44482243313717656, |
| "learning_rate": 7.081040538687649e-06, |
| "loss": 0.5421, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3964225284022238, |
| "grad_norm": 0.4674091065145529, |
| "learning_rate": 7.073769296852888e-06, |
| "loss": 0.5322, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3969059705100314, |
| "grad_norm": 0.42008927529357837, |
| "learning_rate": 7.066492754296668e-06, |
| "loss": 0.5021, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.397389412617839, |
| "grad_norm": 0.4604995654095531, |
| "learning_rate": 7.059210929618416e-06, |
| "loss": 0.5449, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.3978728547256466, |
| "grad_norm": 0.45094698267399413, |
| "learning_rate": 7.051923841431063e-06, |
| "loss": 0.5376, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.3983562968334542, |
| "grad_norm": 0.46562198473083916, |
| "learning_rate": 7.044631508360996e-06, |
| "loss": 0.5449, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3988397389412618, |
| "grad_norm": 0.45370053407350325, |
| "learning_rate": 7.037333949048005e-06, |
| "loss": 0.5443, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3993231810490694, |
| "grad_norm": 0.45490375066885613, |
| "learning_rate": 7.03003118214524e-06, |
| "loss": 0.4994, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.399806623156877, |
| "grad_norm": 0.4689025885486327, |
| "learning_rate": 7.022723226319159e-06, |
| "loss": 0.5249, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4002900652646846, |
| "grad_norm": 0.4587224558807211, |
| "learning_rate": 7.0154101002494914e-06, |
| "loss": 0.531, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4007735073724921, |
| "grad_norm": 0.43840918685603564, |
| "learning_rate": 7.008091822629172e-06, |
| "loss": 0.5331, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4012569494802997, |
| "grad_norm": 0.5046588696515812, |
| "learning_rate": 7.00076841216431e-06, |
| "loss": 0.5302, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4017403915881073, |
| "grad_norm": 0.485313377425035, |
| "learning_rate": 6.993439887574133e-06, |
| "loss": 0.5418, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4022238336959149, |
| "grad_norm": 0.47268076488419086, |
| "learning_rate": 6.986106267590942e-06, |
| "loss": 0.535, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4027072758037225, |
| "grad_norm": 0.4528644527398112, |
| "learning_rate": 6.978767570960057e-06, |
| "loss": 0.5362, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4031907179115301, |
| "grad_norm": 0.458241227473586, |
| "learning_rate": 6.971423816439782e-06, |
| "loss": 0.5346, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4036741600193377, |
| "grad_norm": 0.476721951726036, |
| "learning_rate": 6.964075022801341e-06, |
| "loss": 0.541, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.4041576021271453, |
| "grad_norm": 0.4634216130492689, |
| "learning_rate": 6.956721208828847e-06, |
| "loss": 0.5441, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.40464104423495284, |
| "grad_norm": 0.46202808494163927, |
| "learning_rate": 6.949362393319239e-06, |
| "loss": 0.5416, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.40512448634276044, |
| "grad_norm": 0.44981029617918733, |
| "learning_rate": 6.941998595082243e-06, |
| "loss": 0.5438, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.40560792845056803, |
| "grad_norm": 0.44835769630421096, |
| "learning_rate": 6.934629832940322e-06, |
| "loss": 0.5322, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.40609137055837563, |
| "grad_norm": 0.4879526087044361, |
| "learning_rate": 6.927256125728624e-06, |
| "loss": 0.544, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.4065748126661832, |
| "grad_norm": 0.47319742567660167, |
| "learning_rate": 6.91987749229494e-06, |
| "loss": 0.5401, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4070582547739908, |
| "grad_norm": 0.4492955794688954, |
| "learning_rate": 6.91249395149965e-06, |
| "loss": 0.5393, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.4075416968817984, |
| "grad_norm": 0.4436463500280526, |
| "learning_rate": 6.905105522215684e-06, |
| "loss": 0.5384, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.408025138989606, |
| "grad_norm": 0.43897712052796284, |
| "learning_rate": 6.897712223328457e-06, |
| "loss": 0.5297, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.40850858109741356, |
| "grad_norm": 0.5141765277378779, |
| "learning_rate": 6.89031407373584e-06, |
| "loss": 0.5386, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.40899202320522116, |
| "grad_norm": 0.48662407879743685, |
| "learning_rate": 6.8829110923481e-06, |
| "loss": 0.5429, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.40947546531302875, |
| "grad_norm": 0.4671606204232888, |
| "learning_rate": 6.875503298087853e-06, |
| "loss": 0.5339, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.40995890742083635, |
| "grad_norm": 0.47636170298906977, |
| "learning_rate": 6.868090709890016e-06, |
| "loss": 0.5392, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.41044234952864395, |
| "grad_norm": 0.4249603079226182, |
| "learning_rate": 6.8606733467017675e-06, |
| "loss": 0.5046, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.41092579163645154, |
| "grad_norm": 0.4669023850804772, |
| "learning_rate": 6.85325122748248e-06, |
| "loss": 0.5331, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.41140923374425914, |
| "grad_norm": 0.43795275267260886, |
| "learning_rate": 6.845824371203691e-06, |
| "loss": 0.5409, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.41189267585206674, |
| "grad_norm": 0.44363617551756607, |
| "learning_rate": 6.838392796849042e-06, |
| "loss": 0.5371, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4123761179598743, |
| "grad_norm": 0.4451816120752011, |
| "learning_rate": 6.830956523414239e-06, |
| "loss": 0.5304, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4128595600676819, |
| "grad_norm": 0.4684744852800341, |
| "learning_rate": 6.8235155699069944e-06, |
| "loss": 0.5316, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.41334300217548947, |
| "grad_norm": 0.44092290998190986, |
| "learning_rate": 6.816069955346986e-06, |
| "loss": 0.5127, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.41382644428329707, |
| "grad_norm": 0.49751628964469147, |
| "learning_rate": 6.808619698765804e-06, |
| "loss": 0.5459, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.41430988639110466, |
| "grad_norm": 0.45953817226939175, |
| "learning_rate": 6.8011648192069045e-06, |
| "loss": 0.5316, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.41479332849891226, |
| "grad_norm": 0.470694123027967, |
| "learning_rate": 6.7937053357255585e-06, |
| "loss": 0.5341, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.41527677060671986, |
| "grad_norm": 0.482654546386677, |
| "learning_rate": 6.786241267388812e-06, |
| "loss": 0.5392, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.41576021271452746, |
| "grad_norm": 0.49418422954918506, |
| "learning_rate": 6.778772633275421e-06, |
| "loss": 0.5259, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.41624365482233505, |
| "grad_norm": 0.4508128718503279, |
| "learning_rate": 6.771299452475818e-06, |
| "loss": 0.5439, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.4167270969301426, |
| "grad_norm": 0.483501145929134, |
| "learning_rate": 6.763821744092054e-06, |
| "loss": 0.521, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.4172105390379502, |
| "grad_norm": 0.45484324344169746, |
| "learning_rate": 6.756339527237756e-06, |
| "loss": 0.5282, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4176939811457578, |
| "grad_norm": 0.47800071067084154, |
| "learning_rate": 6.748852821038075e-06, |
| "loss": 0.5362, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4181774232535654, |
| "grad_norm": 0.4561384481336352, |
| "learning_rate": 6.741361644629629e-06, |
| "loss": 0.5452, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.418660865361373, |
| "grad_norm": 0.4974806458018085, |
| "learning_rate": 6.733866017160475e-06, |
| "loss": 0.5374, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4191443074691806, |
| "grad_norm": 0.4696802681175673, |
| "learning_rate": 6.7263659577900375e-06, |
| "loss": 0.5368, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4196277495769882, |
| "grad_norm": 0.46421987800289705, |
| "learning_rate": 6.718861485689077e-06, |
| "loss": 0.5361, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.42011119168479577, |
| "grad_norm": 0.42817223127930704, |
| "learning_rate": 6.711352620039623e-06, |
| "loss": 0.5132, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.4205946337926033, |
| "grad_norm": 0.4191221628297101, |
| "learning_rate": 6.703839380034945e-06, |
| "loss": 0.5282, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4210780759004109, |
| "grad_norm": 0.4151086659722137, |
| "learning_rate": 6.6963217848794895e-06, |
| "loss": 0.5046, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.4215615180082185, |
| "grad_norm": 0.4638906293888461, |
| "learning_rate": 6.6887998537888354e-06, |
| "loss": 0.5365, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4220449601160261, |
| "grad_norm": 0.46061668652958593, |
| "learning_rate": 6.681273605989643e-06, |
| "loss": 0.5315, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.4225284022238337, |
| "grad_norm": 0.5190170389663172, |
| "learning_rate": 6.673743060719613e-06, |
| "loss": 0.5328, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4230118443316413, |
| "grad_norm": 0.45460321890251315, |
| "learning_rate": 6.666208237227421e-06, |
| "loss": 0.5359, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.4234952864394489, |
| "grad_norm": 0.4292973087733905, |
| "learning_rate": 6.6586691547726855e-06, |
| "loss": 0.5139, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4239787285472565, |
| "grad_norm": 0.47082103025419264, |
| "learning_rate": 6.651125832625908e-06, |
| "loss": 0.5455, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.42446217065506403, |
| "grad_norm": 0.5113179606142492, |
| "learning_rate": 6.6435782900684284e-06, |
| "loss": 0.5528, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4249456127628716, |
| "grad_norm": 0.48001824284611705, |
| "learning_rate": 6.636026546392374e-06, |
| "loss": 0.5391, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.4254290548706792, |
| "grad_norm": 0.4190847750909328, |
| "learning_rate": 6.628470620900611e-06, |
| "loss": 0.5309, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4259124969784868, |
| "grad_norm": 0.4429086362697282, |
| "learning_rate": 6.620910532906692e-06, |
| "loss": 0.5194, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4263959390862944, |
| "grad_norm": 0.4425962079317176, |
| "learning_rate": 6.613346301734813e-06, |
| "loss": 0.5132, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.426879381194102, |
| "grad_norm": 0.4614396271141222, |
| "learning_rate": 6.605777946719757e-06, |
| "loss": 0.5358, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4273628233019096, |
| "grad_norm": 7.793396607210369, |
| "learning_rate": 6.59820548720685e-06, |
| "loss": 0.74, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4278462654097172, |
| "grad_norm": 0.47916638143499957, |
| "learning_rate": 6.590628942551909e-06, |
| "loss": 0.5401, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.42832970751752475, |
| "grad_norm": 0.5010520173856451, |
| "learning_rate": 6.583048332121193e-06, |
| "loss": 0.5384, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.42881314962533235, |
| "grad_norm": 0.43637840250511184, |
| "learning_rate": 6.5754636752913535e-06, |
| "loss": 0.5018, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.42929659173313994, |
| "grad_norm": 0.4347273985958766, |
| "learning_rate": 6.567874991449383e-06, |
| "loss": 0.5303, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.42978003384094754, |
| "grad_norm": 0.470696307591686, |
| "learning_rate": 6.560282299992571e-06, |
| "loss": 0.5454, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.43026347594875514, |
| "grad_norm": 0.43949603254219816, |
| "learning_rate": 6.552685620328447e-06, |
| "loss": 0.5115, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.43074691805656273, |
| "grad_norm": 0.5002179505262337, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.5386, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.43123036016437033, |
| "grad_norm": 0.48898211514461637, |
| "learning_rate": 6.537480374059313e-06, |
| "loss": 0.5464, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4317138022721779, |
| "grad_norm": 0.47111181264654595, |
| "learning_rate": 6.529871846320138e-06, |
| "loss": 0.5225, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4321972443799855, |
| "grad_norm": 0.47111234243294525, |
| "learning_rate": 6.522259408105223e-06, |
| "loss": 0.5363, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.43268068648779306, |
| "grad_norm": 0.4867092311804253, |
| "learning_rate": 6.514643078872571e-06, |
| "loss": 0.533, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.43316412859560066, |
| "grad_norm": 0.4583905211154658, |
| "learning_rate": 6.507022878090137e-06, |
| "loss": 0.5428, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.43364757070340826, |
| "grad_norm": 0.4950332559469317, |
| "learning_rate": 6.499398825235767e-06, |
| "loss": 0.5337, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.43413101281121586, |
| "grad_norm": 0.46856791974797646, |
| "learning_rate": 6.491770939797152e-06, |
| "loss": 0.5323, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.43461445491902345, |
| "grad_norm": 0.4646364739679311, |
| "learning_rate": 6.4841392412717864e-06, |
| "loss": 0.5407, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.43509789702683105, |
| "grad_norm": 0.43558921933796657, |
| "learning_rate": 6.476503749166903e-06, |
| "loss": 0.5347, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.43558133913463865, |
| "grad_norm": 0.4961569425990492, |
| "learning_rate": 6.4688644829994385e-06, |
| "loss": 0.5295, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.43606478124244624, |
| "grad_norm": 0.4698493360586227, |
| "learning_rate": 6.4612214622959705e-06, |
| "loss": 0.5457, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4365482233502538, |
| "grad_norm": 0.435939008717009, |
| "learning_rate": 6.453574706592676e-06, |
| "loss": 0.521, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.4370316654580614, |
| "grad_norm": 0.5347621942726917, |
| "learning_rate": 6.44592423543528e-06, |
| "loss": 0.5354, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.437515107565869, |
| "grad_norm": 0.49607229965381217, |
| "learning_rate": 6.4382700683790025e-06, |
| "loss": 0.544, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4379985496736766, |
| "grad_norm": 0.4645864217211478, |
| "learning_rate": 6.4306122249885105e-06, |
| "loss": 0.5192, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.43848199178148417, |
| "grad_norm": 0.46522668277788565, |
| "learning_rate": 6.422950724837872e-06, |
| "loss": 0.526, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.43896543388929177, |
| "grad_norm": 0.46658657241667495, |
| "learning_rate": 6.415285587510495e-06, |
| "loss": 0.5088, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.43944887599709936, |
| "grad_norm": 0.49352648435992785, |
| "learning_rate": 6.407616832599091e-06, |
| "loss": 0.5291, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.43993231810490696, |
| "grad_norm": 0.5022475233173346, |
| "learning_rate": 6.399944479705615e-06, |
| "loss": 0.5349, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4404157602127145, |
| "grad_norm": 0.4645078975290342, |
| "learning_rate": 6.392268548441218e-06, |
| "loss": 0.5356, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.4408992023205221, |
| "grad_norm": 0.4656283146754065, |
| "learning_rate": 6.384589058426201e-06, |
| "loss": 0.5297, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4413826444283297, |
| "grad_norm": 0.4778447743673107, |
| "learning_rate": 6.3769060292899585e-06, |
| "loss": 0.531, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.4418660865361373, |
| "grad_norm": 0.5067362493290994, |
| "learning_rate": 6.3692194806709326e-06, |
| "loss": 0.5266, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4423495286439449, |
| "grad_norm": 0.4583050351500669, |
| "learning_rate": 6.36152943221656e-06, |
| "loss": 0.5068, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4428329707517525, |
| "grad_norm": 0.44674303828563183, |
| "learning_rate": 6.353835903583225e-06, |
| "loss": 0.5135, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4433164128595601, |
| "grad_norm": 0.4845057402223313, |
| "learning_rate": 6.346138914436207e-06, |
| "loss": 0.53, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4437998549673677, |
| "grad_norm": 0.46927160898270703, |
| "learning_rate": 6.338438484449632e-06, |
| "loss": 0.5282, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4442832970751752, |
| "grad_norm": 0.46844884470128584, |
| "learning_rate": 6.330734633306415e-06, |
| "loss": 0.5205, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.4447667391829828, |
| "grad_norm": 0.46793830654029384, |
| "learning_rate": 6.3230273806982254e-06, |
| "loss": 0.5354, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.4452501812907904, |
| "grad_norm": 0.46904655646766624, |
| "learning_rate": 6.31531674632542e-06, |
| "loss": 0.5403, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.445733623398598, |
| "grad_norm": 0.4797829974325548, |
| "learning_rate": 6.307602749897001e-06, |
| "loss": 0.5285, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4462170655064056, |
| "grad_norm": 0.5126549879385887, |
| "learning_rate": 6.299885411130566e-06, |
| "loss": 0.5293, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.4467005076142132, |
| "grad_norm": 0.47873452298166375, |
| "learning_rate": 6.292164749752256e-06, |
| "loss": 0.5358, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4471839497220208, |
| "grad_norm": 0.4697820734594197, |
| "learning_rate": 6.284440785496701e-06, |
| "loss": 0.5303, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4476673918298284, |
| "grad_norm": 0.48049048675766015, |
| "learning_rate": 6.27671353810698e-06, |
| "loss": 0.5384, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.448150833937636, |
| "grad_norm": 0.44829305517954404, |
| "learning_rate": 6.268983027334557e-06, |
| "loss": 0.5349, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.44863427604544354, |
| "grad_norm": 0.47046261605559686, |
| "learning_rate": 6.2612492729392396e-06, |
| "loss": 0.5445, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.44911771815325113, |
| "grad_norm": 0.43943250727373434, |
| "learning_rate": 6.25351229468913e-06, |
| "loss": 0.5243, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.44960116026105873, |
| "grad_norm": 0.457243800658822, |
| "learning_rate": 6.245772112360568e-06, |
| "loss": 0.5335, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4500846023688663, |
| "grad_norm": 0.4558342802943274, |
| "learning_rate": 6.2380287457380814e-06, |
| "loss": 0.5295, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4505680444766739, |
| "grad_norm": 0.46490304474994104, |
| "learning_rate": 6.230282214614342e-06, |
| "loss": 0.5277, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4510514865844815, |
| "grad_norm": 0.46808167357734604, |
| "learning_rate": 6.222532538790107e-06, |
| "loss": 0.5038, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4515349286922891, |
| "grad_norm": 0.44746189473984266, |
| "learning_rate": 6.214779738074169e-06, |
| "loss": 0.5482, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4520183708000967, |
| "grad_norm": 0.4749348946440431, |
| "learning_rate": 6.2070238322833165e-06, |
| "loss": 0.5408, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.45250181290790426, |
| "grad_norm": 0.4105832023778957, |
| "learning_rate": 6.199264841242267e-06, |
| "loss": 0.507, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.45298525501571185, |
| "grad_norm": 0.45569820188618954, |
| "learning_rate": 6.191502784783627e-06, |
| "loss": 0.5361, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.45346869712351945, |
| "grad_norm": 0.4487391558411953, |
| "learning_rate": 6.183737682747839e-06, |
| "loss": 0.5404, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.45395213923132705, |
| "grad_norm": 0.38508572249224443, |
| "learning_rate": 6.17596955498313e-06, |
| "loss": 0.475, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.45443558133913464, |
| "grad_norm": 0.47287816273000344, |
| "learning_rate": 6.16819842134546e-06, |
| "loss": 0.5293, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.45491902344694224, |
| "grad_norm": 0.4650031849319042, |
| "learning_rate": 6.160424301698472e-06, |
| "loss": 0.5315, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.45540246555474984, |
| "grad_norm": 0.4613271364601288, |
| "learning_rate": 6.1526472159134454e-06, |
| "loss": 0.5398, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.45588590766255743, |
| "grad_norm": 0.42626048007656836, |
| "learning_rate": 6.1448671838692365e-06, |
| "loss": 0.5246, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.456369349770365, |
| "grad_norm": 0.45709658015590027, |
| "learning_rate": 6.1370842254522325e-06, |
| "loss": 0.5392, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.45685279187817257, |
| "grad_norm": 0.4969316742066114, |
| "learning_rate": 6.129298360556304e-06, |
| "loss": 0.5216, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.45733623398598017, |
| "grad_norm": 0.48874038272824616, |
| "learning_rate": 6.1215096090827485e-06, |
| "loss": 0.5341, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.45781967609378776, |
| "grad_norm": 0.43416511670164404, |
| "learning_rate": 6.1137179909402445e-06, |
| "loss": 0.5357, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.45830311820159536, |
| "grad_norm": 0.4398289245515299, |
| "learning_rate": 6.105923526044794e-06, |
| "loss": 0.5312, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.45878656030940296, |
| "grad_norm": 0.506729531362168, |
| "learning_rate": 6.098126234319679e-06, |
| "loss": 0.5164, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.45927000241721055, |
| "grad_norm": 0.49661020376021475, |
| "learning_rate": 6.0903261356954035e-06, |
| "loss": 0.5406, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.45975344452501815, |
| "grad_norm": 0.49213602324131056, |
| "learning_rate": 6.08252325010965e-06, |
| "loss": 0.5369, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.4602368866328257, |
| "grad_norm": 0.4522403269153531, |
| "learning_rate": 6.074717597507223e-06, |
| "loss": 0.5318, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4607203287406333, |
| "grad_norm": 0.46334490270574613, |
| "learning_rate": 6.066909197839996e-06, |
| "loss": 0.5053, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4612037708484409, |
| "grad_norm": 0.4555176211572043, |
| "learning_rate": 6.059098071066874e-06, |
| "loss": 0.5313, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.4616872129562485, |
| "grad_norm": 0.4697386359927019, |
| "learning_rate": 6.051284237153723e-06, |
| "loss": 0.5304, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4621706550640561, |
| "grad_norm": 0.47606054578122203, |
| "learning_rate": 6.043467716073333e-06, |
| "loss": 0.5392, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4626540971718637, |
| "grad_norm": 0.4504863880612236, |
| "learning_rate": 6.035648527805359e-06, |
| "loss": 0.5333, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.4631375392796713, |
| "grad_norm": 0.47075391826579777, |
| "learning_rate": 6.0278266923362805e-06, |
| "loss": 0.5331, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.46362098138747887, |
| "grad_norm": 0.4403281566352786, |
| "learning_rate": 6.0200022296593375e-06, |
| "loss": 0.5432, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.46410442349528647, |
| "grad_norm": 0.4572023392519702, |
| "learning_rate": 6.012175159774488e-06, |
| "loss": 0.5323, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.464587865603094, |
| "grad_norm": 0.44773102967818523, |
| "learning_rate": 6.004345502688353e-06, |
| "loss": 0.5299, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.4650713077109016, |
| "grad_norm": 0.4371461582728146, |
| "learning_rate": 5.996513278414166e-06, |
| "loss": 0.5385, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4655547498187092, |
| "grad_norm": 0.42182149099377253, |
| "learning_rate": 5.988678506971726e-06, |
| "loss": 0.5303, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4660381919265168, |
| "grad_norm": 0.4594942911793403, |
| "learning_rate": 5.980841208387338e-06, |
| "loss": 0.503, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4665216340343244, |
| "grad_norm": 0.4501324492723119, |
| "learning_rate": 5.973001402693769e-06, |
| "loss": 0.5253, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.467005076142132, |
| "grad_norm": 0.4373460975849127, |
| "learning_rate": 5.965159109930196e-06, |
| "loss": 0.5386, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.4674885182499396, |
| "grad_norm": 0.4418857621109752, |
| "learning_rate": 5.957314350142149e-06, |
| "loss": 0.529, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.4679719603577472, |
| "grad_norm": 0.485651415733174, |
| "learning_rate": 5.94946714338147e-06, |
| "loss": 0.538, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4684554024655547, |
| "grad_norm": 0.44827050412474007, |
| "learning_rate": 5.941617509706247e-06, |
| "loss": 0.5333, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.4689388445733623, |
| "grad_norm": 0.44674045160443804, |
| "learning_rate": 5.933765469180779e-06, |
| "loss": 0.5329, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4694222866811699, |
| "grad_norm": 0.47047904115025807, |
| "learning_rate": 5.925911041875514e-06, |
| "loss": 0.5304, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4699057287889775, |
| "grad_norm": 0.47618192657266745, |
| "learning_rate": 5.9180542478670025e-06, |
| "loss": 0.5339, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4703891708967851, |
| "grad_norm": 0.4372302817189476, |
| "learning_rate": 5.910195107237842e-06, |
| "loss": 0.5311, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.4708726130045927, |
| "grad_norm": 0.4599705922186888, |
| "learning_rate": 5.902333640076627e-06, |
| "loss": 0.5294, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.4713560551124003, |
| "grad_norm": 0.4668491954105507, |
| "learning_rate": 5.894469866477905e-06, |
| "loss": 0.5319, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4718394972202079, |
| "grad_norm": 0.4298713287991765, |
| "learning_rate": 5.886603806542114e-06, |
| "loss": 0.5308, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.47232293932801545, |
| "grad_norm": 0.44120727064086296, |
| "learning_rate": 5.878735480375537e-06, |
| "loss": 0.5271, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.47280638143582304, |
| "grad_norm": 0.45517085979612587, |
| "learning_rate": 5.87086490809025e-06, |
| "loss": 0.5363, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.47328982354363064, |
| "grad_norm": 0.41718667843710106, |
| "learning_rate": 5.862992109804071e-06, |
| "loss": 0.5208, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.47377326565143824, |
| "grad_norm": 0.4698208514148652, |
| "learning_rate": 5.855117105640503e-06, |
| "loss": 0.5045, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.47425670775924583, |
| "grad_norm": 0.44330430106357593, |
| "learning_rate": 5.847239915728695e-06, |
| "loss": 0.5213, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.47474014986705343, |
| "grad_norm": 0.46348151704515983, |
| "learning_rate": 5.839360560203379e-06, |
| "loss": 0.5416, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.475223591974861, |
| "grad_norm": 0.4369134554030048, |
| "learning_rate": 5.831479059204822e-06, |
| "loss": 0.5397, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.4757070340826686, |
| "grad_norm": 0.434332595795343, |
| "learning_rate": 5.823595432878775e-06, |
| "loss": 0.5253, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.45066877191979876, |
| "learning_rate": 5.815709701376424e-06, |
| "loss": 0.501, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.47667391829828376, |
| "grad_norm": 0.43680722385313575, |
| "learning_rate": 5.8078218848543326e-06, |
| "loss": 0.5425, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.47715736040609136, |
| "grad_norm": 0.462367177083934, |
| "learning_rate": 5.799932003474398e-06, |
| "loss": 0.5293, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.47764080251389895, |
| "grad_norm": 0.46046005650848004, |
| "learning_rate": 5.7920400774037884e-06, |
| "loss": 0.5273, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.47812424462170655, |
| "grad_norm": 0.4325701614014005, |
| "learning_rate": 5.784146126814909e-06, |
| "loss": 0.523, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.47860768672951415, |
| "grad_norm": 0.42688145845821907, |
| "learning_rate": 5.776250171885329e-06, |
| "loss": 0.5289, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.47909112883732174, |
| "grad_norm": 0.4553909518433745, |
| "learning_rate": 5.768352232797748e-06, |
| "loss": 0.5354, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.47957457094512934, |
| "grad_norm": 0.4612709888611227, |
| "learning_rate": 5.760452329739933e-06, |
| "loss": 0.5346, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.48005801305293694, |
| "grad_norm": 0.4671180697912862, |
| "learning_rate": 5.752550482904674e-06, |
| "loss": 0.5381, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.4805414551607445, |
| "grad_norm": 0.44306247126150655, |
| "learning_rate": 5.744646712489729e-06, |
| "loss": 0.5347, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4810248972685521, |
| "grad_norm": 0.47845590337101257, |
| "learning_rate": 5.736741038697771e-06, |
| "loss": 0.546, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4815083393763597, |
| "grad_norm": 0.42271596693767594, |
| "learning_rate": 5.728833481736339e-06, |
| "loss": 0.5189, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.48199178148416727, |
| "grad_norm": 0.4538365308944498, |
| "learning_rate": 5.720924061817786e-06, |
| "loss": 0.5405, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.48247522359197487, |
| "grad_norm": 0.4492861025701022, |
| "learning_rate": 5.71301279915923e-06, |
| "loss": 0.5317, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.48295866569978246, |
| "grad_norm": 0.4745326397538611, |
| "learning_rate": 5.705099713982491e-06, |
| "loss": 0.532, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.48344210780759006, |
| "grad_norm": 0.44214626511814337, |
| "learning_rate": 5.697184826514058e-06, |
| "loss": 0.5305, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.48392554991539766, |
| "grad_norm": 0.46953984488621225, |
| "learning_rate": 5.689268156985015e-06, |
| "loss": 0.5385, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.4844089920232052, |
| "grad_norm": 0.421787366978404, |
| "learning_rate": 5.6813497256310124e-06, |
| "loss": 0.5468, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.4848924341310128, |
| "grad_norm": 0.44994209423666665, |
| "learning_rate": 5.673429552692196e-06, |
| "loss": 0.5259, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.4853758762388204, |
| "grad_norm": 0.4777649344533823, |
| "learning_rate": 5.66550765841317e-06, |
| "loss": 0.5312, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.485859318346628, |
| "grad_norm": 0.4450135880363142, |
| "learning_rate": 5.6575840630429295e-06, |
| "loss": 0.5234, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4863427604544356, |
| "grad_norm": 0.48478431421225965, |
| "learning_rate": 5.649658786834825e-06, |
| "loss": 0.5337, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4868262025622432, |
| "grad_norm": 0.4600376201705413, |
| "learning_rate": 5.641731850046503e-06, |
| "loss": 0.5292, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4873096446700508, |
| "grad_norm": 0.46358685580492737, |
| "learning_rate": 5.633803272939851e-06, |
| "loss": 0.5033, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.4877930867778584, |
| "grad_norm": 0.4424655348417247, |
| "learning_rate": 5.62587307578095e-06, |
| "loss": 0.5199, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.4882765288856659, |
| "grad_norm": 0.44958190643210255, |
| "learning_rate": 5.6179412788400255e-06, |
| "loss": 0.5285, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4887599709934735, |
| "grad_norm": 0.46154354908053036, |
| "learning_rate": 5.610007902391387e-06, |
| "loss": 0.5302, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4892434131012811, |
| "grad_norm": 0.4787830389554928, |
| "learning_rate": 5.602072966713389e-06, |
| "loss": 0.5319, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4897268552090887, |
| "grad_norm": 0.46247126201641375, |
| "learning_rate": 5.594136492088363e-06, |
| "loss": 0.533, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.4902102973168963, |
| "grad_norm": 0.4497969101747785, |
| "learning_rate": 5.586198498802577e-06, |
| "loss": 0.5207, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.4906937394247039, |
| "grad_norm": 0.4711750674532031, |
| "learning_rate": 5.578259007146183e-06, |
| "loss": 0.5182, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.4911771815325115, |
| "grad_norm": 0.44280948751548693, |
| "learning_rate": 5.570318037413162e-06, |
| "loss": 0.5335, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4916606236403191, |
| "grad_norm": 0.4140593593091692, |
| "learning_rate": 5.562375609901273e-06, |
| "loss": 0.498, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.49214406574812664, |
| "grad_norm": 0.5035482968527856, |
| "learning_rate": 5.5544317449119975e-06, |
| "loss": 0.535, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.49262750785593423, |
| "grad_norm": 0.470430802340302, |
| "learning_rate": 5.546486462750499e-06, |
| "loss": 0.529, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.49311094996374183, |
| "grad_norm": 0.4614608636774156, |
| "learning_rate": 5.538539783725556e-06, |
| "loss": 0.5415, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4935943920715494, |
| "grad_norm": 0.43439106514800707, |
| "learning_rate": 5.530591728149522e-06, |
| "loss": 0.5237, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.494077834179357, |
| "grad_norm": 0.4532581332185661, |
| "learning_rate": 5.522642316338268e-06, |
| "loss": 0.5275, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4945612762871646, |
| "grad_norm": 0.44400011265861933, |
| "learning_rate": 5.51469156861113e-06, |
| "loss": 0.5279, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4950447183949722, |
| "grad_norm": 0.464713276215896, |
| "learning_rate": 5.50673950529086e-06, |
| "loss": 0.5261, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.4955281605027798, |
| "grad_norm": 0.46801515065857147, |
| "learning_rate": 5.498786146703575e-06, |
| "loss": 0.5392, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4960116026105874, |
| "grad_norm": 0.43826397020984875, |
| "learning_rate": 5.490831513178698e-06, |
| "loss": 0.513, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.49649504471839495, |
| "grad_norm": 0.4545609138302334, |
| "learning_rate": 5.482875625048916e-06, |
| "loss": 0.5342, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.49697848682620255, |
| "grad_norm": 0.45528220157554766, |
| "learning_rate": 5.474918502650116e-06, |
| "loss": 0.5371, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.49746192893401014, |
| "grad_norm": 0.4918245291291218, |
| "learning_rate": 5.466960166321348e-06, |
| "loss": 0.5248, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.49794537104181774, |
| "grad_norm": 0.43603238272665606, |
| "learning_rate": 5.459000636404759e-06, |
| "loss": 0.5236, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.49842881314962534, |
| "grad_norm": 0.43802907063700564, |
| "learning_rate": 5.451039933245551e-06, |
| "loss": 0.5342, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.49891225525743294, |
| "grad_norm": 0.4660487433831574, |
| "learning_rate": 5.44307807719192e-06, |
| "loss": 0.5249, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.49939569736524053, |
| "grad_norm": 0.4349479566561577, |
| "learning_rate": 5.435115088595016e-06, |
| "loss": 0.4997, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.49987913947304813, |
| "grad_norm": 0.43592525172206065, |
| "learning_rate": 5.4271509878088755e-06, |
| "loss": 0.5263, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5003625815808557, |
| "grad_norm": 0.42380458268711035, |
| "learning_rate": 5.4191857951903825e-06, |
| "loss": 0.503, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5008460236886633, |
| "grad_norm": 0.47347721204466, |
| "learning_rate": 5.4112195310992144e-06, |
| "loss": 0.5228, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5013294657964709, |
| "grad_norm": 0.4523581952116975, |
| "learning_rate": 5.403252215897781e-06, |
| "loss": 0.5295, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5018129079042785, |
| "grad_norm": 0.44718772307460525, |
| "learning_rate": 5.395283869951184e-06, |
| "loss": 0.5402, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5022963500120861, |
| "grad_norm": 0.42574725470561453, |
| "learning_rate": 5.387314513627156e-06, |
| "loss": 0.5228, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5027797921198937, |
| "grad_norm": 0.4816073830916942, |
| "learning_rate": 5.379344167296017e-06, |
| "loss": 0.5302, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5032632342277013, |
| "grad_norm": 0.454535172796951, |
| "learning_rate": 5.371372851330612e-06, |
| "loss": 0.5337, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5037466763355088, |
| "grad_norm": 0.44304402033518747, |
| "learning_rate": 5.3634005861062675e-06, |
| "loss": 0.5348, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5042301184433164, |
| "grad_norm": 0.4647077624164304, |
| "learning_rate": 5.355427392000736e-06, |
| "loss": 0.5367, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.504713560551124, |
| "grad_norm": 0.47204921033701974, |
| "learning_rate": 5.347453289394146e-06, |
| "loss": 0.5236, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5051970026589316, |
| "grad_norm": 0.444269913904137, |
| "learning_rate": 5.339478298668943e-06, |
| "loss": 0.5374, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5056804447667392, |
| "grad_norm": 0.437749383717649, |
| "learning_rate": 5.331502440209849e-06, |
| "loss": 0.529, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5061638868745467, |
| "grad_norm": 0.44850371734571, |
| "learning_rate": 5.3235257344037996e-06, |
| "loss": 0.5363, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5066473289823543, |
| "grad_norm": 0.4687290830806752, |
| "learning_rate": 5.3155482016398995e-06, |
| "loss": 0.5335, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5071307710901619, |
| "grad_norm": 0.43863314490483407, |
| "learning_rate": 5.307569862309363e-06, |
| "loss": 0.5269, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5076142131979695, |
| "grad_norm": 0.44499358455418236, |
| "learning_rate": 5.29959073680547e-06, |
| "loss": 0.5269, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5080976553057771, |
| "grad_norm": 0.4443034689772289, |
| "learning_rate": 5.2916108455235084e-06, |
| "loss": 0.5247, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5085810974135847, |
| "grad_norm": 0.41196428985814043, |
| "learning_rate": 5.2836302088607235e-06, |
| "loss": 0.5257, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5090645395213923, |
| "grad_norm": 0.4196111969098687, |
| "learning_rate": 5.275648847216263e-06, |
| "loss": 0.5326, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5095479816291999, |
| "grad_norm": 0.4396027925039716, |
| "learning_rate": 5.267666780991135e-06, |
| "loss": 0.5384, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5100314237370075, |
| "grad_norm": 0.4918850008899013, |
| "learning_rate": 5.259684030588141e-06, |
| "loss": 0.5217, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5105148658448151, |
| "grad_norm": 0.4636011028874602, |
| "learning_rate": 5.251700616411836e-06, |
| "loss": 0.5292, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5109983079526227, |
| "grad_norm": 0.45080015588791633, |
| "learning_rate": 5.243716558868469e-06, |
| "loss": 0.5335, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5114817500604303, |
| "grad_norm": 0.4430606358386943, |
| "learning_rate": 5.235731878365935e-06, |
| "loss": 0.5366, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5119651921682379, |
| "grad_norm": 0.4765633831252005, |
| "learning_rate": 5.22774659531372e-06, |
| "loss": 0.5343, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5124486342760455, |
| "grad_norm": 0.47759199812886083, |
| "learning_rate": 5.219760730122854e-06, |
| "loss": 0.5318, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5129320763838531, |
| "grad_norm": 0.4277450357502153, |
| "learning_rate": 5.211774303205849e-06, |
| "loss": 0.5055, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5134155184916607, |
| "grad_norm": 0.42824551582926373, |
| "learning_rate": 5.203787334976655e-06, |
| "loss": 0.5015, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5138989605994683, |
| "grad_norm": 0.5041405711353805, |
| "learning_rate": 5.195799845850611e-06, |
| "loss": 0.525, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5143824027072758, |
| "grad_norm": 0.4306413852615145, |
| "learning_rate": 5.18781185624438e-06, |
| "loss": 0.5265, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5148658448150834, |
| "grad_norm": 0.4361432944536921, |
| "learning_rate": 5.179823386575908e-06, |
| "loss": 0.5311, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.515349286922891, |
| "grad_norm": 0.4240690760700838, |
| "learning_rate": 5.171834457264364e-06, |
| "loss": 0.5286, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5158327290306985, |
| "grad_norm": 0.47256746355593565, |
| "learning_rate": 5.1638450887301006e-06, |
| "loss": 0.5282, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5163161711385061, |
| "grad_norm": 0.4445988854998036, |
| "learning_rate": 5.155855301394585e-06, |
| "loss": 0.527, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5167996132463137, |
| "grad_norm": 0.46405127876443125, |
| "learning_rate": 5.147865115680357e-06, |
| "loss": 0.5289, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5172830553541213, |
| "grad_norm": 0.45801111107179227, |
| "learning_rate": 5.139874552010975e-06, |
| "loss": 0.531, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5177664974619289, |
| "grad_norm": 0.454345233725776, |
| "learning_rate": 5.131883630810966e-06, |
| "loss": 0.5428, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5182499395697365, |
| "grad_norm": 0.4649418568564353, |
| "learning_rate": 5.123892372505768e-06, |
| "loss": 0.524, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5187333816775441, |
| "grad_norm": 0.4609921972037312, |
| "learning_rate": 5.11590079752168e-06, |
| "loss": 0.5337, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5192168237853517, |
| "grad_norm": 0.4257268056499296, |
| "learning_rate": 5.107908926285813e-06, |
| "loss": 0.5247, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5197002658931593, |
| "grad_norm": 0.4277925159892485, |
| "learning_rate": 5.099916779226032e-06, |
| "loss": 0.5314, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5201837080009669, |
| "grad_norm": 0.46374173796570095, |
| "learning_rate": 5.091924376770912e-06, |
| "loss": 0.5267, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5206671501087745, |
| "grad_norm": 0.46413399868377414, |
| "learning_rate": 5.083931739349675e-06, |
| "loss": 0.5227, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5211505922165821, |
| "grad_norm": 0.42805047816232233, |
| "learning_rate": 5.075938887392149e-06, |
| "loss": 0.5148, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5216340343243897, |
| "grad_norm": 0.42348168283938675, |
| "learning_rate": 5.0679458413287055e-06, |
| "loss": 0.5168, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5221174764321972, |
| "grad_norm": 0.48349127354280697, |
| "learning_rate": 5.059952621590216e-06, |
| "loss": 0.5274, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5226009185400048, |
| "grad_norm": 0.447828940551305, |
| "learning_rate": 5.051959248607993e-06, |
| "loss": 0.5251, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5230843606478124, |
| "grad_norm": 0.45995219297674483, |
| "learning_rate": 5.043965742813744e-06, |
| "loss": 0.5246, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.52356780275562, |
| "grad_norm": 0.4744443087812869, |
| "learning_rate": 5.035972124639511e-06, |
| "loss": 0.5299, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5240512448634276, |
| "grad_norm": 0.4140590259366713, |
| "learning_rate": 5.02797841451763e-06, |
| "loss": 0.5273, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5245346869712352, |
| "grad_norm": 0.4421687910150297, |
| "learning_rate": 5.019984632880665e-06, |
| "loss": 0.5342, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5250181290790428, |
| "grad_norm": 0.4665180180836343, |
| "learning_rate": 5.011990800161369e-06, |
| "loss": 0.5314, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5255015711868504, |
| "grad_norm": 0.45569707259235365, |
| "learning_rate": 5.00399693679262e-06, |
| "loss": 0.5291, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.525985013294658, |
| "grad_norm": 0.3966652463954235, |
| "learning_rate": 4.9960030632073815e-06, |
| "loss": 0.4852, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5264684554024656, |
| "grad_norm": 0.4156779611396039, |
| "learning_rate": 4.988009199838632e-06, |
| "loss": 0.5266, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5269518975102732, |
| "grad_norm": 0.4459927015276638, |
| "learning_rate": 4.980015367119336e-06, |
| "loss": 0.5128, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5274353396180808, |
| "grad_norm": 0.4470940459251613, |
| "learning_rate": 4.9720215854823716e-06, |
| "loss": 0.5215, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.5279187817258884, |
| "grad_norm": 0.4379040861596386, |
| "learning_rate": 4.96402787536049e-06, |
| "loss": 0.529, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.528402223833696, |
| "grad_norm": 0.42846117019918506, |
| "learning_rate": 4.956034257186258e-06, |
| "loss": 0.5196, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5288856659415035, |
| "grad_norm": 0.45571668646782787, |
| "learning_rate": 4.9480407513920086e-06, |
| "loss": 0.527, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.5293691080493111, |
| "grad_norm": 0.4680208745726276, |
| "learning_rate": 4.940047378409786e-06, |
| "loss": 0.523, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5298525501571187, |
| "grad_norm": 0.44029959009092084, |
| "learning_rate": 4.932054158671295e-06, |
| "loss": 0.5244, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5303359922649262, |
| "grad_norm": 0.45471058687459337, |
| "learning_rate": 4.924061112607853e-06, |
| "loss": 0.532, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5308194343727338, |
| "grad_norm": 0.43297324457453135, |
| "learning_rate": 4.9160682606503255e-06, |
| "loss": 0.5226, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5313028764805414, |
| "grad_norm": 0.4269816974887771, |
| "learning_rate": 4.908075623229089e-06, |
| "loss": 0.5242, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.531786318588349, |
| "grad_norm": 0.4280455143246443, |
| "learning_rate": 4.900083220773968e-06, |
| "loss": 0.5082, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5322697606961566, |
| "grad_norm": 0.4422972364197889, |
| "learning_rate": 4.892091073714189e-06, |
| "loss": 0.5162, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5327532028039642, |
| "grad_norm": 0.44177794728378245, |
| "learning_rate": 4.88409920247832e-06, |
| "loss": 0.5287, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5332366449117718, |
| "grad_norm": 0.4384466281073893, |
| "learning_rate": 4.876107627494234e-06, |
| "loss": 0.5224, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5337200870195794, |
| "grad_norm": 0.42791066381831555, |
| "learning_rate": 4.868116369189033e-06, |
| "loss": 0.519, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.534203529127387, |
| "grad_norm": 0.43287603203122865, |
| "learning_rate": 4.860125447989026e-06, |
| "loss": 0.5315, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5346869712351946, |
| "grad_norm": 0.40835340581900775, |
| "learning_rate": 4.852134884319646e-06, |
| "loss": 0.5013, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5351704133430022, |
| "grad_norm": 0.4564877621647975, |
| "learning_rate": 4.844144698605418e-06, |
| "loss": 0.5163, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5356538554508098, |
| "grad_norm": 0.4350849446506223, |
| "learning_rate": 4.836154911269902e-06, |
| "loss": 0.5216, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5361372975586174, |
| "grad_norm": 0.45777989132001196, |
| "learning_rate": 4.8281655427356375e-06, |
| "loss": 0.5248, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.536620739666425, |
| "grad_norm": 0.463821530722432, |
| "learning_rate": 4.820176613424095e-06, |
| "loss": 0.5302, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5371041817742326, |
| "grad_norm": 0.44517887708797427, |
| "learning_rate": 4.812188143755621e-06, |
| "loss": 0.5214, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5375876238820402, |
| "grad_norm": 0.5017058167871686, |
| "learning_rate": 4.80420015414939e-06, |
| "loss": 0.5305, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5380710659898477, |
| "grad_norm": 0.4672768776486219, |
| "learning_rate": 4.796212665023345e-06, |
| "loss": 0.5237, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5385545080976553, |
| "grad_norm": 0.4419995316595335, |
| "learning_rate": 4.788225696794153e-06, |
| "loss": 0.5277, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5390379502054629, |
| "grad_norm": 0.44730511943868706, |
| "learning_rate": 4.780239269877147e-06, |
| "loss": 0.5313, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5395213923132705, |
| "grad_norm": 0.45204850110236905, |
| "learning_rate": 4.7722534046862805e-06, |
| "loss": 0.5231, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.540004834421078, |
| "grad_norm": 0.45779960123303604, |
| "learning_rate": 4.764268121634066e-06, |
| "loss": 0.507, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5404882765288856, |
| "grad_norm": 0.41695878051285573, |
| "learning_rate": 4.7562834411315324e-06, |
| "loss": 0.5042, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5409717186366932, |
| "grad_norm": 0.4513235919318933, |
| "learning_rate": 4.748299383588167e-06, |
| "loss": 0.5258, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5414551607445008, |
| "grad_norm": 0.467590473541893, |
| "learning_rate": 4.74031596941186e-06, |
| "loss": 0.5375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5419386028523084, |
| "grad_norm": 0.4524946558422428, |
| "learning_rate": 4.7323332190088675e-06, |
| "loss": 0.5199, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.542422044960116, |
| "grad_norm": 0.42113263739650647, |
| "learning_rate": 4.7243511527837374e-06, |
| "loss": 0.5251, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5429054870679236, |
| "grad_norm": 0.4636225245244688, |
| "learning_rate": 4.716369791139279e-06, |
| "loss": 0.5308, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5433889291757312, |
| "grad_norm": 0.4398566927754983, |
| "learning_rate": 4.708389154476492e-06, |
| "loss": 0.5201, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5438723712835388, |
| "grad_norm": 0.43330211663967066, |
| "learning_rate": 4.7004092631945315e-06, |
| "loss": 0.5258, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5443558133913464, |
| "grad_norm": 0.44482177202458967, |
| "learning_rate": 4.692430137690638e-06, |
| "loss": 0.5222, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.544839255499154, |
| "grad_norm": 0.4318024796342877, |
| "learning_rate": 4.684451798360102e-06, |
| "loss": 0.5204, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5453226976069616, |
| "grad_norm": 0.45444414518744425, |
| "learning_rate": 4.6764742655962e-06, |
| "loss": 0.5255, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5458061397147692, |
| "grad_norm": 0.4372839038666406, |
| "learning_rate": 4.6684975597901526e-06, |
| "loss": 0.5275, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5462895818225767, |
| "grad_norm": 0.43767287064021165, |
| "learning_rate": 4.660521701331058e-06, |
| "loss": 0.5046, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5467730239303843, |
| "grad_norm": 0.42146624184063447, |
| "learning_rate": 4.652546710605857e-06, |
| "loss": 0.5284, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5472564660381919, |
| "grad_norm": 0.4528806888549099, |
| "learning_rate": 4.644572607999267e-06, |
| "loss": 0.5234, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5477399081459995, |
| "grad_norm": 0.41615032339392954, |
| "learning_rate": 4.636599413893734e-06, |
| "loss": 0.5149, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5482233502538071, |
| "grad_norm": 0.4080780400252472, |
| "learning_rate": 4.628627148669391e-06, |
| "loss": 0.5069, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5487067923616147, |
| "grad_norm": 0.4376373218589361, |
| "learning_rate": 4.620655832703984e-06, |
| "loss": 0.5232, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5491902344694223, |
| "grad_norm": 0.44817086720050736, |
| "learning_rate": 4.612685486372846e-06, |
| "loss": 0.5284, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5496736765772299, |
| "grad_norm": 0.4269717422470433, |
| "learning_rate": 4.604716130048818e-06, |
| "loss": 0.5292, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5501571186850375, |
| "grad_norm": 0.4463889311004707, |
| "learning_rate": 4.596747784102221e-06, |
| "loss": 0.5296, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5506405607928451, |
| "grad_norm": 0.4544805899306494, |
| "learning_rate": 4.588780468900787e-06, |
| "loss": 0.5342, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5511240029006527, |
| "grad_norm": 0.43223974279832084, |
| "learning_rate": 4.580814204809618e-06, |
| "loss": 0.5278, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5516074450084603, |
| "grad_norm": 0.4526101073084036, |
| "learning_rate": 4.572849012191126e-06, |
| "loss": 0.5274, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5520908871162679, |
| "grad_norm": 0.46135315991278786, |
| "learning_rate": 4.564884911404986e-06, |
| "loss": 0.5308, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5525743292240755, |
| "grad_norm": 0.451744932774668, |
| "learning_rate": 4.5569219228080805e-06, |
| "loss": 0.5228, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.553057771331883, |
| "grad_norm": 0.4431407162306295, |
| "learning_rate": 4.54896006675445e-06, |
| "loss": 0.5001, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5535412134396906, |
| "grad_norm": 0.4115173802208087, |
| "learning_rate": 4.540999363595242e-06, |
| "loss": 0.4963, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.5540246555474981, |
| "grad_norm": 0.418962800898634, |
| "learning_rate": 4.5330398336786526e-06, |
| "loss": 0.5277, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.5545080976553057, |
| "grad_norm": 0.46372386433812574, |
| "learning_rate": 4.525081497349887e-06, |
| "loss": 0.5427, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5549915397631133, |
| "grad_norm": 0.4530031866173555, |
| "learning_rate": 4.517124374951086e-06, |
| "loss": 0.5178, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5554749818709209, |
| "grad_norm": 0.440822304311767, |
| "learning_rate": 4.509168486821304e-06, |
| "loss": 0.5225, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5559584239787285, |
| "grad_norm": 0.4127961848352273, |
| "learning_rate": 4.501213853296425e-06, |
| "loss": 0.523, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5564418660865361, |
| "grad_norm": 0.461288293810827, |
| "learning_rate": 4.493260494709141e-06, |
| "loss": 0.5251, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5569253081943437, |
| "grad_norm": 0.43458067348060225, |
| "learning_rate": 4.48530843138887e-06, |
| "loss": 0.5419, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5574087503021513, |
| "grad_norm": 0.42472609884541546, |
| "learning_rate": 4.477357683661734e-06, |
| "loss": 0.5318, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5578921924099589, |
| "grad_norm": 0.4256107384490566, |
| "learning_rate": 4.469408271850479e-06, |
| "loss": 0.506, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5583756345177665, |
| "grad_norm": 0.4098807197744025, |
| "learning_rate": 4.4614602162744455e-06, |
| "loss": 0.5152, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5588590766255741, |
| "grad_norm": 0.4390148331821114, |
| "learning_rate": 4.453513537249503e-06, |
| "loss": 0.527, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5593425187333817, |
| "grad_norm": 0.4711204869094555, |
| "learning_rate": 4.445568255088003e-06, |
| "loss": 0.5247, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5598259608411893, |
| "grad_norm": 0.41653734784347574, |
| "learning_rate": 4.4376243900987296e-06, |
| "loss": 0.5232, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5603094029489969, |
| "grad_norm": 0.44411194102788987, |
| "learning_rate": 4.429681962586839e-06, |
| "loss": 0.5365, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5607928450568045, |
| "grad_norm": 0.44003715852304276, |
| "learning_rate": 4.421740992853818e-06, |
| "loss": 0.5311, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5612762871646121, |
| "grad_norm": 0.43445702749764165, |
| "learning_rate": 4.413801501197424e-06, |
| "loss": 0.5192, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5617597292724196, |
| "grad_norm": 0.4315566299087621, |
| "learning_rate": 4.405863507911638e-06, |
| "loss": 0.5007, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5622431713802272, |
| "grad_norm": 0.3786991552102392, |
| "learning_rate": 4.3979270332866105e-06, |
| "loss": 0.4867, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5627266134880348, |
| "grad_norm": 0.4244074214223604, |
| "learning_rate": 4.389992097608613e-06, |
| "loss": 0.5271, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5632100555958424, |
| "grad_norm": 0.430673286759424, |
| "learning_rate": 4.3820587211599745e-06, |
| "loss": 0.5292, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.56369349770365, |
| "grad_norm": 0.46109079230422667, |
| "learning_rate": 4.374126924219052e-06, |
| "loss": 0.5197, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5641769398114576, |
| "grad_norm": 0.4275407630254517, |
| "learning_rate": 4.366196727060152e-06, |
| "loss": 0.5196, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5646603819192652, |
| "grad_norm": 0.4407289737300367, |
| "learning_rate": 4.3582681499535e-06, |
| "loss": 0.5206, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5651438240270727, |
| "grad_norm": 0.4119248969357382, |
| "learning_rate": 4.3503412131651765e-06, |
| "loss": 0.5341, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.5656272661348803, |
| "grad_norm": 0.4621359921349015, |
| "learning_rate": 4.342415936957073e-06, |
| "loss": 0.5225, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5661107082426879, |
| "grad_norm": 0.45885127710831636, |
| "learning_rate": 4.334492341586833e-06, |
| "loss": 0.5328, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5665941503504955, |
| "grad_norm": 0.4217251248929531, |
| "learning_rate": 4.326570447307804e-06, |
| "loss": 0.5024, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5670775924583031, |
| "grad_norm": 0.43349297844561585, |
| "learning_rate": 4.318650274368989e-06, |
| "loss": 0.5302, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5675610345661107, |
| "grad_norm": 0.44173213520395654, |
| "learning_rate": 4.310731843014985e-06, |
| "loss": 0.518, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5680444766739183, |
| "grad_norm": 0.44550331806578247, |
| "learning_rate": 4.302815173485944e-06, |
| "loss": 0.5262, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.5685279187817259, |
| "grad_norm": 0.41790214183276025, |
| "learning_rate": 4.294900286017509e-06, |
| "loss": 0.5249, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5690113608895335, |
| "grad_norm": 0.4406462454965783, |
| "learning_rate": 4.286987200840772e-06, |
| "loss": 0.5399, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5694948029973411, |
| "grad_norm": 0.42899916006281247, |
| "learning_rate": 4.279075938182214e-06, |
| "loss": 0.522, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5699782451051486, |
| "grad_norm": 0.39425395091405524, |
| "learning_rate": 4.271166518263662e-06, |
| "loss": 0.4916, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5704616872129562, |
| "grad_norm": 0.4325306883401716, |
| "learning_rate": 4.263258961302232e-06, |
| "loss": 0.5297, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5709451293207638, |
| "grad_norm": 0.42524338171666753, |
| "learning_rate": 4.255353287510272e-06, |
| "loss": 0.524, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.44279478264935357, |
| "learning_rate": 4.247449517095329e-06, |
| "loss": 0.5215, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.571912013536379, |
| "grad_norm": 0.4197931530174695, |
| "learning_rate": 4.239547670260069e-06, |
| "loss": 0.5099, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5723954556441866, |
| "grad_norm": 0.4310563818067479, |
| "learning_rate": 4.231647767202254e-06, |
| "loss": 0.5191, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5728788977519942, |
| "grad_norm": 0.42356166524796646, |
| "learning_rate": 4.223749828114672e-06, |
| "loss": 0.528, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5733623398598018, |
| "grad_norm": 0.4095445643557658, |
| "learning_rate": 4.215853873185093e-06, |
| "loss": 0.4862, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5738457819676094, |
| "grad_norm": 0.47015761976525233, |
| "learning_rate": 4.2079599225962115e-06, |
| "loss": 0.5183, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.574329224075417, |
| "grad_norm": 0.4175313339147143, |
| "learning_rate": 4.2000679965256045e-06, |
| "loss": 0.5045, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5748126661832246, |
| "grad_norm": 0.43952093100735207, |
| "learning_rate": 4.192178115145668e-06, |
| "loss": 0.4942, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5752961082910322, |
| "grad_norm": 0.41782905847478197, |
| "learning_rate": 4.184290298623578e-06, |
| "loss": 0.4962, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5757795503988398, |
| "grad_norm": 0.4321674737064156, |
| "learning_rate": 4.176404567121225e-06, |
| "loss": 0.5397, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5762629925066474, |
| "grad_norm": 0.4802929727845466, |
| "learning_rate": 4.16852094079518e-06, |
| "loss": 0.5249, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.576746434614455, |
| "grad_norm": 0.4829071925445664, |
| "learning_rate": 4.160639439796624e-06, |
| "loss": 0.5192, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.5772298767222626, |
| "grad_norm": 0.41462388567526964, |
| "learning_rate": 4.152760084271305e-06, |
| "loss": 0.5224, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.57771331883007, |
| "grad_norm": 0.41336785715866026, |
| "learning_rate": 4.1448828943595e-06, |
| "loss": 0.531, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5781967609378776, |
| "grad_norm": 0.4424735299028098, |
| "learning_rate": 4.1370078901959306e-06, |
| "loss": 0.5309, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5786802030456852, |
| "grad_norm": 0.43755728178912967, |
| "learning_rate": 4.129135091909752e-06, |
| "loss": 0.5314, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5791636451534928, |
| "grad_norm": 0.4234445834897067, |
| "learning_rate": 4.121264519624463e-06, |
| "loss": 0.4971, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5796470872613004, |
| "grad_norm": 0.4465261505400547, |
| "learning_rate": 4.113396193457887e-06, |
| "loss": 0.5421, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.580130529369108, |
| "grad_norm": 0.43864096822609294, |
| "learning_rate": 4.105530133522096e-06, |
| "loss": 0.5286, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5806139714769156, |
| "grad_norm": 0.44812939972631666, |
| "learning_rate": 4.0976663599233745e-06, |
| "loss": 0.5283, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5810974135847232, |
| "grad_norm": 0.41015688502886277, |
| "learning_rate": 4.08980489276216e-06, |
| "loss": 0.5031, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5815808556925308, |
| "grad_norm": 0.4371592707730246, |
| "learning_rate": 4.081945752133e-06, |
| "loss": 0.5247, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5820642978003384, |
| "grad_norm": 0.4545452075633195, |
| "learning_rate": 4.074088958124488e-06, |
| "loss": 0.5233, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.582547739908146, |
| "grad_norm": 0.4034341944937049, |
| "learning_rate": 4.066234530819222e-06, |
| "loss": 0.4997, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5830311820159536, |
| "grad_norm": 0.44158682076629885, |
| "learning_rate": 4.058382490293755e-06, |
| "loss": 0.5327, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5835146241237612, |
| "grad_norm": 0.44126859021633347, |
| "learning_rate": 4.050532856618532e-06, |
| "loss": 0.5172, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5839980662315688, |
| "grad_norm": 0.44983862546632325, |
| "learning_rate": 4.0426856498578515e-06, |
| "loss": 0.5321, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5844815083393764, |
| "grad_norm": 0.4306823960936312, |
| "learning_rate": 4.034840890069805e-06, |
| "loss": 0.5286, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.584964950447184, |
| "grad_norm": 0.46647403495000134, |
| "learning_rate": 4.0269985973062325e-06, |
| "loss": 0.5205, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5854483925549916, |
| "grad_norm": 0.43512362259260234, |
| "learning_rate": 4.019158791612662e-06, |
| "loss": 0.5186, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.5859318346627991, |
| "grad_norm": 0.42474616162914414, |
| "learning_rate": 4.0113214930282765e-06, |
| "loss": 0.5019, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5864152767706067, |
| "grad_norm": 0.4496255266024326, |
| "learning_rate": 4.003486721585834e-06, |
| "loss": 0.5292, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5868987188784143, |
| "grad_norm": 0.4692064186214037, |
| "learning_rate": 3.995654497311649e-06, |
| "loss": 0.524, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5873821609862219, |
| "grad_norm": 0.47446797627391324, |
| "learning_rate": 3.987824840225512e-06, |
| "loss": 0.5296, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5878656030940295, |
| "grad_norm": 0.45351628397562704, |
| "learning_rate": 3.979997770340664e-06, |
| "loss": 0.5191, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5883490452018371, |
| "grad_norm": 0.4469658212338775, |
| "learning_rate": 3.972173307663721e-06, |
| "loss": 0.5259, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.5888324873096447, |
| "grad_norm": 0.42106841746243373, |
| "learning_rate": 3.964351472194642e-06, |
| "loss": 0.5169, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5893159294174523, |
| "grad_norm": 0.46081869250650287, |
| "learning_rate": 3.95653228392667e-06, |
| "loss": 0.5324, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5897993715252599, |
| "grad_norm": 0.43787433991965447, |
| "learning_rate": 3.9487157628462784e-06, |
| "loss": 0.5253, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5902828136330674, |
| "grad_norm": 0.4446710684339124, |
| "learning_rate": 3.940901928933127e-06, |
| "loss": 0.5207, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.590766255740875, |
| "grad_norm": 0.4878921355151572, |
| "learning_rate": 3.933090802160004e-06, |
| "loss": 0.5216, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5912496978486826, |
| "grad_norm": 0.43014793362728476, |
| "learning_rate": 3.925282402492779e-06, |
| "loss": 0.5158, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.5917331399564902, |
| "grad_norm": 0.4785437280134671, |
| "learning_rate": 3.917476749890351e-06, |
| "loss": 0.5337, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5922165820642978, |
| "grad_norm": 0.4374190045344671, |
| "learning_rate": 3.909673864304597e-06, |
| "loss": 0.528, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5927000241721054, |
| "grad_norm": 0.4547685840805692, |
| "learning_rate": 3.901873765680322e-06, |
| "loss": 0.5366, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.593183466279913, |
| "grad_norm": 0.4594395711536057, |
| "learning_rate": 3.894076473955207e-06, |
| "loss": 0.5202, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5936669083877205, |
| "grad_norm": 0.5197830445285274, |
| "learning_rate": 3.886282009059757e-06, |
| "loss": 0.5293, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5941503504955281, |
| "grad_norm": 0.438870970435199, |
| "learning_rate": 3.878490390917253e-06, |
| "loss": 0.521, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5946337926033357, |
| "grad_norm": 0.43955571970066076, |
| "learning_rate": 3.8707016394436985e-06, |
| "loss": 0.5219, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5951172347111433, |
| "grad_norm": 0.4319400896121196, |
| "learning_rate": 3.86291577454777e-06, |
| "loss": 0.5274, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5956006768189509, |
| "grad_norm": 0.42866945423374303, |
| "learning_rate": 3.855132816130767e-06, |
| "loss": 0.5287, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5960841189267585, |
| "grad_norm": 0.4905566924544953, |
| "learning_rate": 3.847352784086556e-06, |
| "loss": 0.5214, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5965675610345661, |
| "grad_norm": 0.4815238783873125, |
| "learning_rate": 3.839575698301529e-06, |
| "loss": 0.5348, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5970510031423737, |
| "grad_norm": 0.4611545386580029, |
| "learning_rate": 3.831801578654541e-06, |
| "loss": 0.521, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5975344452501813, |
| "grad_norm": 0.44138236799554953, |
| "learning_rate": 3.8240304450168716e-06, |
| "loss": 0.5012, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5980178873579889, |
| "grad_norm": 0.4729557809805851, |
| "learning_rate": 3.8162623172521615e-06, |
| "loss": 0.5239, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5985013294657965, |
| "grad_norm": 0.47751713396917145, |
| "learning_rate": 3.808497215216374e-06, |
| "loss": 0.518, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5989847715736041, |
| "grad_norm": 0.4632727328684821, |
| "learning_rate": 3.8007351587577342e-06, |
| "loss": 0.5212, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5994682136814117, |
| "grad_norm": 0.43452448731632676, |
| "learning_rate": 3.7929761677166847e-06, |
| "loss": 0.5256, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5999516557892193, |
| "grad_norm": 0.41707858133064807, |
| "learning_rate": 3.7852202619258327e-06, |
| "loss": 0.5258, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6004350978970269, |
| "grad_norm": 0.42484738876263983, |
| "learning_rate": 3.777467461209895e-06, |
| "loss": 0.5226, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.6009185400048345, |
| "grad_norm": 0.46278022630826876, |
| "learning_rate": 3.76971778538566e-06, |
| "loss": 0.5265, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6014019821126421, |
| "grad_norm": 0.4136178806145892, |
| "learning_rate": 3.76197125426192e-06, |
| "loss": 0.521, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6018854242204495, |
| "grad_norm": 0.42100500706131366, |
| "learning_rate": 3.754227887639434e-06, |
| "loss": 0.5119, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6023688663282571, |
| "grad_norm": 0.4123940096542578, |
| "learning_rate": 3.7464877053108706e-06, |
| "loss": 0.5258, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6028523084360647, |
| "grad_norm": 0.4195138029502561, |
| "learning_rate": 3.7387507270607617e-06, |
| "loss": 0.529, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6033357505438723, |
| "grad_norm": 0.45482061749805036, |
| "learning_rate": 3.7310169726654444e-06, |
| "loss": 0.528, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6038191926516799, |
| "grad_norm": 0.4371974850319641, |
| "learning_rate": 3.7232864618930217e-06, |
| "loss": 0.5182, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6043026347594875, |
| "grad_norm": 0.43099991632770085, |
| "learning_rate": 3.715559214503298e-06, |
| "loss": 0.5133, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6047860768672951, |
| "grad_norm": 0.4048318396541149, |
| "learning_rate": 3.707835250247745e-06, |
| "loss": 0.4877, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6052695189751027, |
| "grad_norm": 0.43040118158566426, |
| "learning_rate": 3.7001145888694335e-06, |
| "loss": 0.5256, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6057529610829103, |
| "grad_norm": 0.43151561170744174, |
| "learning_rate": 3.6923972501029996e-06, |
| "loss": 0.5028, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6062364031907179, |
| "grad_norm": 0.4168861988285146, |
| "learning_rate": 3.684683253674583e-06, |
| "loss": 0.5249, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6067198452985255, |
| "grad_norm": 0.4341349343804055, |
| "learning_rate": 3.676972619301776e-06, |
| "loss": 0.5119, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6072032874063331, |
| "grad_norm": 0.45716952276328937, |
| "learning_rate": 3.6692653666935875e-06, |
| "loss": 0.5262, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6076867295141407, |
| "grad_norm": 0.4304219021067875, |
| "learning_rate": 3.6615615155503703e-06, |
| "loss": 0.5168, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6081701716219483, |
| "grad_norm": 0.4098861167543977, |
| "learning_rate": 3.6538610855637953e-06, |
| "loss": 0.5193, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6086536137297559, |
| "grad_norm": 0.45344600794993284, |
| "learning_rate": 3.6461640964167755e-06, |
| "loss": 0.5213, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6091370558375635, |
| "grad_norm": 0.408785994340013, |
| "learning_rate": 3.638470567783442e-06, |
| "loss": 0.4982, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.609620497945371, |
| "grad_norm": 0.4481815917202834, |
| "learning_rate": 3.630780519329069e-06, |
| "loss": 0.5329, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6101039400531786, |
| "grad_norm": 0.4271346926738626, |
| "learning_rate": 3.623093970710043e-06, |
| "loss": 0.5278, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6105873821609862, |
| "grad_norm": 0.4443519643294819, |
| "learning_rate": 3.615410941573799e-06, |
| "loss": 0.5358, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6110708242687938, |
| "grad_norm": 0.41093410035963396, |
| "learning_rate": 3.607731451558783e-06, |
| "loss": 0.4973, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.6115542663766014, |
| "grad_norm": 0.4526917508007144, |
| "learning_rate": 3.6000555202943872e-06, |
| "loss": 0.5223, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.612037708484409, |
| "grad_norm": 0.458600770079082, |
| "learning_rate": 3.59238316740091e-06, |
| "loss": 0.5206, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6125211505922166, |
| "grad_norm": 0.4390522720994981, |
| "learning_rate": 3.584714412489506e-06, |
| "loss": 0.5306, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.6130045927000242, |
| "grad_norm": 0.41799452227515504, |
| "learning_rate": 3.5770492751621292e-06, |
| "loss": 0.5029, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6134880348078318, |
| "grad_norm": 0.43636835439611227, |
| "learning_rate": 3.5693877750114903e-06, |
| "loss": 0.5167, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6139714769156394, |
| "grad_norm": 0.43211619164840076, |
| "learning_rate": 3.5617299316209984e-06, |
| "loss": 0.5049, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.614454919023447, |
| "grad_norm": 0.42547953690955836, |
| "learning_rate": 3.5540757645647217e-06, |
| "loss": 0.4939, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.6149383611312546, |
| "grad_norm": 0.4170182860561763, |
| "learning_rate": 3.546425293407324e-06, |
| "loss": 0.5199, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.6154218032390621, |
| "grad_norm": 0.4222424350681242, |
| "learning_rate": 3.5387785377040316e-06, |
| "loss": 0.5132, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6159052453468697, |
| "grad_norm": 0.4552864030500758, |
| "learning_rate": 3.531135517000561e-06, |
| "loss": 0.5269, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6163886874546773, |
| "grad_norm": 0.43855551812482985, |
| "learning_rate": 3.523496250833098e-06, |
| "loss": 0.5122, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.6168721295624849, |
| "grad_norm": 0.46449768544610603, |
| "learning_rate": 3.515860758728214e-06, |
| "loss": 0.5234, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.6173555716702925, |
| "grad_norm": 0.46363731713711515, |
| "learning_rate": 3.5082290602028492e-06, |
| "loss": 0.5269, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.6178390137781, |
| "grad_norm": 0.42830833200680596, |
| "learning_rate": 3.5006011747642366e-06, |
| "loss": 0.5177, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6183224558859076, |
| "grad_norm": 0.45403297568672957, |
| "learning_rate": 3.492977121909865e-06, |
| "loss": 0.5329, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6188058979937152, |
| "grad_norm": 0.4707193517755616, |
| "learning_rate": 3.4853569211274306e-06, |
| "loss": 0.5275, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6192893401015228, |
| "grad_norm": 0.4382585499371175, |
| "learning_rate": 3.4777405918947795e-06, |
| "loss": 0.5117, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6197727822093304, |
| "grad_norm": 0.43247243116721396, |
| "learning_rate": 3.4701281536798638e-06, |
| "loss": 0.5274, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.620256224317138, |
| "grad_norm": 0.4457194202455219, |
| "learning_rate": 3.462519625940688e-06, |
| "loss": 0.5282, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.6207396664249456, |
| "grad_norm": 0.45208181214242377, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.5224, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.6212231085327532, |
| "grad_norm": 0.43419084686544124, |
| "learning_rate": 3.4473143796715537e-06, |
| "loss": 0.5221, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6217065506405608, |
| "grad_norm": 0.45834613229050314, |
| "learning_rate": 3.4397177000074307e-06, |
| "loss": 0.5286, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6221899927483684, |
| "grad_norm": 0.4551713196802731, |
| "learning_rate": 3.4321250085506174e-06, |
| "loss": 0.519, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.622673434856176, |
| "grad_norm": 0.45716705644935435, |
| "learning_rate": 3.4245363247086477e-06, |
| "loss": 0.5291, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.6231568769639836, |
| "grad_norm": 0.4214582611290155, |
| "learning_rate": 3.4169516678788096e-06, |
| "loss": 0.5084, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.6236403190717912, |
| "grad_norm": 0.43304150590207136, |
| "learning_rate": 3.4093710574480926e-06, |
| "loss": 0.5181, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6241237611795988, |
| "grad_norm": 0.4201723521255349, |
| "learning_rate": 3.4017945127931517e-06, |
| "loss": 0.5215, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6246072032874064, |
| "grad_norm": 0.43513341637312203, |
| "learning_rate": 3.394222053280245e-06, |
| "loss": 0.5219, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.625090645395214, |
| "grad_norm": 0.41778354176218346, |
| "learning_rate": 3.386653698265189e-06, |
| "loss": 0.5295, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.6255740875030215, |
| "grad_norm": 0.4289158469228602, |
| "learning_rate": 3.3790894670933096e-06, |
| "loss": 0.4993, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.626057529610829, |
| "grad_norm": 0.4362452363722638, |
| "learning_rate": 3.3715293790993906e-06, |
| "loss": 0.5212, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6265409717186367, |
| "grad_norm": 0.4287020140556269, |
| "learning_rate": 3.3639734536076263e-06, |
| "loss": 0.5145, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6270244138264442, |
| "grad_norm": 0.4371619386426176, |
| "learning_rate": 3.356421709931573e-06, |
| "loss": 0.5263, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6275078559342518, |
| "grad_norm": 0.4558196176944881, |
| "learning_rate": 3.348874167374093e-06, |
| "loss": 0.5193, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6279912980420594, |
| "grad_norm": 0.433400379634257, |
| "learning_rate": 3.341330845227316e-06, |
| "loss": 0.5342, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.628474740149867, |
| "grad_norm": 0.44874534866829735, |
| "learning_rate": 3.33379176277258e-06, |
| "loss": 0.5192, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6289581822576746, |
| "grad_norm": 0.42814892447345076, |
| "learning_rate": 3.326256939280389e-06, |
| "loss": 0.5222, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6294416243654822, |
| "grad_norm": 0.4175511142456795, |
| "learning_rate": 3.3187263940103587e-06, |
| "loss": 0.5008, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6299250664732898, |
| "grad_norm": 0.43012465279292367, |
| "learning_rate": 3.3112001462111666e-06, |
| "loss": 0.5252, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.6304085085810974, |
| "grad_norm": 0.4343943982087317, |
| "learning_rate": 3.3036782151205134e-06, |
| "loss": 0.5269, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.630891950688905, |
| "grad_norm": 0.4399879585469454, |
| "learning_rate": 3.296160619965056e-06, |
| "loss": 0.5348, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.6313753927967126, |
| "grad_norm": 0.41469773355380823, |
| "learning_rate": 3.2886473799603793e-06, |
| "loss": 0.5315, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.6318588349045202, |
| "grad_norm": 0.4118723735223055, |
| "learning_rate": 3.2811385143109254e-06, |
| "loss": 0.5199, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.6323422770123278, |
| "grad_norm": 0.42911493217824326, |
| "learning_rate": 3.2736340422099633e-06, |
| "loss": 0.5287, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6328257191201354, |
| "grad_norm": 0.42229266051589764, |
| "learning_rate": 3.2661339828395263e-06, |
| "loss": 0.5034, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.633309161227943, |
| "grad_norm": 0.4171629681078508, |
| "learning_rate": 3.2586383553703723e-06, |
| "loss": 0.5169, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6337926033357505, |
| "grad_norm": 0.4335972937122555, |
| "learning_rate": 3.2511471789619274e-06, |
| "loss": 0.5084, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.6342760454435581, |
| "grad_norm": 0.42465524204681926, |
| "learning_rate": 3.2436604727622447e-06, |
| "loss": 0.5126, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6347594875513657, |
| "grad_norm": 0.4075010185296818, |
| "learning_rate": 3.2361782559079465e-06, |
| "loss": 0.5158, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6352429296591733, |
| "grad_norm": 0.4308541178278502, |
| "learning_rate": 3.228700547524184e-06, |
| "loss": 0.5145, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6357263717669809, |
| "grad_norm": 0.428226473042128, |
| "learning_rate": 3.221227366724581e-06, |
| "loss": 0.5146, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6362098138747885, |
| "grad_norm": 0.4299253923140349, |
| "learning_rate": 3.2137587326111896e-06, |
| "loss": 0.5207, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6366932559825961, |
| "grad_norm": 0.4181473728080887, |
| "learning_rate": 3.206294664274443e-06, |
| "loss": 0.5268, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6371766980904037, |
| "grad_norm": 0.4474913351660176, |
| "learning_rate": 3.198835180793097e-06, |
| "loss": 0.5277, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.6376601401982113, |
| "grad_norm": 0.432676302059824, |
| "learning_rate": 3.1913803012341987e-06, |
| "loss": 0.5195, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6381435823060189, |
| "grad_norm": 0.44131963361999216, |
| "learning_rate": 3.183930044653014e-06, |
| "loss": 0.5157, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6386270244138265, |
| "grad_norm": 0.4300971230394046, |
| "learning_rate": 3.176484430093007e-06, |
| "loss": 0.5312, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6391104665216341, |
| "grad_norm": 0.44998544162450493, |
| "learning_rate": 3.1690434765857604e-06, |
| "loss": 0.523, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6395939086294417, |
| "grad_norm": 0.4472778564601584, |
| "learning_rate": 3.1616072031509594e-06, |
| "loss": 0.5155, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6400773507372493, |
| "grad_norm": 0.4278886331070072, |
| "learning_rate": 3.154175628796311e-06, |
| "loss": 0.5214, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6405607928450568, |
| "grad_norm": 0.4354238172365612, |
| "learning_rate": 3.146748772517523e-06, |
| "loss": 0.5202, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6410442349528644, |
| "grad_norm": 0.48506314375368076, |
| "learning_rate": 3.139326653298236e-06, |
| "loss": 0.5226, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6415276770606719, |
| "grad_norm": 0.42689588677759766, |
| "learning_rate": 3.1319092901099847e-06, |
| "loss": 0.5192, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6420111191684795, |
| "grad_norm": 0.44087925863432936, |
| "learning_rate": 3.1244967019121496e-06, |
| "loss": 0.5127, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6424945612762871, |
| "grad_norm": 0.42605880529211515, |
| "learning_rate": 3.117088907651902e-06, |
| "loss": 0.5176, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6429780033840947, |
| "grad_norm": 0.42151112378627853, |
| "learning_rate": 3.109685926264161e-06, |
| "loss": 0.5165, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6434614454919023, |
| "grad_norm": 0.4268979096606091, |
| "learning_rate": 3.102287776671544e-06, |
| "loss": 0.5339, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6439448875997099, |
| "grad_norm": 0.4392831430463705, |
| "learning_rate": 3.094894477784318e-06, |
| "loss": 0.5166, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6444283297075175, |
| "grad_norm": 0.4146076328411298, |
| "learning_rate": 3.0875060485003496e-06, |
| "loss": 0.5274, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6449117718153251, |
| "grad_norm": 0.42157017978455763, |
| "learning_rate": 3.080122507705062e-06, |
| "loss": 0.5243, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6453952139231327, |
| "grad_norm": 0.41448886842899935, |
| "learning_rate": 3.0727438742713766e-06, |
| "loss": 0.4982, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.6458786560309403, |
| "grad_norm": 0.4311221980804021, |
| "learning_rate": 3.0653701670596805e-06, |
| "loss": 0.5222, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.6463620981387479, |
| "grad_norm": 0.42860321112689415, |
| "learning_rate": 3.0580014049177566e-06, |
| "loss": 0.5203, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6468455402465555, |
| "grad_norm": 0.42363404336164073, |
| "learning_rate": 3.0506376066807632e-06, |
| "loss": 0.5131, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6473289823543631, |
| "grad_norm": 0.4352925336415172, |
| "learning_rate": 3.0432787911711553e-06, |
| "loss": 0.5244, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6478124244621707, |
| "grad_norm": 0.4213888881156135, |
| "learning_rate": 3.0359249771986605e-06, |
| "loss": 0.5114, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6482958665699783, |
| "grad_norm": 0.44310700039649703, |
| "learning_rate": 3.028576183560221e-06, |
| "loss": 0.5345, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6487793086777859, |
| "grad_norm": 0.46384351518694394, |
| "learning_rate": 3.021232429039944e-06, |
| "loss": 0.5103, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6492627507855935, |
| "grad_norm": 0.420074285156663, |
| "learning_rate": 3.01389373240906e-06, |
| "loss": 0.5307, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.649746192893401, |
| "grad_norm": 0.46717936599561755, |
| "learning_rate": 3.006560112425867e-06, |
| "loss": 0.5146, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6502296350012086, |
| "grad_norm": 0.4411181920960548, |
| "learning_rate": 2.999231587835691e-06, |
| "loss": 0.5113, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6507130771090162, |
| "grad_norm": 0.4353633086273451, |
| "learning_rate": 2.9919081773708293e-06, |
| "loss": 0.5195, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6511965192168238, |
| "grad_norm": 0.4555577360086876, |
| "learning_rate": 2.9845898997505102e-06, |
| "loss": 0.5201, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6516799613246314, |
| "grad_norm": 0.43234112941806857, |
| "learning_rate": 2.9772767736808406e-06, |
| "loss": 0.5194, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.652163403432439, |
| "grad_norm": 0.43499867909496204, |
| "learning_rate": 2.9699688178547615e-06, |
| "loss": 0.5252, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6526468455402465, |
| "grad_norm": 0.4243771527145887, |
| "learning_rate": 2.962666050951997e-06, |
| "loss": 0.5122, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6531302876480541, |
| "grad_norm": 0.4158873835022681, |
| "learning_rate": 2.9553684916390053e-06, |
| "loss": 0.5092, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6536137297558617, |
| "grad_norm": 0.4494940146550669, |
| "learning_rate": 2.948076158568939e-06, |
| "loss": 0.5256, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6540971718636693, |
| "grad_norm": 0.39860657725442444, |
| "learning_rate": 2.940789070381587e-06, |
| "loss": 0.5001, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6545806139714769, |
| "grad_norm": 0.41241371956595596, |
| "learning_rate": 2.933507245703335e-06, |
| "loss": 0.5174, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.6550640560792845, |
| "grad_norm": 0.41370768103275585, |
| "learning_rate": 2.9262307031471132e-06, |
| "loss": 0.5142, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6555474981870921, |
| "grad_norm": 0.4457235721009106, |
| "learning_rate": 2.918959461312353e-06, |
| "loss": 0.5212, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6560309402948997, |
| "grad_norm": 0.44189033261031596, |
| "learning_rate": 2.911693538784931e-06, |
| "loss": 0.5315, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6565143824027073, |
| "grad_norm": 0.417441027459776, |
| "learning_rate": 2.904432954137136e-06, |
| "loss": 0.5197, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6569978245105149, |
| "grad_norm": 0.41446617019595194, |
| "learning_rate": 2.897177725927599e-06, |
| "loss": 0.4977, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.6574812666183224, |
| "grad_norm": 0.424230216094722, |
| "learning_rate": 2.889927872701278e-06, |
| "loss": 0.5319, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.65796470872613, |
| "grad_norm": 0.42985260891150956, |
| "learning_rate": 2.8826834129893755e-06, |
| "loss": 0.5166, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6584481508339376, |
| "grad_norm": 0.38447115699767576, |
| "learning_rate": 2.8754443653093186e-06, |
| "loss": 0.4786, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6589315929417452, |
| "grad_norm": 0.4044201835297723, |
| "learning_rate": 2.8682107481646915e-06, |
| "loss": 0.5216, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6594150350495528, |
| "grad_norm": 0.4022645350617216, |
| "learning_rate": 2.8609825800452063e-06, |
| "loss": 0.4988, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6598984771573604, |
| "grad_norm": 0.4401229027571195, |
| "learning_rate": 2.853759879426644e-06, |
| "loss": 0.5181, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.660381919265168, |
| "grad_norm": 0.42349548120906483, |
| "learning_rate": 2.8465426647708067e-06, |
| "loss": 0.5163, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6608653613729756, |
| "grad_norm": 0.40013997451662586, |
| "learning_rate": 2.8393309545254776e-06, |
| "loss": 0.5214, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6613488034807832, |
| "grad_norm": 0.4359244125864156, |
| "learning_rate": 2.8321247671243695e-06, |
| "loss": 0.5179, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6618322455885908, |
| "grad_norm": 0.41425895295471055, |
| "learning_rate": 2.82492412098708e-06, |
| "loss": 0.5081, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6623156876963984, |
| "grad_norm": 0.4210065663342879, |
| "learning_rate": 2.8177290345190387e-06, |
| "loss": 0.5194, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.662799129804206, |
| "grad_norm": 0.4028980901393777, |
| "learning_rate": 2.8105395261114666e-06, |
| "loss": 0.5234, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6632825719120136, |
| "grad_norm": 0.4325922757476261, |
| "learning_rate": 2.803355614141327e-06, |
| "loss": 0.5188, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.6637660140198212, |
| "grad_norm": 0.4308186918740408, |
| "learning_rate": 2.7961773169712803e-06, |
| "loss": 0.5125, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6642494561276288, |
| "grad_norm": 0.4211885259856405, |
| "learning_rate": 2.7890046529496284e-06, |
| "loss": 0.5233, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6647328982354364, |
| "grad_norm": 0.4304676159038956, |
| "learning_rate": 2.7818376404102832e-06, |
| "loss": 0.5188, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.665216340343244, |
| "grad_norm": 0.4137521174014562, |
| "learning_rate": 2.774676297672701e-06, |
| "loss": 0.5248, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6656997824510514, |
| "grad_norm": 0.4389331875357886, |
| "learning_rate": 2.7675206430418542e-06, |
| "loss": 0.5265, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.666183224558859, |
| "grad_norm": 0.46429330512304384, |
| "learning_rate": 2.7603706948081745e-06, |
| "loss": 0.5211, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.4260734411731187, |
| "learning_rate": 2.753226471247501e-06, |
| "loss": 0.517, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6671501087744742, |
| "grad_norm": 0.4189810127916622, |
| "learning_rate": 2.7460879906210485e-06, |
| "loss": 0.5107, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6676335508822818, |
| "grad_norm": 0.41204910620329505, |
| "learning_rate": 2.7389552711753477e-06, |
| "loss": 0.5191, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6681169929900894, |
| "grad_norm": 0.4267680612975131, |
| "learning_rate": 2.731828331142207e-06, |
| "loss": 0.5128, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.668600435097897, |
| "grad_norm": 0.42901984315752384, |
| "learning_rate": 2.7247071887386544e-06, |
| "loss": 0.5257, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.6690838772057046, |
| "grad_norm": 0.4146728225846163, |
| "learning_rate": 2.7175918621669074e-06, |
| "loss": 0.5184, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6695673193135122, |
| "grad_norm": 0.4782489091382579, |
| "learning_rate": 2.7104823696143136e-06, |
| "loss": 0.5298, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6700507614213198, |
| "grad_norm": 0.40703765978893935, |
| "learning_rate": 2.70337872925331e-06, |
| "loss": 0.5111, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6705342035291274, |
| "grad_norm": 0.4109547447766556, |
| "learning_rate": 2.6962809592413726e-06, |
| "loss": 0.5002, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.671017645636935, |
| "grad_norm": 0.4498968198632276, |
| "learning_rate": 2.6891890777209696e-06, |
| "loss": 0.5256, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6715010877447426, |
| "grad_norm": 0.42208190857564254, |
| "learning_rate": 2.68210310281953e-06, |
| "loss": 0.5193, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.6719845298525502, |
| "grad_norm": 0.41822528698390377, |
| "learning_rate": 2.67502305264937e-06, |
| "loss": 0.5163, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6724679719603578, |
| "grad_norm": 0.4218034674050614, |
| "learning_rate": 2.667948945307674e-06, |
| "loss": 0.5174, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6729514140681654, |
| "grad_norm": 0.4206471334382422, |
| "learning_rate": 2.6608807988764252e-06, |
| "loss": 0.4936, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6734348561759729, |
| "grad_norm": 0.42181885072694014, |
| "learning_rate": 2.653818631422378e-06, |
| "loss": 0.5138, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6739182982837805, |
| "grad_norm": 0.4261589725068296, |
| "learning_rate": 2.6467624609970005e-06, |
| "loss": 0.5145, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6744017403915881, |
| "grad_norm": 0.40519700853309554, |
| "learning_rate": 2.6397123056364364e-06, |
| "loss": 0.5013, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.6748851824993957, |
| "grad_norm": 0.4510436140721377, |
| "learning_rate": 2.6326681833614464e-06, |
| "loss": 0.5184, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6753686246072033, |
| "grad_norm": 0.45003681113297744, |
| "learning_rate": 2.6256301121773775e-06, |
| "loss": 0.5149, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6758520667150109, |
| "grad_norm": 0.4263810181960221, |
| "learning_rate": 2.618598110074105e-06, |
| "loss": 0.5115, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6763355088228185, |
| "grad_norm": 0.4263005297393967, |
| "learning_rate": 2.6115721950259977e-06, |
| "loss": 0.5243, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.676818950930626, |
| "grad_norm": 0.4563967376255983, |
| "learning_rate": 2.6045523849918553e-06, |
| "loss": 0.5314, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6773023930384336, |
| "grad_norm": 0.4478593986013541, |
| "learning_rate": 2.5975386979148792e-06, |
| "loss": 0.5179, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.6777858351462412, |
| "grad_norm": 0.40707261007936574, |
| "learning_rate": 2.590531151722622e-06, |
| "loss": 0.5165, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6782692772540488, |
| "grad_norm": 0.45689999209163507, |
| "learning_rate": 2.5835297643269326e-06, |
| "loss": 0.5212, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6787527193618564, |
| "grad_norm": 0.41521250872284, |
| "learning_rate": 2.576534553623925e-06, |
| "loss": 0.5197, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.679236161469664, |
| "grad_norm": 0.3969654860159799, |
| "learning_rate": 2.5695455374939147e-06, |
| "loss": 0.4939, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6797196035774716, |
| "grad_norm": 0.4115250925249713, |
| "learning_rate": 2.5625627338014004e-06, |
| "loss": 0.5242, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6802030456852792, |
| "grad_norm": 0.4253454941567133, |
| "learning_rate": 2.5555861603949832e-06, |
| "loss": 0.513, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.6806864877930868, |
| "grad_norm": 0.4448844424181978, |
| "learning_rate": 2.548615835107352e-06, |
| "loss": 0.5047, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6811699299008944, |
| "grad_norm": 0.41222858577096244, |
| "learning_rate": 2.5416517757552157e-06, |
| "loss": 0.5286, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6816533720087019, |
| "grad_norm": 0.42285086542458045, |
| "learning_rate": 2.534694000139273e-06, |
| "loss": 0.5169, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6821368141165095, |
| "grad_norm": 0.4122433378845125, |
| "learning_rate": 2.5277425260441616e-06, |
| "loss": 0.515, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6826202562243171, |
| "grad_norm": 0.4362061175188878, |
| "learning_rate": 2.520797371238406e-06, |
| "loss": 0.5225, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6831036983321247, |
| "grad_norm": 0.4411789430289944, |
| "learning_rate": 2.513858553474382e-06, |
| "loss": 0.5191, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.6835871404399323, |
| "grad_norm": 0.4415744443134195, |
| "learning_rate": 2.506926090488269e-06, |
| "loss": 0.5306, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6840705825477399, |
| "grad_norm": 0.4477316137829116, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.5248, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6845540246555475, |
| "grad_norm": 0.41099572818531255, |
| "learning_rate": 2.4930802997132213e-06, |
| "loss": 0.5218, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6850374667633551, |
| "grad_norm": 0.4009913145578469, |
| "learning_rate": 2.486167007315243e-06, |
| "loss": 0.5189, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6855209088711627, |
| "grad_norm": 0.43880257019064667, |
| "learning_rate": 2.479260140476999e-06, |
| "loss": 0.5114, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6860043509789703, |
| "grad_norm": 0.4322007294880164, |
| "learning_rate": 2.4723597168529984e-06, |
| "loss": 0.5066, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6864877930867779, |
| "grad_norm": 0.4106120224272021, |
| "learning_rate": 2.465465754081277e-06, |
| "loss": 0.4888, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6869712351945855, |
| "grad_norm": 0.422067985874925, |
| "learning_rate": 2.458578269783364e-06, |
| "loss": 0.5155, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6874546773023931, |
| "grad_norm": 0.4136266956566046, |
| "learning_rate": 2.4516972815642166e-06, |
| "loss": 0.5143, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6879381194102007, |
| "grad_norm": 0.4335536983962682, |
| "learning_rate": 2.444822807012204e-06, |
| "loss": 0.5196, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6884215615180083, |
| "grad_norm": 0.42723749184962806, |
| "learning_rate": 2.4379548636990343e-06, |
| "loss": 0.5136, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6889050036258159, |
| "grad_norm": 0.4307011628135296, |
| "learning_rate": 2.4310934691797207e-06, |
| "loss": 0.5305, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6893884457336233, |
| "grad_norm": 0.45161428649005025, |
| "learning_rate": 2.4242386409925435e-06, |
| "loss": 0.5048, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6898718878414309, |
| "grad_norm": 0.4351186095813856, |
| "learning_rate": 2.4173903966589957e-06, |
| "loss": 0.5216, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.6903553299492385, |
| "grad_norm": 0.4128958039987362, |
| "learning_rate": 2.410548753683743e-06, |
| "loss": 0.5206, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6908387720570461, |
| "grad_norm": 0.4185374425485222, |
| "learning_rate": 2.4037137295545737e-06, |
| "loss": 0.5205, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6913222141648537, |
| "grad_norm": 0.4256083734187945, |
| "learning_rate": 2.396885341742361e-06, |
| "loss": 0.4804, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6918056562726613, |
| "grad_norm": 0.411514639053229, |
| "learning_rate": 2.390063607701016e-06, |
| "loss": 0.5194, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.6922890983804689, |
| "grad_norm": 0.43241514860902464, |
| "learning_rate": 2.3832485448674407e-06, |
| "loss": 0.53, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6927725404882765, |
| "grad_norm": 0.4291596725507727, |
| "learning_rate": 2.3764401706614832e-06, |
| "loss": 0.5144, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6932559825960841, |
| "grad_norm": 0.42041788788695633, |
| "learning_rate": 2.369638502485897e-06, |
| "loss": 0.5148, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6937394247038917, |
| "grad_norm": 0.4482987713314786, |
| "learning_rate": 2.3628435577262947e-06, |
| "loss": 0.5191, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6942228668116993, |
| "grad_norm": 0.42573448798758273, |
| "learning_rate": 2.3560553537511043e-06, |
| "loss": 0.5021, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6947063089195069, |
| "grad_norm": 0.41739963072931596, |
| "learning_rate": 2.3492739079115214e-06, |
| "loss": 0.5061, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.6951897510273145, |
| "grad_norm": 0.4366261411331466, |
| "learning_rate": 2.3424992375414655e-06, |
| "loss": 0.5133, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6956731931351221, |
| "grad_norm": 0.42225675860612266, |
| "learning_rate": 2.3357313599575422e-06, |
| "loss": 0.5254, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6961566352429297, |
| "grad_norm": 0.4347650420428982, |
| "learning_rate": 2.3289702924589914e-06, |
| "loss": 0.5143, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6966400773507373, |
| "grad_norm": 0.4220266027824235, |
| "learning_rate": 2.3222160523276486e-06, |
| "loss": 0.5194, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6971235194585449, |
| "grad_norm": 0.400495176856287, |
| "learning_rate": 2.3154686568278933e-06, |
| "loss": 0.5315, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6976069615663524, |
| "grad_norm": 0.4149083634198192, |
| "learning_rate": 2.3087281232066134e-06, |
| "loss": 0.5109, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.69809040367416, |
| "grad_norm": 0.43831779922906355, |
| "learning_rate": 2.3019944686931554e-06, |
| "loss": 0.5256, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6985738457819676, |
| "grad_norm": 0.4379300687242213, |
| "learning_rate": 2.2952677104992855e-06, |
| "loss": 0.5287, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6990572878897752, |
| "grad_norm": 0.43973213205463885, |
| "learning_rate": 2.2885478658191364e-06, |
| "loss": 0.5192, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6995407299975828, |
| "grad_norm": 0.42002084857343974, |
| "learning_rate": 2.281834951829174e-06, |
| "loss": 0.521, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.7000241721053904, |
| "grad_norm": 0.38595076036167364, |
| "learning_rate": 2.2751289856881487e-06, |
| "loss": 0.4869, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.700507614213198, |
| "grad_norm": 0.436647846778714, |
| "learning_rate": 2.268429984537048e-06, |
| "loss": 0.5216, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.7009910563210056, |
| "grad_norm": 0.4140253730185284, |
| "learning_rate": 2.2617379654990623e-06, |
| "loss": 0.5165, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7014744984288132, |
| "grad_norm": 0.4644944125638521, |
| "learning_rate": 2.255052945679525e-06, |
| "loss": 0.5183, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.7019579405366208, |
| "grad_norm": 0.41536119938345195, |
| "learning_rate": 2.248374942165894e-06, |
| "loss": 0.5231, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.7024413826444283, |
| "grad_norm": 0.4012349549582878, |
| "learning_rate": 2.241703972027679e-06, |
| "loss": 0.5168, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.7029248247522359, |
| "grad_norm": 0.4521292215779327, |
| "learning_rate": 2.23504005231642e-06, |
| "loss": 0.5158, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.7034082668600435, |
| "grad_norm": 0.4172271643387044, |
| "learning_rate": 2.2283832000656304e-06, |
| "loss": 0.4941, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.7038917089678511, |
| "grad_norm": 0.421958406666486, |
| "learning_rate": 2.221733432290762e-06, |
| "loss": 0.5209, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.7043751510756587, |
| "grad_norm": 0.42224698163781604, |
| "learning_rate": 2.2150907659891566e-06, |
| "loss": 0.5173, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.7048585931834663, |
| "grad_norm": 0.43523243642666853, |
| "learning_rate": 2.2084552181400087e-06, |
| "loss": 0.5186, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.7053420352912738, |
| "grad_norm": 0.4437233504227722, |
| "learning_rate": 2.201826805704308e-06, |
| "loss": 0.5125, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.7058254773990814, |
| "grad_norm": 0.42532048824174346, |
| "learning_rate": 2.195205545624813e-06, |
| "loss": 0.5243, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.706308919506889, |
| "grad_norm": 0.4322950043512432, |
| "learning_rate": 2.188591454826e-06, |
| "loss": 0.5135, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.7067923616146966, |
| "grad_norm": 0.4272575345234204, |
| "learning_rate": 2.181984550214015e-06, |
| "loss": 0.5116, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.7072758037225042, |
| "grad_norm": 0.41921770884395154, |
| "learning_rate": 2.175384848676639e-06, |
| "loss": 0.5165, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.7077592458303118, |
| "grad_norm": 0.43176187181049736, |
| "learning_rate": 2.168792367083243e-06, |
| "loss": 0.5138, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.7082426879381194, |
| "grad_norm": 0.41695232513283254, |
| "learning_rate": 2.162207122284742e-06, |
| "loss": 0.5091, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.708726130045927, |
| "grad_norm": 0.41339935320490057, |
| "learning_rate": 2.155629131113549e-06, |
| "loss": 0.5158, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.7092095721537346, |
| "grad_norm": 0.40689486411834114, |
| "learning_rate": 2.1490584103835433e-06, |
| "loss": 0.4847, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.7096930142615422, |
| "grad_norm": 0.417060588337446, |
| "learning_rate": 2.142494976890011e-06, |
| "loss": 0.5241, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.7101764563693498, |
| "grad_norm": 0.4289677663647557, |
| "learning_rate": 2.135938847409625e-06, |
| "loss": 0.5206, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.7106598984771574, |
| "grad_norm": 0.43410470718447147, |
| "learning_rate": 2.1293900387003742e-06, |
| "loss": 0.4931, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.711143340584965, |
| "grad_norm": 0.42958196993128944, |
| "learning_rate": 2.1228485675015455e-06, |
| "loss": 0.5204, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.7116267826927726, |
| "grad_norm": 0.4311771692424152, |
| "learning_rate": 2.1163144505336634e-06, |
| "loss": 0.5219, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.7121102248005802, |
| "grad_norm": 0.4150104118521869, |
| "learning_rate": 2.109787704498459e-06, |
| "loss": 0.519, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.7125936669083878, |
| "grad_norm": 0.43013467795196153, |
| "learning_rate": 2.1032683460788223e-06, |
| "loss": 0.4979, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.7130771090161954, |
| "grad_norm": 0.4303795815833922, |
| "learning_rate": 2.0967563919387563e-06, |
| "loss": 0.5256, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.7135605511240029, |
| "grad_norm": 0.4386538663824397, |
| "learning_rate": 2.0902518587233418e-06, |
| "loss": 0.5195, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.7140439932318104, |
| "grad_norm": 0.41141211228553354, |
| "learning_rate": 2.08375476305869e-06, |
| "loss": 0.5238, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.714527435339618, |
| "grad_norm": 0.3832973623968104, |
| "learning_rate": 2.077265121551903e-06, |
| "loss": 0.4914, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.7150108774474256, |
| "grad_norm": 0.4396380345403612, |
| "learning_rate": 2.0707829507910237e-06, |
| "loss": 0.5224, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.7154943195552332, |
| "grad_norm": 0.4084969868928133, |
| "learning_rate": 2.0643082673450053e-06, |
| "loss": 0.5214, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7159777616630408, |
| "grad_norm": 0.41940449704789057, |
| "learning_rate": 2.05784108776366e-06, |
| "loss": 0.5098, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.7164612037708484, |
| "grad_norm": 0.4368606150106444, |
| "learning_rate": 2.051381428577622e-06, |
| "loss": 0.5213, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.716944645878656, |
| "grad_norm": 0.4475169176125263, |
| "learning_rate": 2.044929306298298e-06, |
| "loss": 0.5169, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7174280879864636, |
| "grad_norm": 0.4192404761939798, |
| "learning_rate": 2.0384847374178346e-06, |
| "loss": 0.5214, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.7179115300942712, |
| "grad_norm": 0.4000794067095613, |
| "learning_rate": 2.0320477384090665e-06, |
| "loss": 0.5002, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.7183949722020788, |
| "grad_norm": 0.4083964682274076, |
| "learning_rate": 2.0256183257254837e-06, |
| "loss": 0.5057, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.7188784143098864, |
| "grad_norm": 0.4286205023949667, |
| "learning_rate": 2.0191965158011854e-06, |
| "loss": 0.4815, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.719361856417694, |
| "grad_norm": 0.40907099979637535, |
| "learning_rate": 2.012782325050831e-06, |
| "loss": 0.5283, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7198452985255016, |
| "grad_norm": 0.41946463733283473, |
| "learning_rate": 2.006375769869611e-06, |
| "loss": 0.522, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7203287406333092, |
| "grad_norm": 0.4222854300641897, |
| "learning_rate": 1.9999768666331974e-06, |
| "loss": 0.5132, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7208121827411168, |
| "grad_norm": 0.3830302288103666, |
| "learning_rate": 1.9935856316977044e-06, |
| "loss": 0.4938, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.7212956248489243, |
| "grad_norm": 0.47757660690611003, |
| "learning_rate": 1.987202081399639e-06, |
| "loss": 0.5251, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.7217790669567319, |
| "grad_norm": 0.3992903621119011, |
| "learning_rate": 1.9808262320558724e-06, |
| "loss": 0.506, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7222625090645395, |
| "grad_norm": 0.41142424465140587, |
| "learning_rate": 1.9744580999635902e-06, |
| "loss": 0.5143, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.7227459511723471, |
| "grad_norm": 0.4124129943865437, |
| "learning_rate": 1.968097701400252e-06, |
| "loss": 0.5245, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.7232293932801547, |
| "grad_norm": 0.4312737875038871, |
| "learning_rate": 1.9617450526235464e-06, |
| "loss": 0.5178, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.7237128353879623, |
| "grad_norm": 0.43509903197162936, |
| "learning_rate": 1.9554001698713572e-06, |
| "loss": 0.5131, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.7241962774957699, |
| "grad_norm": 0.4260008705271214, |
| "learning_rate": 1.949063069361717e-06, |
| "loss": 0.5136, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7246797196035775, |
| "grad_norm": 0.42356802738060345, |
| "learning_rate": 1.9427337672927632e-06, |
| "loss": 0.5146, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.7251631617113851, |
| "grad_norm": 0.4027997963462275, |
| "learning_rate": 1.936412279842705e-06, |
| "loss": 0.4913, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7256466038191927, |
| "grad_norm": 0.4124397793510055, |
| "learning_rate": 1.9300986231697705e-06, |
| "loss": 0.5175, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7261300459270003, |
| "grad_norm": 0.4442811918906246, |
| "learning_rate": 1.9237928134121757e-06, |
| "loss": 0.516, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.7266134880348079, |
| "grad_norm": 0.4393627100062481, |
| "learning_rate": 1.9174948666880805e-06, |
| "loss": 0.5155, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.7270969301426154, |
| "grad_norm": 0.43133527501756386, |
| "learning_rate": 1.9112047990955446e-06, |
| "loss": 0.5136, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.727580372250423, |
| "grad_norm": 0.45322135855021595, |
| "learning_rate": 1.9049226267124844e-06, |
| "loss": 0.5172, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7280638143582306, |
| "grad_norm": 0.41078461158260915, |
| "learning_rate": 1.8986483655966408e-06, |
| "loss": 0.5179, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7285472564660382, |
| "grad_norm": 0.4178604053793329, |
| "learning_rate": 1.8923820317855307e-06, |
| "loss": 0.5076, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7290306985738458, |
| "grad_norm": 0.42623268157040256, |
| "learning_rate": 1.8861236412964106e-06, |
| "loss": 0.5172, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.7295141406816533, |
| "grad_norm": 0.42835046843347674, |
| "learning_rate": 1.879873210126229e-06, |
| "loss": 0.5259, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.7299975827894609, |
| "grad_norm": 0.4196504177616674, |
| "learning_rate": 1.873630754251588e-06, |
| "loss": 0.5177, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7304810248972685, |
| "grad_norm": 0.4079644120305993, |
| "learning_rate": 1.8673962896287152e-06, |
| "loss": 0.5201, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7309644670050761, |
| "grad_norm": 0.43892341061011425, |
| "learning_rate": 1.8611698321933991e-06, |
| "loss": 0.5186, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7314479091128837, |
| "grad_norm": 0.42683430911112086, |
| "learning_rate": 1.8549513978609707e-06, |
| "loss": 0.5111, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.7319313512206913, |
| "grad_norm": 0.41062878136002484, |
| "learning_rate": 1.8487410025262436e-06, |
| "loss": 0.5103, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7324147933284989, |
| "grad_norm": 0.4256013874707191, |
| "learning_rate": 1.8425386620634961e-06, |
| "loss": 0.5167, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7328982354363065, |
| "grad_norm": 0.4388797350675763, |
| "learning_rate": 1.8363443923264046e-06, |
| "loss": 0.5125, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.7333816775441141, |
| "grad_norm": 0.4394233254146738, |
| "learning_rate": 1.8301582091480264e-06, |
| "loss": 0.5217, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7338651196519217, |
| "grad_norm": 0.41564422037394944, |
| "learning_rate": 1.8239801283407393e-06, |
| "loss": 0.5164, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7343485617597293, |
| "grad_norm": 0.4173422643681329, |
| "learning_rate": 1.8178101656962188e-06, |
| "loss": 0.5205, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7348320038675369, |
| "grad_norm": 0.39698118648442665, |
| "learning_rate": 1.8116483369853853e-06, |
| "loss": 0.4835, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7353154459753445, |
| "grad_norm": 0.42300362992419904, |
| "learning_rate": 1.8054946579583732e-06, |
| "loss": 0.5143, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.7357988880831521, |
| "grad_norm": 0.42464469919772974, |
| "learning_rate": 1.7993491443444771e-06, |
| "loss": 0.5129, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.7362823301909597, |
| "grad_norm": 0.4501988280108448, |
| "learning_rate": 1.7932118118521274e-06, |
| "loss": 0.5131, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.7367657722987673, |
| "grad_norm": 0.41493548901611477, |
| "learning_rate": 1.787082676168842e-06, |
| "loss": 0.5268, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.7372492144065748, |
| "grad_norm": 0.4436917707906808, |
| "learning_rate": 1.7809617529611828e-06, |
| "loss": 0.5126, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.7377326565143824, |
| "grad_norm": 0.39767655781448813, |
| "learning_rate": 1.7748490578747257e-06, |
| "loss": 0.4945, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.73821609862219, |
| "grad_norm": 0.4281607415979641, |
| "learning_rate": 1.7687446065340074e-06, |
| "loss": 0.5189, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.7386995407299976, |
| "grad_norm": 0.4123906023331037, |
| "learning_rate": 1.7626484145425038e-06, |
| "loss": 0.5117, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.7391829828378051, |
| "grad_norm": 0.39861909677156787, |
| "learning_rate": 1.7565604974825678e-06, |
| "loss": 0.4917, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.7396664249456127, |
| "grad_norm": 0.4164290248459804, |
| "learning_rate": 1.7504808709154104e-06, |
| "loss": 0.5187, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7401498670534203, |
| "grad_norm": 0.4149617264710624, |
| "learning_rate": 1.744409550381041e-06, |
| "loss": 0.529, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.7406333091612279, |
| "grad_norm": 0.402995768205116, |
| "learning_rate": 1.7383465513982517e-06, |
| "loss": 0.4906, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.7411167512690355, |
| "grad_norm": 0.4357911248878148, |
| "learning_rate": 1.7322918894645525e-06, |
| "loss": 0.5209, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.7416001933768431, |
| "grad_norm": 0.4310636351470309, |
| "learning_rate": 1.7262455800561456e-06, |
| "loss": 0.529, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.7420836354846507, |
| "grad_norm": 0.40110062198063573, |
| "learning_rate": 1.7202076386278876e-06, |
| "loss": 0.5218, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.7425670775924583, |
| "grad_norm": 0.4044655145984996, |
| "learning_rate": 1.7141780806132429e-06, |
| "loss": 0.5038, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7430505197002659, |
| "grad_norm": 0.4169687562172726, |
| "learning_rate": 1.70815692142425e-06, |
| "loss": 0.5094, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.7435339618080735, |
| "grad_norm": 0.3892005945860465, |
| "learning_rate": 1.702144176451473e-06, |
| "loss": 0.4909, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7440174039158811, |
| "grad_norm": 0.4059894671987348, |
| "learning_rate": 1.696139861063974e-06, |
| "loss": 0.5231, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7445008460236887, |
| "grad_norm": 0.4235285224343199, |
| "learning_rate": 1.690143990609268e-06, |
| "loss": 0.5116, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7449842881314963, |
| "grad_norm": 0.4066059462995061, |
| "learning_rate": 1.6841565804132843e-06, |
| "loss": 0.5159, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7454677302393038, |
| "grad_norm": 0.41374792014057904, |
| "learning_rate": 1.6781776457803227e-06, |
| "loss": 0.5146, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7459511723471114, |
| "grad_norm": 0.41330516594974576, |
| "learning_rate": 1.6722072019930242e-06, |
| "loss": 0.4841, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.746434614454919, |
| "grad_norm": 0.4342078760633199, |
| "learning_rate": 1.6662452643123234e-06, |
| "loss": 0.5181, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7469180565627266, |
| "grad_norm": 0.4366803318877013, |
| "learning_rate": 1.660291847977415e-06, |
| "loss": 0.5056, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7474014986705342, |
| "grad_norm": 0.4107968782550443, |
| "learning_rate": 1.6543469682057105e-06, |
| "loss": 0.5102, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7478849407783418, |
| "grad_norm": 0.43703346533243426, |
| "learning_rate": 1.6484106401927991e-06, |
| "loss": 0.517, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7483683828861494, |
| "grad_norm": 0.4185149815126949, |
| "learning_rate": 1.6424828791124159e-06, |
| "loss": 0.5162, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.748851824993957, |
| "grad_norm": 0.3941815905233016, |
| "learning_rate": 1.6365637001163958e-06, |
| "loss": 0.4694, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.7493352671017646, |
| "grad_norm": 0.4069386532862478, |
| "learning_rate": 1.6306531183346387e-06, |
| "loss": 0.5172, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7498187092095722, |
| "grad_norm": 0.44449597102378385, |
| "learning_rate": 1.624751148875065e-06, |
| "loss": 0.5227, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.7503021513173798, |
| "grad_norm": 0.4200070436877298, |
| "learning_rate": 1.6188578068235855e-06, |
| "loss": 0.5227, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7507855934251874, |
| "grad_norm": 0.4134676341568954, |
| "learning_rate": 1.6129731072440586e-06, |
| "loss": 0.5197, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.751269035532995, |
| "grad_norm": 0.4342416540931307, |
| "learning_rate": 1.6070970651782514e-06, |
| "loss": 0.5234, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7517524776408026, |
| "grad_norm": 0.4621699665968105, |
| "learning_rate": 1.6012296956457972e-06, |
| "loss": 0.5224, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7522359197486101, |
| "grad_norm": 0.39794619123328484, |
| "learning_rate": 1.5953710136441685e-06, |
| "loss": 0.5222, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7527193618564177, |
| "grad_norm": 0.39795969856270086, |
| "learning_rate": 1.5895210341486279e-06, |
| "loss": 0.4697, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.7532028039642252, |
| "grad_norm": 0.4348573897259895, |
| "learning_rate": 1.583679772112196e-06, |
| "loss": 0.5256, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7536862460720328, |
| "grad_norm": 0.4108494121358044, |
| "learning_rate": 1.5778472424656083e-06, |
| "loss": 0.5185, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7541696881798404, |
| "grad_norm": 0.41224584403564757, |
| "learning_rate": 1.5720234601172767e-06, |
| "loss": 0.5203, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.754653130287648, |
| "grad_norm": 0.4348874788487397, |
| "learning_rate": 1.566208439953265e-06, |
| "loss": 0.5189, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7551365723954556, |
| "grad_norm": 0.42842919833727694, |
| "learning_rate": 1.5604021968372286e-06, |
| "loss": 0.5111, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7556200145032632, |
| "grad_norm": 0.43772492324957596, |
| "learning_rate": 1.5546047456103964e-06, |
| "loss": 0.5147, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7561034566110708, |
| "grad_norm": 0.41431446343362865, |
| "learning_rate": 1.548816101091517e-06, |
| "loss": 0.5149, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7565868987188784, |
| "grad_norm": 0.40777837421338714, |
| "learning_rate": 1.5430362780768343e-06, |
| "loss": 0.5117, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.757070340826686, |
| "grad_norm": 0.4454487846070906, |
| "learning_rate": 1.537265291340042e-06, |
| "loss": 0.5074, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7575537829344936, |
| "grad_norm": 0.46396843002779686, |
| "learning_rate": 1.531503155632249e-06, |
| "loss": 0.5223, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.7580372250423012, |
| "grad_norm": 0.41741600165011983, |
| "learning_rate": 1.5257498856819353e-06, |
| "loss": 0.5158, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7585206671501088, |
| "grad_norm": 0.4059061868499258, |
| "learning_rate": 1.5200054961949233e-06, |
| "loss": 0.5049, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7590041092579164, |
| "grad_norm": 0.41330390270516437, |
| "learning_rate": 1.5142700018543382e-06, |
| "loss": 0.5305, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.759487551365724, |
| "grad_norm": 0.43099056056318497, |
| "learning_rate": 1.508543417320562e-06, |
| "loss": 0.5212, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7599709934735316, |
| "grad_norm": 0.39882553101049034, |
| "learning_rate": 1.5028257572312105e-06, |
| "loss": 0.4883, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7604544355813392, |
| "grad_norm": 0.4581685557000849, |
| "learning_rate": 1.4971170362010774e-06, |
| "loss": 0.5225, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.7609378776891468, |
| "grad_norm": 0.4428964310587446, |
| "learning_rate": 1.4914172688221213e-06, |
| "loss": 0.5195, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7614213197969543, |
| "grad_norm": 0.4170791170307987, |
| "learning_rate": 1.485726469663401e-06, |
| "loss": 0.5294, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.4212168944035229, |
| "learning_rate": 1.4800446532710627e-06, |
| "loss": 0.5143, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7623882040125695, |
| "grad_norm": 0.4317778496296824, |
| "learning_rate": 1.4743718341682806e-06, |
| "loss": 0.5242, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7628716461203771, |
| "grad_norm": 0.3887549768642727, |
| "learning_rate": 1.468708026855245e-06, |
| "loss": 0.4927, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7633550882281847, |
| "grad_norm": 0.41991973562573803, |
| "learning_rate": 1.463053245809099e-06, |
| "loss": 0.5248, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.7638385303359922, |
| "grad_norm": 0.41267795471721197, |
| "learning_rate": 1.457407505483921e-06, |
| "loss": 0.5187, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7643219724437998, |
| "grad_norm": 0.44716407911896383, |
| "learning_rate": 1.4517708203106763e-06, |
| "loss": 0.523, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7648054145516074, |
| "grad_norm": 0.4254440302923612, |
| "learning_rate": 1.446143204697187e-06, |
| "loss": 0.5233, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.765288856659415, |
| "grad_norm": 0.39996785018921494, |
| "learning_rate": 1.4405246730280946e-06, |
| "loss": 0.5172, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7657722987672226, |
| "grad_norm": 0.443369622770567, |
| "learning_rate": 1.4349152396648153e-06, |
| "loss": 0.5183, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7662557408750302, |
| "grad_norm": 0.40505843584897416, |
| "learning_rate": 1.4293149189455146e-06, |
| "loss": 0.5161, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7667391829828378, |
| "grad_norm": 0.4077704595280849, |
| "learning_rate": 1.4237237251850634e-06, |
| "loss": 0.5107, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7672226250906454, |
| "grad_norm": 0.40791039312028615, |
| "learning_rate": 1.4181416726750052e-06, |
| "loss": 0.5146, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.767706067198453, |
| "grad_norm": 0.41705043398231784, |
| "learning_rate": 1.4125687756835132e-06, |
| "loss": 0.4812, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7681895093062606, |
| "grad_norm": 0.4235182346193989, |
| "learning_rate": 1.4070050484553644e-06, |
| "loss": 0.5129, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7686729514140682, |
| "grad_norm": 0.414137655909364, |
| "learning_rate": 1.4014505052118893e-06, |
| "loss": 0.5236, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7691563935218757, |
| "grad_norm": 0.43611300077847176, |
| "learning_rate": 1.3959051601509537e-06, |
| "loss": 0.5345, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.7696398356296833, |
| "grad_norm": 0.410845648388898, |
| "learning_rate": 1.3903690274469029e-06, |
| "loss": 0.5115, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7701232777374909, |
| "grad_norm": 0.3961083948871449, |
| "learning_rate": 1.3848421212505404e-06, |
| "loss": 0.5168, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7706067198452985, |
| "grad_norm": 0.42179325369386034, |
| "learning_rate": 1.37932445568908e-06, |
| "loss": 0.5125, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7710901619531061, |
| "grad_norm": 0.4213217250215216, |
| "learning_rate": 1.3738160448661253e-06, |
| "loss": 0.5267, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7715736040609137, |
| "grad_norm": 0.4143253090473424, |
| "learning_rate": 1.3683169028616155e-06, |
| "loss": 0.5178, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7720570461687213, |
| "grad_norm": 0.4171850827541685, |
| "learning_rate": 1.3628270437317993e-06, |
| "loss": 0.5211, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.7725404882765289, |
| "grad_norm": 0.39565458081679644, |
| "learning_rate": 1.3573464815092003e-06, |
| "loss": 0.5055, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7730239303843365, |
| "grad_norm": 0.4271922188091497, |
| "learning_rate": 1.3518752302025773e-06, |
| "loss": 0.5279, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7735073724921441, |
| "grad_norm": 0.4151739224827406, |
| "learning_rate": 1.3464133037968914e-06, |
| "loss": 0.5239, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7739908145999517, |
| "grad_norm": 0.3960683162461613, |
| "learning_rate": 1.3409607162532628e-06, |
| "loss": 0.4987, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7744742567077593, |
| "grad_norm": 0.43044333694614223, |
| "learning_rate": 1.3355174815089477e-06, |
| "loss": 0.5273, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7749576988155669, |
| "grad_norm": 0.4121649380386113, |
| "learning_rate": 1.3300836134772916e-06, |
| "loss": 0.5162, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.7754411409233745, |
| "grad_norm": 0.4005354058641754, |
| "learning_rate": 1.3246591260477015e-06, |
| "loss": 0.5167, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7759245830311821, |
| "grad_norm": 0.3951020817933521, |
| "learning_rate": 1.3192440330856005e-06, |
| "loss": 0.5251, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7764080251389897, |
| "grad_norm": 0.42611917105831465, |
| "learning_rate": 1.3138383484324063e-06, |
| "loss": 0.5252, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7768914672467973, |
| "grad_norm": 0.40098636118444037, |
| "learning_rate": 1.308442085905482e-06, |
| "loss": 0.5101, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7773749093546047, |
| "grad_norm": 0.4404415072756006, |
| "learning_rate": 1.30305525929811e-06, |
| "loss": 0.5224, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7778583514624123, |
| "grad_norm": 0.40390400609014704, |
| "learning_rate": 1.297677882379455e-06, |
| "loss": 0.5191, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.7783417935702199, |
| "grad_norm": 0.43645719023114843, |
| "learning_rate": 1.2923099688945234e-06, |
| "loss": 0.5096, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7788252356780275, |
| "grad_norm": 0.401799031041578, |
| "learning_rate": 1.2869515325641357e-06, |
| "loss": 0.4812, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7793086777858351, |
| "grad_norm": 0.40544675897829047, |
| "learning_rate": 1.281602587084887e-06, |
| "loss": 0.5211, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7797921198936427, |
| "grad_norm": 0.4166351291750946, |
| "learning_rate": 1.2762631461291148e-06, |
| "loss": 0.5294, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7802755620014503, |
| "grad_norm": 0.4334981607396633, |
| "learning_rate": 1.2709332233448573e-06, |
| "loss": 0.5096, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7807590041092579, |
| "grad_norm": 0.437984950036233, |
| "learning_rate": 1.2656128323558286e-06, |
| "loss": 0.5135, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7812424462170655, |
| "grad_norm": 0.41467240914944964, |
| "learning_rate": 1.2603019867613764e-06, |
| "loss": 0.5162, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7817258883248731, |
| "grad_norm": 0.40797210573439474, |
| "learning_rate": 1.2550007001364518e-06, |
| "loss": 0.5064, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7822093304326807, |
| "grad_norm": 0.40625079236189654, |
| "learning_rate": 1.2497089860315675e-06, |
| "loss": 0.5057, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7826927725404883, |
| "grad_norm": 0.3973135238618207, |
| "learning_rate": 1.244426857972773e-06, |
| "loss": 0.5125, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7831762146482959, |
| "grad_norm": 0.41758654400468537, |
| "learning_rate": 1.239154329461615e-06, |
| "loss": 0.5146, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7836596567561035, |
| "grad_norm": 0.4546571879884002, |
| "learning_rate": 1.233891413975098e-06, |
| "loss": 0.5138, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.7841430988639111, |
| "grad_norm": 0.4501304501527847, |
| "learning_rate": 1.228638124965661e-06, |
| "loss": 0.5111, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7846265409717187, |
| "grad_norm": 0.40173574952002505, |
| "learning_rate": 1.223394475861131e-06, |
| "loss": 0.5134, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7851099830795262, |
| "grad_norm": 0.4105768174048188, |
| "learning_rate": 1.2181604800646996e-06, |
| "loss": 0.5092, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7855934251873338, |
| "grad_norm": 0.39390517153871624, |
| "learning_rate": 1.212936150954882e-06, |
| "loss": 0.498, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7860768672951414, |
| "grad_norm": 0.41453725871465896, |
| "learning_rate": 1.207721501885486e-06, |
| "loss": 0.5063, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.786560309402949, |
| "grad_norm": 0.44249465126635484, |
| "learning_rate": 1.2025165461855714e-06, |
| "loss": 0.5212, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.7870437515107566, |
| "grad_norm": 0.4079816768267276, |
| "learning_rate": 1.1973212971594262e-06, |
| "loss": 0.5155, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7875271936185642, |
| "grad_norm": 0.4318458945961838, |
| "learning_rate": 1.1921357680865258e-06, |
| "loss": 0.5183, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7880106357263718, |
| "grad_norm": 0.40656198305401237, |
| "learning_rate": 1.1869599722215013e-06, |
| "loss": 0.4949, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7884940778341794, |
| "grad_norm": 0.4056814293942294, |
| "learning_rate": 1.181793922794102e-06, |
| "loss": 0.5206, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.788977519941987, |
| "grad_norm": 0.42895763169120843, |
| "learning_rate": 1.1766376330091684e-06, |
| "loss": 0.503, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7894609620497945, |
| "grad_norm": 0.4165970675717556, |
| "learning_rate": 1.1714911160465924e-06, |
| "loss": 0.5255, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7899444041576021, |
| "grad_norm": 0.4123917311937627, |
| "learning_rate": 1.1663543850612847e-06, |
| "loss": 0.5169, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7904278462654097, |
| "grad_norm": 0.41612583641837364, |
| "learning_rate": 1.1612274531831463e-06, |
| "loss": 0.4938, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7909112883732173, |
| "grad_norm": 0.40728900719245686, |
| "learning_rate": 1.1561103335170242e-06, |
| "loss": 0.5222, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7913947304810249, |
| "grad_norm": 0.4348645075910405, |
| "learning_rate": 1.1510030391426941e-06, |
| "loss": 0.5192, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7918781725888325, |
| "grad_norm": 0.4086546804175218, |
| "learning_rate": 1.1459055831148074e-06, |
| "loss": 0.5232, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7923616146966401, |
| "grad_norm": 0.40880965205946446, |
| "learning_rate": 1.140817978462876e-06, |
| "loss": 0.5212, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7928450568044476, |
| "grad_norm": 0.3893016631161895, |
| "learning_rate": 1.1357402381912224e-06, |
| "loss": 0.4873, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7933284989122552, |
| "grad_norm": 0.4215992969510908, |
| "learning_rate": 1.1306723752789672e-06, |
| "loss": 0.5211, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7938119410200628, |
| "grad_norm": 0.420615559845491, |
| "learning_rate": 1.1256144026799703e-06, |
| "loss": 0.5179, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7942953831278704, |
| "grad_norm": 0.39236133338098145, |
| "learning_rate": 1.1205663333228217e-06, |
| "loss": 0.4911, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.794778825235678, |
| "grad_norm": 0.4158254754636244, |
| "learning_rate": 1.1155281801107897e-06, |
| "loss": 0.5146, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7952622673434856, |
| "grad_norm": 0.4092049660763265, |
| "learning_rate": 1.1104999559218022e-06, |
| "loss": 0.5063, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7957457094512932, |
| "grad_norm": 0.43121118572534733, |
| "learning_rate": 1.1054816736084057e-06, |
| "loss": 0.5122, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7962291515591008, |
| "grad_norm": 0.40574034047521074, |
| "learning_rate": 1.1004733459977325e-06, |
| "loss": 0.5089, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7967125936669084, |
| "grad_norm": 0.4343773778355907, |
| "learning_rate": 1.0954749858914727e-06, |
| "loss": 0.5177, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.797196035774716, |
| "grad_norm": 0.429877165339691, |
| "learning_rate": 1.0904866060658376e-06, |
| "loss": 0.5211, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7976794778825236, |
| "grad_norm": 0.4108995062804379, |
| "learning_rate": 1.0855082192715294e-06, |
| "loss": 0.5174, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7981629199903312, |
| "grad_norm": 0.4018616150052113, |
| "learning_rate": 1.0805398382337035e-06, |
| "loss": 0.5049, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7986463620981388, |
| "grad_norm": 0.4089174910335269, |
| "learning_rate": 1.0755814756519445e-06, |
| "loss": 0.5226, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7991298042059464, |
| "grad_norm": 0.3964537076582955, |
| "learning_rate": 1.0706331442002226e-06, |
| "loss": 0.5095, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.799613246313754, |
| "grad_norm": 0.4267767025207229, |
| "learning_rate": 1.0656948565268782e-06, |
| "loss": 0.5168, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.8000966884215616, |
| "grad_norm": 0.41363796984886936, |
| "learning_rate": 1.0607666252545673e-06, |
| "loss": 0.5128, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.8005801305293692, |
| "grad_norm": 0.43264146945425214, |
| "learning_rate": 1.0558484629802502e-06, |
| "loss": 0.514, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.8010635726371766, |
| "grad_norm": 0.42544390140386235, |
| "learning_rate": 1.0509403822751425e-06, |
| "loss": 0.512, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.8015470147449842, |
| "grad_norm": 0.3932679351449648, |
| "learning_rate": 1.0460423956846955e-06, |
| "loss": 0.4941, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.8020304568527918, |
| "grad_norm": 0.4096876585407803, |
| "learning_rate": 1.041154515728559e-06, |
| "loss": 0.5088, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.8025138989605994, |
| "grad_norm": 0.4173497731763413, |
| "learning_rate": 1.0362767549005454e-06, |
| "loss": 0.5119, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.802997341068407, |
| "grad_norm": 0.41012015779324845, |
| "learning_rate": 1.0314091256686065e-06, |
| "loss": 0.5212, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.8034807831762146, |
| "grad_norm": 0.38447439239259856, |
| "learning_rate": 1.0265516404747943e-06, |
| "loss": 0.5052, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.8039642252840222, |
| "grad_norm": 0.40948392634706504, |
| "learning_rate": 1.0217043117352337e-06, |
| "loss": 0.5109, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.8044476673918298, |
| "grad_norm": 0.40148827230751766, |
| "learning_rate": 1.0168671518400853e-06, |
| "loss": 0.5118, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.8049311094996374, |
| "grad_norm": 0.3939565441232479, |
| "learning_rate": 1.0120401731535213e-06, |
| "loss": 0.4879, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.805414551607445, |
| "grad_norm": 0.4394864393242481, |
| "learning_rate": 1.0072233880136872e-06, |
| "loss": 0.5104, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.8058979937152526, |
| "grad_norm": 0.4318770671908104, |
| "learning_rate": 1.0024168087326764e-06, |
| "loss": 0.5235, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.8063814358230602, |
| "grad_norm": 0.4281259140520081, |
| "learning_rate": 9.976204475964907e-07, |
| "loss": 0.5149, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.8068648779308678, |
| "grad_norm": 0.43979946361695016, |
| "learning_rate": 9.92834316865015e-07, |
| "loss": 0.5191, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.8073483200386754, |
| "grad_norm": 0.4312412015437643, |
| "learning_rate": 9.88058428771987e-07, |
| "loss": 0.5188, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.807831762146483, |
| "grad_norm": 0.4461824252192259, |
| "learning_rate": 9.832927955249605e-07, |
| "loss": 0.518, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.8083152042542906, |
| "grad_norm": 0.40455517199845253, |
| "learning_rate": 9.785374293052802e-07, |
| "loss": 0.5279, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.8087986463620981, |
| "grad_norm": 0.40500700400967726, |
| "learning_rate": 9.737923422680424e-07, |
| "loss": 0.5267, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.8092820884699057, |
| "grad_norm": 0.4053422468834684, |
| "learning_rate": 9.690575465420733e-07, |
| "loss": 0.5098, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.8097655305777133, |
| "grad_norm": 0.41221923071964073, |
| "learning_rate": 9.643330542298929e-07, |
| "loss": 0.5171, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.8102489726855209, |
| "grad_norm": 0.4289210188727792, |
| "learning_rate": 9.596188774076849e-07, |
| "loss": 0.5164, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.8107324147933285, |
| "grad_norm": 0.4119920227929362, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.5167, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.8112158569011361, |
| "grad_norm": 0.43146374267443927, |
| "learning_rate": 9.50221518406047e-07, |
| "loss": 0.5198, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.8116992990089437, |
| "grad_norm": 0.3915995001014536, |
| "learning_rate": 9.455383602470247e-07, |
| "loss": 0.5194, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.8121827411167513, |
| "grad_norm": 0.42092897815810126, |
| "learning_rate": 9.408655656187282e-07, |
| "loss": 0.5154, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.8126661832245589, |
| "grad_norm": 0.43929014126287974, |
| "learning_rate": 9.362031464651955e-07, |
| "loss": 0.5111, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.8131496253323665, |
| "grad_norm": 0.419403258433708, |
| "learning_rate": 9.31551114703943e-07, |
| "loss": 0.5175, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.813633067440174, |
| "grad_norm": 0.4235039718034734, |
| "learning_rate": 9.269094822259439e-07, |
| "loss": 0.5219, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.8141165095479816, |
| "grad_norm": 0.403949404981181, |
| "learning_rate": 9.22278260895581e-07, |
| "loss": 0.5257, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.8145999516557892, |
| "grad_norm": 0.40201626032689436, |
| "learning_rate": 9.176574625506324e-07, |
| "loss": 0.5065, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.8150833937635968, |
| "grad_norm": 0.42029809516611727, |
| "learning_rate": 9.130470990022283e-07, |
| "loss": 0.5198, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.8155668358714044, |
| "grad_norm": 0.4443584968330059, |
| "learning_rate": 9.084471820348306e-07, |
| "loss": 0.5054, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.816050277979212, |
| "grad_norm": 0.4011266291605723, |
| "learning_rate": 9.038577234061979e-07, |
| "loss": 0.481, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.8165337200870196, |
| "grad_norm": 0.4116565403445696, |
| "learning_rate": 8.992787348473575e-07, |
| "loss": 0.512, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.8170171621948271, |
| "grad_norm": 0.3855753519601646, |
| "learning_rate": 8.947102280625708e-07, |
| "loss": 0.4919, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.8175006043026347, |
| "grad_norm": 0.3998193393341577, |
| "learning_rate": 8.901522147293107e-07, |
| "loss": 0.5063, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8179840464104423, |
| "grad_norm": 0.40465428030335077, |
| "learning_rate": 8.856047064982276e-07, |
| "loss": 0.4969, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8184674885182499, |
| "grad_norm": 0.3993077607842942, |
| "learning_rate": 8.810677149931168e-07, |
| "loss": 0.5123, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.8189509306260575, |
| "grad_norm": 0.41845032917424874, |
| "learning_rate": 8.765412518108957e-07, |
| "loss": 0.5222, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8194343727338651, |
| "grad_norm": 0.4482989172909152, |
| "learning_rate": 8.720253285215685e-07, |
| "loss": 0.5245, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8199178148416727, |
| "grad_norm": 0.4096945568958353, |
| "learning_rate": 8.675199566682002e-07, |
| "loss": 0.4987, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8204012569494803, |
| "grad_norm": 0.42715377043083036, |
| "learning_rate": 8.630251477668828e-07, |
| "loss": 0.4956, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.8208846990572879, |
| "grad_norm": 0.42586545844645524, |
| "learning_rate": 8.585409133067119e-07, |
| "loss": 0.5096, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.8213681411650955, |
| "grad_norm": 0.43766586659276707, |
| "learning_rate": 8.540672647497483e-07, |
| "loss": 0.5136, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8218515832729031, |
| "grad_norm": 0.4371618341766256, |
| "learning_rate": 8.49604213531004e-07, |
| "loss": 0.5213, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8223350253807107, |
| "grad_norm": 0.4375571316772861, |
| "learning_rate": 8.451517710583934e-07, |
| "loss": 0.5051, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8228184674885183, |
| "grad_norm": 0.4132441919616583, |
| "learning_rate": 8.407099487127207e-07, |
| "loss": 0.5257, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8233019095963259, |
| "grad_norm": 0.42607745465695845, |
| "learning_rate": 8.362787578476395e-07, |
| "loss": 0.5249, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8237853517041335, |
| "grad_norm": 0.4075673839523143, |
| "learning_rate": 8.318582097896316e-07, |
| "loss": 0.5058, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.8242687938119411, |
| "grad_norm": 0.42693741052199397, |
| "learning_rate": 8.274483158379759e-07, |
| "loss": 0.5111, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8247522359197486, |
| "grad_norm": 0.39832416179935565, |
| "learning_rate": 8.230490872647146e-07, |
| "loss": 0.4938, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.8252356780275562, |
| "grad_norm": 0.422151557962671, |
| "learning_rate": 8.18660535314631e-07, |
| "loss": 0.5183, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.8257191201353637, |
| "grad_norm": 0.4003210551929738, |
| "learning_rate": 8.142826712052177e-07, |
| "loss": 0.5131, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8262025622431713, |
| "grad_norm": 0.41552515229148246, |
| "learning_rate": 8.099155061266495e-07, |
| "loss": 0.5104, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8266860043509789, |
| "grad_norm": 0.4199192751255081, |
| "learning_rate": 8.055590512417499e-07, |
| "loss": 0.504, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.8271694464587865, |
| "grad_norm": 0.4183052253157522, |
| "learning_rate": 8.012133176859705e-07, |
| "loss": 0.5183, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.8276528885665941, |
| "grad_norm": 0.40771463289221466, |
| "learning_rate": 7.968783165673554e-07, |
| "loss": 0.5134, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.8281363306744017, |
| "grad_norm": 0.4201027836512912, |
| "learning_rate": 7.925540589665187e-07, |
| "loss": 0.5074, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.8286197727822093, |
| "grad_norm": 0.395143526726159, |
| "learning_rate": 7.882405559366091e-07, |
| "loss": 0.4907, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.8291032148900169, |
| "grad_norm": 0.39924930985003787, |
| "learning_rate": 7.839378185032897e-07, |
| "loss": 0.5107, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8295866569978245, |
| "grad_norm": 0.4132095601626946, |
| "learning_rate": 7.796458576647015e-07, |
| "loss": 0.5185, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.8300700991056321, |
| "grad_norm": 0.40587201306044, |
| "learning_rate": 7.753646843914465e-07, |
| "loss": 0.5182, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.8305535412134397, |
| "grad_norm": 0.4094094956774689, |
| "learning_rate": 7.710943096265461e-07, |
| "loss": 0.5029, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.8310369833212473, |
| "grad_norm": 0.41067812349491495, |
| "learning_rate": 7.668347442854218e-07, |
| "loss": 0.5021, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.8315204254290549, |
| "grad_norm": 0.39956787890532264, |
| "learning_rate": 7.625859992558665e-07, |
| "loss": 0.5206, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8320038675368625, |
| "grad_norm": 0.43928086956712875, |
| "learning_rate": 7.583480853980158e-07, |
| "loss": 0.5134, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.8324873096446701, |
| "grad_norm": 0.4100632271699525, |
| "learning_rate": 7.541210135443188e-07, |
| "loss": 0.5184, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.8329707517524776, |
| "grad_norm": 0.3961555211112688, |
| "learning_rate": 7.499047944995108e-07, |
| "loss": 0.5222, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.8334541938602852, |
| "grad_norm": 0.41401758140390904, |
| "learning_rate": 7.45699439040588e-07, |
| "loss": 0.5149, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.8339376359680928, |
| "grad_norm": 0.41725576477900833, |
| "learning_rate": 7.415049579167783e-07, |
| "loss": 0.5086, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8344210780759004, |
| "grad_norm": 0.40808361223845036, |
| "learning_rate": 7.37321361849514e-07, |
| "loss": 0.5171, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.834904520183708, |
| "grad_norm": 0.4044441513281848, |
| "learning_rate": 7.331486615324024e-07, |
| "loss": 0.4931, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.8353879622915156, |
| "grad_norm": 0.39255016483428246, |
| "learning_rate": 7.289868676312023e-07, |
| "loss": 0.4895, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.8358714043993232, |
| "grad_norm": 0.4273894357037594, |
| "learning_rate": 7.248359907837959e-07, |
| "loss": 0.5141, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.8363548465071308, |
| "grad_norm": 0.41270523260835523, |
| "learning_rate": 7.206960416001563e-07, |
| "loss": 0.5053, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8368382886149384, |
| "grad_norm": 0.42210989792552517, |
| "learning_rate": 7.165670306623296e-07, |
| "loss": 0.515, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.837321730722746, |
| "grad_norm": 0.4005116526979819, |
| "learning_rate": 7.124489685243985e-07, |
| "loss": 0.5084, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.8378051728305536, |
| "grad_norm": 0.42730888005294004, |
| "learning_rate": 7.08341865712463e-07, |
| "loss": 0.5149, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.8382886149383612, |
| "grad_norm": 0.3946117211995092, |
| "learning_rate": 7.042457327246088e-07, |
| "loss": 0.5272, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.8387720570461688, |
| "grad_norm": 0.40058125990145727, |
| "learning_rate": 7.001605800308825e-07, |
| "loss": 0.5173, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.8392554991539763, |
| "grad_norm": 0.39419621537510763, |
| "learning_rate": 6.960864180732618e-07, |
| "loss": 0.5182, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.8397389412617839, |
| "grad_norm": 0.4302451888948554, |
| "learning_rate": 6.920232572656349e-07, |
| "loss": 0.5145, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.8402223833695915, |
| "grad_norm": 0.39221396906385003, |
| "learning_rate": 6.879711079937667e-07, |
| "loss": 0.5079, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.840705825477399, |
| "grad_norm": 0.4210023704512398, |
| "learning_rate": 6.839299806152799e-07, |
| "loss": 0.5061, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.8411892675852066, |
| "grad_norm": 0.4031707044630559, |
| "learning_rate": 6.79899885459619e-07, |
| "loss": 0.5174, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8416727096930142, |
| "grad_norm": 0.40104705743190977, |
| "learning_rate": 6.758808328280325e-07, |
| "loss": 0.4981, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.8421561518008218, |
| "grad_norm": 0.4158859718137932, |
| "learning_rate": 6.718728329935448e-07, |
| "loss": 0.5216, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.8426395939086294, |
| "grad_norm": 0.4140963838597211, |
| "learning_rate": 6.678758962009241e-07, |
| "loss": 0.5154, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.843123036016437, |
| "grad_norm": 0.41926365963573253, |
| "learning_rate": 6.638900326666653e-07, |
| "loss": 0.5181, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.8436064781242446, |
| "grad_norm": 0.4007033614343704, |
| "learning_rate": 6.599152525789531e-07, |
| "loss": 0.4772, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.8440899202320522, |
| "grad_norm": 0.4266694328755557, |
| "learning_rate": 6.559515660976506e-07, |
| "loss": 0.5153, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.8445733623398598, |
| "grad_norm": 0.42158713984389296, |
| "learning_rate": 6.519989833542567e-07, |
| "loss": 0.5218, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.8450568044476674, |
| "grad_norm": 0.4190422236566301, |
| "learning_rate": 6.480575144518931e-07, |
| "loss": 0.5267, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.845540246555475, |
| "grad_norm": 0.42322451653416415, |
| "learning_rate": 6.441271694652701e-07, |
| "loss": 0.517, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.8460236886632826, |
| "grad_norm": 0.4050974433698499, |
| "learning_rate": 6.402079584406673e-07, |
| "loss": 0.523, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8465071307710902, |
| "grad_norm": 0.39927068510798064, |
| "learning_rate": 6.36299891395904e-07, |
| "loss": 0.4943, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.8469905728788978, |
| "grad_norm": 0.40520913199613756, |
| "learning_rate": 6.32402978320315e-07, |
| "loss": 0.519, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.8474740149867054, |
| "grad_norm": 0.41524557234436116, |
| "learning_rate": 6.285172291747232e-07, |
| "loss": 0.5087, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.847957457094513, |
| "grad_norm": 0.39348055940589066, |
| "learning_rate": 6.246426538914174e-07, |
| "loss": 0.5135, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.8484408992023206, |
| "grad_norm": 0.40472211918575973, |
| "learning_rate": 6.207792623741249e-07, |
| "loss": 0.5181, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.8489243413101281, |
| "grad_norm": 0.3797781522780497, |
| "learning_rate": 6.169270644979836e-07, |
| "loss": 0.4718, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.8494077834179357, |
| "grad_norm": 0.4172564454240539, |
| "learning_rate": 6.130860701095226e-07, |
| "loss": 0.5093, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.8498912255257433, |
| "grad_norm": 0.460481903524328, |
| "learning_rate": 6.092562890266341e-07, |
| "loss": 0.5245, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8503746676335509, |
| "grad_norm": 0.40544203017797725, |
| "learning_rate": 6.054377310385479e-07, |
| "loss": 0.5067, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8508581097413584, |
| "grad_norm": 0.4225253280006634, |
| "learning_rate": 6.016304059058031e-07, |
| "loss": 0.5169, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.851341551849166, |
| "grad_norm": 0.40769266639259943, |
| "learning_rate": 5.97834323360233e-07, |
| "loss": 0.5243, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8518249939569736, |
| "grad_norm": 0.42284940262412657, |
| "learning_rate": 5.940494931049262e-07, |
| "loss": 0.5194, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8523084360647812, |
| "grad_norm": 0.3916025337851957, |
| "learning_rate": 5.902759248142187e-07, |
| "loss": 0.4975, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.8527918781725888, |
| "grad_norm": 0.41326270414280697, |
| "learning_rate": 5.86513628133652e-07, |
| "loss": 0.5154, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8532753202803964, |
| "grad_norm": 0.40856588365868324, |
| "learning_rate": 5.827626126799613e-07, |
| "loss": 0.5154, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.853758762388204, |
| "grad_norm": 0.42831173680710594, |
| "learning_rate": 5.790228880410426e-07, |
| "loss": 0.5163, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8542422044960116, |
| "grad_norm": 0.4218590594382107, |
| "learning_rate": 5.75294463775935e-07, |
| "loss": 0.517, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8547256466038192, |
| "grad_norm": 0.38253864809006055, |
| "learning_rate": 5.715773494147919e-07, |
| "loss": 0.4929, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8552090887116268, |
| "grad_norm": 0.40270548702028475, |
| "learning_rate": 5.678715544588547e-07, |
| "loss": 0.5088, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.8556925308194344, |
| "grad_norm": 0.4229953125269584, |
| "learning_rate": 5.641770883804365e-07, |
| "loss": 0.5258, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.856175972927242, |
| "grad_norm": 0.4037677845049078, |
| "learning_rate": 5.604939606228887e-07, |
| "loss": 0.5095, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8566594150350495, |
| "grad_norm": 0.39977977942883575, |
| "learning_rate": 5.568221806005847e-07, |
| "loss": 0.5128, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.4175904938844971, |
| "learning_rate": 5.531617576988879e-07, |
| "loss": 0.5114, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8576262992506647, |
| "grad_norm": 0.41812393010867166, |
| "learning_rate": 5.495127012741352e-07, |
| "loss": 0.5188, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8581097413584723, |
| "grad_norm": 0.38871202154348194, |
| "learning_rate": 5.45875020653609e-07, |
| "loss": 0.4882, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8585931834662799, |
| "grad_norm": 0.40175664384357557, |
| "learning_rate": 5.422487251355146e-07, |
| "loss": 0.5088, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8590766255740875, |
| "grad_norm": 0.4258611448475652, |
| "learning_rate": 5.386338239889549e-07, |
| "loss": 0.5136, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8595600676818951, |
| "grad_norm": 0.38040545155326977, |
| "learning_rate": 5.350303264539091e-07, |
| "loss": 0.4692, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8600435097897027, |
| "grad_norm": 0.4185751036827134, |
| "learning_rate": 5.314382417412062e-07, |
| "loss": 0.516, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8605269518975103, |
| "grad_norm": 0.4237092619379993, |
| "learning_rate": 5.278575790325052e-07, |
| "loss": 0.5146, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8610103940053179, |
| "grad_norm": 0.4173802982789206, |
| "learning_rate": 5.242883474802696e-07, |
| "loss": 0.5125, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8614938361131255, |
| "grad_norm": 0.41838440801291993, |
| "learning_rate": 5.207305562077403e-07, |
| "loss": 0.5177, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8619772782209331, |
| "grad_norm": 0.4779855097218796, |
| "learning_rate": 5.1718421430892e-07, |
| "loss": 0.5304, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8624607203287407, |
| "grad_norm": 0.37738685143261025, |
| "learning_rate": 5.136493308485446e-07, |
| "loss": 0.486, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8629441624365483, |
| "grad_norm": 0.39963258309250466, |
| "learning_rate": 5.101259148620618e-07, |
| "loss": 0.4959, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8634276045443559, |
| "grad_norm": 0.39604391770722097, |
| "learning_rate": 5.066139753556049e-07, |
| "loss": 0.4993, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8639110466521635, |
| "grad_norm": 0.40732958269577874, |
| "learning_rate": 5.031135213059756e-07, |
| "loss": 0.5153, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.864394488759971, |
| "grad_norm": 0.3897806967927546, |
| "learning_rate": 4.99624561660616e-07, |
| "loss": 0.4871, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8648779308677785, |
| "grad_norm": 0.4048723969181331, |
| "learning_rate": 4.961471053375899e-07, |
| "loss": 0.512, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8653613729755861, |
| "grad_norm": 0.4203351282800037, |
| "learning_rate": 4.926811612255539e-07, |
| "loss": 0.5121, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8658448150833937, |
| "grad_norm": 0.39858565202586066, |
| "learning_rate": 4.892267381837396e-07, |
| "loss": 0.5011, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8663282571912013, |
| "grad_norm": 0.4344627773200746, |
| "learning_rate": 4.857838450419339e-07, |
| "loss": 0.5103, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8668116992990089, |
| "grad_norm": 0.40293448022650774, |
| "learning_rate": 4.823524906004468e-07, |
| "loss": 0.5138, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.8672951414068165, |
| "grad_norm": 0.41801337173969716, |
| "learning_rate": 4.789326836300983e-07, |
| "loss": 0.5151, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8677785835146241, |
| "grad_norm": 0.4058943681689954, |
| "learning_rate": 4.7552443287218866e-07, |
| "loss": 0.5098, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8682620256224317, |
| "grad_norm": 0.42652856984845416, |
| "learning_rate": 4.7212774703848273e-07, |
| "loss": 0.508, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.8687454677302393, |
| "grad_norm": 0.4211824745719729, |
| "learning_rate": 4.687426348111834e-07, |
| "loss": 0.5122, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8692289098380469, |
| "grad_norm": 0.4022753726796167, |
| "learning_rate": 4.65369104842911e-07, |
| "loss": 0.52, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8697123519458545, |
| "grad_norm": 0.40283890754002527, |
| "learning_rate": 4.620071657566777e-07, |
| "loss": 0.5072, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.8701957940536621, |
| "grad_norm": 0.4283203699114763, |
| "learning_rate": 4.586568261458729e-07, |
| "loss": 0.5096, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8706792361614697, |
| "grad_norm": 0.38328034805235095, |
| "learning_rate": 4.553180945742336e-07, |
| "loss": 0.4861, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8711626782692773, |
| "grad_norm": 0.4122501087059972, |
| "learning_rate": 4.5199097957582816e-07, |
| "loss": 0.5136, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8716461203770849, |
| "grad_norm": 0.41264272327652995, |
| "learning_rate": 4.486754896550288e-07, |
| "loss": 0.5012, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8721295624848925, |
| "grad_norm": 0.41725003600600513, |
| "learning_rate": 4.45371633286496e-07, |
| "loss": 0.5185, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8726130045927, |
| "grad_norm": 0.4078148663174146, |
| "learning_rate": 4.4207941891515335e-07, |
| "loss": 0.5135, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8730964467005076, |
| "grad_norm": 0.4209684818924423, |
| "learning_rate": 4.3879885495616505e-07, |
| "loss": 0.512, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8735798888083152, |
| "grad_norm": 0.3832255061477332, |
| "learning_rate": 4.3552994979491836e-07, |
| "loss": 0.5131, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8740633309161228, |
| "grad_norm": 0.415646535369065, |
| "learning_rate": 4.322727117869951e-07, |
| "loss": 0.5156, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8745467730239304, |
| "grad_norm": 0.403529021224522, |
| "learning_rate": 4.290271492581627e-07, |
| "loss": 0.5225, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.875030215131738, |
| "grad_norm": 0.4248226663595473, |
| "learning_rate": 4.257932705043372e-07, |
| "loss": 0.5276, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8755136572395456, |
| "grad_norm": 0.42279657022545747, |
| "learning_rate": 4.2257108379157586e-07, |
| "loss": 0.5224, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.8759970993473531, |
| "grad_norm": 0.4140176038814713, |
| "learning_rate": 4.1936059735604497e-07, |
| "loss": 0.5161, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8764805414551607, |
| "grad_norm": 0.39792458444383394, |
| "learning_rate": 4.161618194040079e-07, |
| "loss": 0.5277, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8769639835629683, |
| "grad_norm": 0.39549573015495143, |
| "learning_rate": 4.129747581117993e-07, |
| "loss": 0.5053, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8774474256707759, |
| "grad_norm": 0.4160802633412061, |
| "learning_rate": 4.0979942162580387e-07, |
| "loss": 0.516, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8779308677785835, |
| "grad_norm": 0.41239251805984983, |
| "learning_rate": 4.06635818062438e-07, |
| "loss": 0.5278, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8784143098863911, |
| "grad_norm": 0.40871873580107365, |
| "learning_rate": 4.0348395550812713e-07, |
| "loss": 0.5294, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.8788977519941987, |
| "grad_norm": 0.40365670038657436, |
| "learning_rate": 4.003438420192873e-07, |
| "loss": 0.5158, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8793811941020063, |
| "grad_norm": 0.4255428234546921, |
| "learning_rate": 3.9721548562229985e-07, |
| "loss": 0.5114, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8798646362098139, |
| "grad_norm": 0.41203315649756733, |
| "learning_rate": 3.9409889431349656e-07, |
| "loss": 0.5116, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8803480783176215, |
| "grad_norm": 0.4149872650348109, |
| "learning_rate": 3.9099407605913576e-07, |
| "loss": 0.5099, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.880831520425429, |
| "grad_norm": 0.4258100076362105, |
| "learning_rate": 3.879010387953841e-07, |
| "loss": 0.5175, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8813149625332366, |
| "grad_norm": 0.3902355927247227, |
| "learning_rate": 3.84819790428293e-07, |
| "loss": 0.498, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8817984046410442, |
| "grad_norm": 0.40842472365457144, |
| "learning_rate": 3.8175033883378233e-07, |
| "loss": 0.518, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8822818467488518, |
| "grad_norm": 0.4221970543634826, |
| "learning_rate": 3.7869269185761613e-07, |
| "loss": 0.5216, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8827652888566594, |
| "grad_norm": 0.40616883661281006, |
| "learning_rate": 3.7564685731538985e-07, |
| "loss": 0.5066, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.883248730964467, |
| "grad_norm": 0.4061562407072031, |
| "learning_rate": 3.7261284299249967e-07, |
| "loss": 0.517, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8837321730722746, |
| "grad_norm": 0.4079225433423233, |
| "learning_rate": 3.695906566441304e-07, |
| "loss": 0.4959, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8842156151800822, |
| "grad_norm": 0.38197368709112006, |
| "learning_rate": 3.665803059952344e-07, |
| "loss": 0.4871, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8846990572878898, |
| "grad_norm": 0.411849076052872, |
| "learning_rate": 3.63581798740511e-07, |
| "loss": 0.5143, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8851824993956974, |
| "grad_norm": 0.393276210273132, |
| "learning_rate": 3.605951425443871e-07, |
| "loss": 0.4936, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.885665941503505, |
| "grad_norm": 0.369604359657528, |
| "learning_rate": 3.576203450409943e-07, |
| "loss": 0.4684, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8861493836113126, |
| "grad_norm": 0.43326466002005165, |
| "learning_rate": 3.5465741383415684e-07, |
| "loss": 0.5104, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8866328257191202, |
| "grad_norm": 0.41527359664646213, |
| "learning_rate": 3.5170635649736497e-07, |
| "loss": 0.519, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8871162678269278, |
| "grad_norm": 0.41356740894281485, |
| "learning_rate": 3.487671805737597e-07, |
| "loss": 0.508, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8875997099347354, |
| "grad_norm": 0.4050751048123327, |
| "learning_rate": 3.4583989357611037e-07, |
| "loss": 0.5135, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.888083152042543, |
| "grad_norm": 0.3923610722591795, |
| "learning_rate": 3.4292450298679945e-07, |
| "loss": 0.5075, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8885665941503504, |
| "grad_norm": 0.41919225013002887, |
| "learning_rate": 3.400210162577999e-07, |
| "loss": 0.5166, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.889050036258158, |
| "grad_norm": 0.42118222715491443, |
| "learning_rate": 3.371294408106585e-07, |
| "loss": 0.523, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8895334783659656, |
| "grad_norm": 0.3952238335142466, |
| "learning_rate": 3.3424978403647443e-07, |
| "loss": 0.5138, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8900169204737732, |
| "grad_norm": 0.4163195177412695, |
| "learning_rate": 3.313820532958817e-07, |
| "loss": 0.5274, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.8905003625815808, |
| "grad_norm": 0.3930314520659748, |
| "learning_rate": 3.285262559190322e-07, |
| "loss": 0.4991, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8909838046893884, |
| "grad_norm": 0.4336804309313973, |
| "learning_rate": 3.256823992055741e-07, |
| "loss": 0.5009, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.891467246797196, |
| "grad_norm": 0.41714068524986875, |
| "learning_rate": 3.228504904246349e-07, |
| "loss": 0.5238, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8919506889050036, |
| "grad_norm": 0.41848606366751967, |
| "learning_rate": 3.20030536814801e-07, |
| "loss": 0.5202, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8924341310128112, |
| "grad_norm": 0.422964314144621, |
| "learning_rate": 3.1722254558410047e-07, |
| "loss": 0.5104, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8929175731206188, |
| "grad_norm": 0.41539348703446205, |
| "learning_rate": 3.144265239099864e-07, |
| "loss": 0.5152, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8934010152284264, |
| "grad_norm": 0.3936271006898258, |
| "learning_rate": 3.1164247893931575e-07, |
| "loss": 0.5071, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.893884457336234, |
| "grad_norm": 0.4152031331913687, |
| "learning_rate": 3.088704177883306e-07, |
| "loss": 0.5181, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8943678994440416, |
| "grad_norm": 0.4176432021270733, |
| "learning_rate": 3.06110347542643e-07, |
| "loss": 0.5235, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8948513415518492, |
| "grad_norm": 0.3954219378639727, |
| "learning_rate": 3.033622752572157e-07, |
| "loss": 0.5019, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8953347836596568, |
| "grad_norm": 0.45830856560980365, |
| "learning_rate": 3.0062620795634214e-07, |
| "loss": 0.5263, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8958182257674644, |
| "grad_norm": 0.4009466020951186, |
| "learning_rate": 2.9790215263363174e-07, |
| "loss": 0.5222, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.896301667875272, |
| "grad_norm": 0.3933495297633584, |
| "learning_rate": 2.951901162519877e-07, |
| "loss": 0.5233, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8967851099830795, |
| "grad_norm": 0.39895160904445, |
| "learning_rate": 2.9249010574359636e-07, |
| "loss": 0.5212, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8972685520908871, |
| "grad_norm": 0.42068899596041226, |
| "learning_rate": 2.898021280098995e-07, |
| "loss": 0.5168, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8977519941986947, |
| "grad_norm": 0.39971963228555085, |
| "learning_rate": 2.8712618992158656e-07, |
| "loss": 0.5084, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8982354363065023, |
| "grad_norm": 0.3999616227972635, |
| "learning_rate": 2.8446229831856964e-07, |
| "loss": 0.5088, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.8987188784143099, |
| "grad_norm": 0.4001447692276326, |
| "learning_rate": 2.8181046000997136e-07, |
| "loss": 0.521, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8992023205221175, |
| "grad_norm": 0.41592034251039167, |
| "learning_rate": 2.791706817741041e-07, |
| "loss": 0.5072, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8996857626299251, |
| "grad_norm": 0.4445686187455443, |
| "learning_rate": 2.765429703584538e-07, |
| "loss": 0.5148, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.9001692047377327, |
| "grad_norm": 0.40228802491920107, |
| "learning_rate": 2.739273324796621e-07, |
| "loss": 0.5262, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.9006526468455403, |
| "grad_norm": 0.40404504261863744, |
| "learning_rate": 2.7132377482351037e-07, |
| "loss": 0.5147, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.9011360889533478, |
| "grad_norm": 0.3986359660989621, |
| "learning_rate": 2.687323040449025e-07, |
| "loss": 0.5172, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.9016195310611554, |
| "grad_norm": 0.42039178580411435, |
| "learning_rate": 2.6615292676784533e-07, |
| "loss": 0.5191, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.902102973168963, |
| "grad_norm": 0.4168785648766661, |
| "learning_rate": 2.635856495854372e-07, |
| "loss": 0.5116, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.9025864152767706, |
| "grad_norm": 0.4006359687639295, |
| "learning_rate": 2.6103047905984224e-07, |
| "loss": 0.5243, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.9030698573845782, |
| "grad_norm": 0.4136741219117099, |
| "learning_rate": 2.584874217222855e-07, |
| "loss": 0.516, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.9035532994923858, |
| "grad_norm": 0.41454758895188654, |
| "learning_rate": 2.5595648407302496e-07, |
| "loss": 0.5299, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.9040367416001934, |
| "grad_norm": 0.43072596167116733, |
| "learning_rate": 2.53437672581342e-07, |
| "loss": 0.5192, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.9045201837080009, |
| "grad_norm": 0.413346134850188, |
| "learning_rate": 2.5093099368551974e-07, |
| "loss": 0.5135, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.9050036258158085, |
| "grad_norm": 0.44414111234791465, |
| "learning_rate": 2.484364537928341e-07, |
| "loss": 0.5248, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.9054870679236161, |
| "grad_norm": 0.41031454686253116, |
| "learning_rate": 2.45954059279529e-07, |
| "loss": 0.5198, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.9059705100314237, |
| "grad_norm": 0.3982976345229948, |
| "learning_rate": 2.4348381649080486e-07, |
| "loss": 0.5163, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.9064539521392313, |
| "grad_norm": 0.4007617837820295, |
| "learning_rate": 2.41025731740801e-07, |
| "loss": 0.511, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.9069373942470389, |
| "grad_norm": 0.40168617787804406, |
| "learning_rate": 2.3857981131258037e-07, |
| "loss": 0.5114, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.9074208363548465, |
| "grad_norm": 0.38110421429609603, |
| "learning_rate": 2.3614606145811347e-07, |
| "loss": 0.4992, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.9079042784626541, |
| "grad_norm": 0.3870732423514054, |
| "learning_rate": 2.3372448839825978e-07, |
| "loss": 0.4887, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.9083877205704617, |
| "grad_norm": 0.39979584331802676, |
| "learning_rate": 2.3131509832275633e-07, |
| "loss": 0.5122, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.9088711626782693, |
| "grad_norm": 0.3996732608438804, |
| "learning_rate": 2.2891789739019733e-07, |
| "loss": 0.5102, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.9093546047860769, |
| "grad_norm": 0.40968516048558534, |
| "learning_rate": 2.2653289172802295e-07, |
| "loss": 0.5049, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.9098380468938845, |
| "grad_norm": 0.4006751726323446, |
| "learning_rate": 2.241600874324984e-07, |
| "loss": 0.5144, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.9103214890016921, |
| "grad_norm": 0.4066456668668, |
| "learning_rate": 2.2179949056870432e-07, |
| "loss": 0.5184, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.9108049311094997, |
| "grad_norm": 0.4179374057794063, |
| "learning_rate": 2.194511071705141e-07, |
| "loss": 0.5131, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.9112883732173073, |
| "grad_norm": 0.419480536858942, |
| "learning_rate": 2.1711494324058724e-07, |
| "loss": 0.5147, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.9117718153251149, |
| "grad_norm": 0.40624640146953556, |
| "learning_rate": 2.1479100475034598e-07, |
| "loss": 0.5084, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.9122552574329225, |
| "grad_norm": 0.40367583928635464, |
| "learning_rate": 2.1247929763996534e-07, |
| "loss": 0.4832, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.91273869954073, |
| "grad_norm": 0.3989060344990105, |
| "learning_rate": 2.101798278183542e-07, |
| "loss": 0.5144, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.9132221416485375, |
| "grad_norm": 0.3998308893808953, |
| "learning_rate": 2.0789260116314215e-07, |
| "loss": 0.5081, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.9137055837563451, |
| "grad_norm": 0.4063990008087812, |
| "learning_rate": 2.0561762352066638e-07, |
| "loss": 0.5109, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.9141890258641527, |
| "grad_norm": 0.4167108480628528, |
| "learning_rate": 2.0335490070595208e-07, |
| "loss": 0.5186, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.9146724679719603, |
| "grad_norm": 0.39430080435851855, |
| "learning_rate": 2.011044385027011e-07, |
| "loss": 0.5101, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.9151559100797679, |
| "grad_norm": 0.42096559238441866, |
| "learning_rate": 1.988662426632765e-07, |
| "loss": 0.5078, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.9156393521875755, |
| "grad_norm": 0.39723951707790667, |
| "learning_rate": 1.9664031890868795e-07, |
| "loss": 0.5223, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.9161227942953831, |
| "grad_norm": 0.3912147208179025, |
| "learning_rate": 1.9442667292857432e-07, |
| "loss": 0.509, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.9166062364031907, |
| "grad_norm": 0.4054442997347736, |
| "learning_rate": 1.922253103811944e-07, |
| "loss": 0.4972, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.9170896785109983, |
| "grad_norm": 0.4117401816100168, |
| "learning_rate": 1.9003623689340777e-07, |
| "loss": 0.5143, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.9175731206188059, |
| "grad_norm": 0.40528953423093284, |
| "learning_rate": 1.8785945806066297e-07, |
| "loss": 0.5186, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.9180565627266135, |
| "grad_norm": 0.4027696401480633, |
| "learning_rate": 1.85694979446982e-07, |
| "loss": 0.5167, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.9185400048344211, |
| "grad_norm": 0.38938110778215645, |
| "learning_rate": 1.835428065849465e-07, |
| "loss": 0.5141, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9190234469422287, |
| "grad_norm": 0.3958049685314876, |
| "learning_rate": 1.814029449756849e-07, |
| "loss": 0.5231, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.9195068890500363, |
| "grad_norm": 0.4039199277502588, |
| "learning_rate": 1.7927540008885414e-07, |
| "loss": 0.5088, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.9199903311578439, |
| "grad_norm": 0.40426884197944674, |
| "learning_rate": 1.7716017736263192e-07, |
| "loss": 0.5129, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.9204737732656514, |
| "grad_norm": 0.41358470698939953, |
| "learning_rate": 1.7505728220369667e-07, |
| "loss": 0.5203, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.920957215373459, |
| "grad_norm": 0.4250820090378729, |
| "learning_rate": 1.729667199872187e-07, |
| "loss": 0.5223, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.9214406574812666, |
| "grad_norm": 0.40899977989644076, |
| "learning_rate": 1.70888496056843e-07, |
| "loss": 0.5107, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.9219240995890742, |
| "grad_norm": 0.4187760713922149, |
| "learning_rate": 1.6882261572467862e-07, |
| "loss": 0.5142, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.9224075416968818, |
| "grad_norm": 0.39684261118945696, |
| "learning_rate": 1.6676908427128103e-07, |
| "loss": 0.4847, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.9228909838046894, |
| "grad_norm": 0.4124141033869449, |
| "learning_rate": 1.64727906945642e-07, |
| "loss": 0.5063, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.923374425912497, |
| "grad_norm": 0.4104731721152495, |
| "learning_rate": 1.6269908896517638e-07, |
| "loss": 0.5035, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.9238578680203046, |
| "grad_norm": 0.38208183163995635, |
| "learning_rate": 1.6068263551570596e-07, |
| "loss": 0.4855, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.9243413101281122, |
| "grad_norm": 0.37943822460943005, |
| "learning_rate": 1.5867855175144885e-07, |
| "loss": 0.4863, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.9248247522359198, |
| "grad_norm": 0.4169103989292416, |
| "learning_rate": 1.5668684279500245e-07, |
| "loss": 0.5077, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.9253081943437274, |
| "grad_norm": 0.41157707540822663, |
| "learning_rate": 1.5470751373733773e-07, |
| "loss": 0.5184, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.925791636451535, |
| "grad_norm": 0.39771451862665147, |
| "learning_rate": 1.5274056963777817e-07, |
| "loss": 0.5094, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.9262750785593425, |
| "grad_norm": 0.4092987974762817, |
| "learning_rate": 1.507860155239921e-07, |
| "loss": 0.5154, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.9267585206671501, |
| "grad_norm": 0.3854503813446518, |
| "learning_rate": 1.488438563919764e-07, |
| "loss": 0.4938, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.9272419627749577, |
| "grad_norm": 0.3900052964813903, |
| "learning_rate": 1.4691409720604732e-07, |
| "loss": 0.5077, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.9277254048827653, |
| "grad_norm": 0.40750712678387396, |
| "learning_rate": 1.449967428988247e-07, |
| "loss": 0.5145, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.9282088469905729, |
| "grad_norm": 0.4023813113333878, |
| "learning_rate": 1.4309179837122045e-07, |
| "loss": 0.5291, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.9286922890983804, |
| "grad_norm": 0.38502235475455626, |
| "learning_rate": 1.411992684924257e-07, |
| "loss": 0.5119, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.929175731206188, |
| "grad_norm": 0.40862887218787325, |
| "learning_rate": 1.3931915809990039e-07, |
| "loss": 0.5106, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.9296591733139956, |
| "grad_norm": 0.4123756674563694, |
| "learning_rate": 1.374514719993575e-07, |
| "loss": 0.5126, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.9301426154218032, |
| "grad_norm": 0.41456641529199556, |
| "learning_rate": 1.3559621496475438e-07, |
| "loss": 0.5145, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.9306260575296108, |
| "grad_norm": 0.4049152537963314, |
| "learning_rate": 1.3375339173827551e-07, |
| "loss": 0.5261, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.9311094996374184, |
| "grad_norm": 0.37450439680837744, |
| "learning_rate": 1.3192300703032733e-07, |
| "loss": 0.474, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.931592941745226, |
| "grad_norm": 0.41100475742292075, |
| "learning_rate": 1.3010506551952018e-07, |
| "loss": 0.5134, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.9320763838530336, |
| "grad_norm": 0.41369315234307685, |
| "learning_rate": 1.2829957185265863e-07, |
| "loss": 0.52, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.9325598259608412, |
| "grad_norm": 0.3885589982730842, |
| "learning_rate": 1.2650653064473106e-07, |
| "loss": 0.5031, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.9330432680686488, |
| "grad_norm": 0.3951920703691663, |
| "learning_rate": 1.2472594647889357e-07, |
| "loss": 0.5092, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.9335267101764564, |
| "grad_norm": 0.40947647060207415, |
| "learning_rate": 1.2295782390646494e-07, |
| "loss": 0.5177, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.934010152284264, |
| "grad_norm": 0.390574491653679, |
| "learning_rate": 1.2120216744690716e-07, |
| "loss": 0.5133, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.9344935943920716, |
| "grad_norm": 0.4045498383765011, |
| "learning_rate": 1.194589815878211e-07, |
| "loss": 0.5163, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.9349770364998792, |
| "grad_norm": 0.40440549648310886, |
| "learning_rate": 1.177282707849281e-07, |
| "loss": 0.5181, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.9354604786076868, |
| "grad_norm": 0.4024689599876574, |
| "learning_rate": 1.1601003946206723e-07, |
| "loss": 0.5181, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.9359439207154944, |
| "grad_norm": 0.3986512567562451, |
| "learning_rate": 1.1430429201117476e-07, |
| "loss": 0.5032, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.9364273628233019, |
| "grad_norm": 0.39397430112101045, |
| "learning_rate": 1.1261103279227858e-07, |
| "loss": 0.5178, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.9369108049311095, |
| "grad_norm": 0.4291769455926264, |
| "learning_rate": 1.1093026613348601e-07, |
| "loss": 0.5196, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.937394247038917, |
| "grad_norm": 0.3917679927009391, |
| "learning_rate": 1.0926199633097156e-07, |
| "loss": 0.4919, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.9378776891467246, |
| "grad_norm": 0.42599062790587783, |
| "learning_rate": 1.0760622764896866e-07, |
| "loss": 0.5147, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.9383611312545322, |
| "grad_norm": 0.4023777838627757, |
| "learning_rate": 1.0596296431975406e-07, |
| "loss": 0.5156, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.9388445733623398, |
| "grad_norm": 0.3966354448847634, |
| "learning_rate": 1.0433221054364174e-07, |
| "loss": 0.5065, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.9393280154701474, |
| "grad_norm": 0.4013413460541232, |
| "learning_rate": 1.0271397048897014e-07, |
| "loss": 0.5053, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.939811457577955, |
| "grad_norm": 0.37653088174864213, |
| "learning_rate": 1.0110824829209164e-07, |
| "loss": 0.4939, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.9402948996857626, |
| "grad_norm": 0.399035469753345, |
| "learning_rate": 9.951504805735979e-08, |
| "loss": 0.5106, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.9407783417935702, |
| "grad_norm": 0.3991989592342914, |
| "learning_rate": 9.793437385712479e-08, |
| "loss": 0.5153, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.9412617839013778, |
| "grad_norm": 0.4057585743893453, |
| "learning_rate": 9.636622973171583e-08, |
| "loss": 0.51, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.9417452260091854, |
| "grad_norm": 0.4054528739627977, |
| "learning_rate": 9.481061968943717e-08, |
| "loss": 0.516, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.942228668116993, |
| "grad_norm": 0.3921980636127477, |
| "learning_rate": 9.3267547706552e-08, |
| "loss": 0.5051, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.9427121102248006, |
| "grad_norm": 0.39913144156030567, |
| "learning_rate": 9.17370177272775e-08, |
| "loss": 0.5055, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9431955523326082, |
| "grad_norm": 0.4004586843938766, |
| "learning_rate": 9.021903366377093e-08, |
| "loss": 0.5164, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.9436789944404158, |
| "grad_norm": 0.4037223050343566, |
| "learning_rate": 8.8713599396123e-08, |
| "loss": 0.5098, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.9441624365482234, |
| "grad_norm": 0.39850858877215634, |
| "learning_rate": 8.72207187723445e-08, |
| "loss": 0.5211, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.9446458786560309, |
| "grad_norm": 0.41059877409881057, |
| "learning_rate": 8.5740395608358e-08, |
| "loss": 0.5121, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.9451293207638385, |
| "grad_norm": 0.40573184845060545, |
| "learning_rate": 8.427263368798955e-08, |
| "loss": 0.5256, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.9456127628716461, |
| "grad_norm": 0.3966583772201167, |
| "learning_rate": 8.281743676295639e-08, |
| "loss": 0.5183, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.9460962049794537, |
| "grad_norm": 0.40701943797191764, |
| "learning_rate": 8.13748085528604e-08, |
| "loss": 0.5135, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.9465796470872613, |
| "grad_norm": 0.37606341196980025, |
| "learning_rate": 7.99447527451741e-08, |
| "loss": 0.4903, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.9470630891950689, |
| "grad_norm": 0.4114856897492863, |
| "learning_rate": 7.852727299523577e-08, |
| "loss": 0.5068, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.9475465313028765, |
| "grad_norm": 0.4093526523044555, |
| "learning_rate": 7.71223729262377e-08, |
| "loss": 0.5127, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9480299734106841, |
| "grad_norm": 0.4130076310229578, |
| "learning_rate": 7.573005612921903e-08, |
| "loss": 0.5121, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.9485134155184917, |
| "grad_norm": 0.40254945616875554, |
| "learning_rate": 7.435032616305238e-08, |
| "loss": 0.5178, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.9489968576262993, |
| "grad_norm": 0.4108181664423654, |
| "learning_rate": 7.298318655443893e-08, |
| "loss": 0.5078, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.9494802997341069, |
| "grad_norm": 0.3954161759006289, |
| "learning_rate": 7.162864079789777e-08, |
| "loss": 0.5137, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.9499637418419145, |
| "grad_norm": 0.3993428213266096, |
| "learning_rate": 7.028669235575714e-08, |
| "loss": 0.496, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.950447183949722, |
| "grad_norm": 0.41493027982851327, |
| "learning_rate": 6.895734465814597e-08, |
| "loss": 0.5257, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.9509306260575296, |
| "grad_norm": 0.38537633628397905, |
| "learning_rate": 6.764060110298287e-08, |
| "loss": 0.5208, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.9514140681653372, |
| "grad_norm": 0.41057398606285567, |
| "learning_rate": 6.633646505597113e-08, |
| "loss": 0.5224, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.9518975102731448, |
| "grad_norm": 0.4420797620121168, |
| "learning_rate": 6.504493985058813e-08, |
| "loss": 0.5108, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.39854773939873966, |
| "learning_rate": 6.376602878807592e-08, |
| "loss": 0.5134, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9528643944887599, |
| "grad_norm": 0.4104047856111181, |
| "learning_rate": 6.249973513743345e-08, |
| "loss": 0.5079, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.9533478365965675, |
| "grad_norm": 0.40077931999667527, |
| "learning_rate": 6.124606213541052e-08, |
| "loss": 0.5196, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.9538312787043751, |
| "grad_norm": 0.43500257686302385, |
| "learning_rate": 6.000501298649653e-08, |
| "loss": 0.5197, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.9543147208121827, |
| "grad_norm": 0.4186094433656202, |
| "learning_rate": 5.8776590862911764e-08, |
| "loss": 0.5135, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.9547981629199903, |
| "grad_norm": 0.4119358911199865, |
| "learning_rate": 5.756079890460342e-08, |
| "loss": 0.5137, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.9552816050277979, |
| "grad_norm": 0.39694645564275877, |
| "learning_rate": 5.635764021923229e-08, |
| "loss": 0.5121, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.9557650471356055, |
| "grad_norm": 0.4154887872586203, |
| "learning_rate": 5.5167117882171104e-08, |
| "loss": 0.516, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.9562484892434131, |
| "grad_norm": 0.7692472130509296, |
| "learning_rate": 5.3989234936489556e-08, |
| "loss": 0.5055, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9567319313512207, |
| "grad_norm": 0.4198618304821996, |
| "learning_rate": 5.2823994392951497e-08, |
| "loss": 0.5094, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9572153734590283, |
| "grad_norm": 0.39385026351820934, |
| "learning_rate": 5.167139923000553e-08, |
| "loss": 0.4933, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9576988155668359, |
| "grad_norm": 0.4159053427086944, |
| "learning_rate": 5.053145239377777e-08, |
| "loss": 0.4936, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9581822576746435, |
| "grad_norm": 0.3990167973444839, |
| "learning_rate": 4.940415679806465e-08, |
| "loss": 0.5124, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9586656997824511, |
| "grad_norm": 0.4012277528608715, |
| "learning_rate": 4.828951532432457e-08, |
| "loss": 0.5151, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.9591491418902587, |
| "grad_norm": 0.4099731484035176, |
| "learning_rate": 4.718753082167071e-08, |
| "loss": 0.5191, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9596325839980663, |
| "grad_norm": 0.41474696363438857, |
| "learning_rate": 4.6098206106863774e-08, |
| "loss": 0.515, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9601160261058739, |
| "grad_norm": 0.4044716506352786, |
| "learning_rate": 4.5021543964306466e-08, |
| "loss": 0.5123, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9605994682136814, |
| "grad_norm": 0.40133573312591214, |
| "learning_rate": 4.395754714603351e-08, |
| "loss": 0.5133, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.961082910321489, |
| "grad_norm": 0.4089192998561785, |
| "learning_rate": 4.290621837170661e-08, |
| "loss": 0.5236, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9615663524292966, |
| "grad_norm": 0.39452360352891674, |
| "learning_rate": 4.186756032860728e-08, |
| "loss": 0.5137, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.9620497945371042, |
| "grad_norm": 0.39867371724056727, |
| "learning_rate": 4.08415756716285e-08, |
| "loss": 0.5093, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9625332366449117, |
| "grad_norm": 0.357065447847406, |
| "learning_rate": 3.9828267023269696e-08, |
| "loss": 0.4505, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9630166787527193, |
| "grad_norm": 0.427089982663271, |
| "learning_rate": 3.8827636973630126e-08, |
| "loss": 0.5101, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9635001208605269, |
| "grad_norm": 0.4025101063369687, |
| "learning_rate": 3.783968808039995e-08, |
| "loss": 0.5245, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9639835629683345, |
| "grad_norm": 0.4012223737061637, |
| "learning_rate": 3.68644228688575e-08, |
| "loss": 0.514, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9644670050761421, |
| "grad_norm": 0.39715847085154765, |
| "learning_rate": 3.590184383185758e-08, |
| "loss": 0.507, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.9649504471839497, |
| "grad_norm": 0.4019019064729592, |
| "learning_rate": 3.4951953429831484e-08, |
| "loss": 0.5093, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9654338892917573, |
| "grad_norm": 0.3997438820964838, |
| "learning_rate": 3.401475409077426e-08, |
| "loss": 0.4987, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9659173313995649, |
| "grad_norm": 0.42247021949710184, |
| "learning_rate": 3.309024821024354e-08, |
| "loss": 0.5099, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9664007735073725, |
| "grad_norm": 0.4228197536210846, |
| "learning_rate": 3.2178438151350685e-08, |
| "loss": 0.5181, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9668842156151801, |
| "grad_norm": 0.40195549014330173, |
| "learning_rate": 3.127932624475638e-08, |
| "loss": 0.5118, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9673676577229877, |
| "grad_norm": 0.40083823310970984, |
| "learning_rate": 3.039291478866169e-08, |
| "loss": 0.5265, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.9678510998307953, |
| "grad_norm": 0.4054162095867977, |
| "learning_rate": 2.9519206048807535e-08, |
| "loss": 0.5173, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9683345419386028, |
| "grad_norm": 0.4091589042260666, |
| "learning_rate": 2.8658202258462498e-08, |
| "loss": 0.5199, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9688179840464104, |
| "grad_norm": 0.37360554914951866, |
| "learning_rate": 2.7809905618422227e-08, |
| "loss": 0.4667, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.969301426154218, |
| "grad_norm": 0.4264262470861418, |
| "learning_rate": 2.6974318297001144e-08, |
| "loss": 0.5208, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9697848682620256, |
| "grad_norm": 0.4133603239690626, |
| "learning_rate": 2.615144243002743e-08, |
| "loss": 0.5049, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.9702683103698332, |
| "grad_norm": 0.41234915425778607, |
| "learning_rate": 2.534128012083914e-08, |
| "loss": 0.5215, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.9707517524776408, |
| "grad_norm": 0.39530605693418713, |
| "learning_rate": 2.4543833440275332e-08, |
| "loss": 0.5096, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9712351945854484, |
| "grad_norm": 0.42034129099753553, |
| "learning_rate": 2.375910442667495e-08, |
| "loss": 0.5111, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.971718636693256, |
| "grad_norm": 0.4128961831040994, |
| "learning_rate": 2.298709508586794e-08, |
| "loss": 0.5136, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9722020788010636, |
| "grad_norm": 0.40946352601157776, |
| "learning_rate": 2.2227807391172474e-08, |
| "loss": 0.5239, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.9726855209088712, |
| "grad_norm": 0.3999251664775986, |
| "learning_rate": 2.1481243283389408e-08, |
| "loss": 0.514, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9731689630166788, |
| "grad_norm": 0.412440530125608, |
| "learning_rate": 2.074740467079672e-08, |
| "loss": 0.5174, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.9736524051244864, |
| "grad_norm": 0.40416741933458183, |
| "learning_rate": 2.002629342914453e-08, |
| "loss": 0.5173, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.974135847232294, |
| "grad_norm": 0.4091688587167212, |
| "learning_rate": 1.9317911401651734e-08, |
| "loss": 0.5035, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9746192893401016, |
| "grad_norm": 0.41181207482580323, |
| "learning_rate": 1.862226039899995e-08, |
| "loss": 0.5194, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9751027314479092, |
| "grad_norm": 0.38917062513507206, |
| "learning_rate": 1.7939342199329023e-08, |
| "loss": 0.5081, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9755861735557168, |
| "grad_norm": 0.4058095413062891, |
| "learning_rate": 1.7269158548232633e-08, |
| "loss": 0.514, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9760696156635243, |
| "grad_norm": 0.3909310997249257, |
| "learning_rate": 1.661171115875493e-08, |
| "loss": 0.5086, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9765530577713318, |
| "grad_norm": 0.3924463631554743, |
| "learning_rate": 1.5967001711383877e-08, |
| "loss": 0.5074, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9770364998791394, |
| "grad_norm": 0.3897349184690982, |
| "learning_rate": 1.5335031854049055e-08, |
| "loss": 0.5164, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.977519941986947, |
| "grad_norm": 0.4006696563415638, |
| "learning_rate": 1.4715803202116075e-08, |
| "loss": 0.516, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.9780033840947546, |
| "grad_norm": 0.39127207135897235, |
| "learning_rate": 1.4109317338383832e-08, |
| "loss": 0.4864, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9784868262025622, |
| "grad_norm": 0.41074499955315413, |
| "learning_rate": 1.3515575813078386e-08, |
| "loss": 0.5276, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9789702683103698, |
| "grad_norm": 0.3989638057067789, |
| "learning_rate": 1.2934580143851294e-08, |
| "loss": 0.5116, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.9794537104181774, |
| "grad_norm": 0.4179669163774858, |
| "learning_rate": 1.2366331815774069e-08, |
| "loss": 0.5169, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.979937152525985, |
| "grad_norm": 0.420952308284563, |
| "learning_rate": 1.1810832281335394e-08, |
| "loss": 0.5221, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9804205946337926, |
| "grad_norm": 0.40444089801366945, |
| "learning_rate": 1.1268082960436688e-08, |
| "loss": 0.526, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9809040367416002, |
| "grad_norm": 0.39774599938725236, |
| "learning_rate": 1.0738085240389883e-08, |
| "loss": 0.5158, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.9813874788494078, |
| "grad_norm": 0.40528876629152616, |
| "learning_rate": 1.0220840475910765e-08, |
| "loss": 0.5148, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9818709209572154, |
| "grad_norm": 0.3880426443734388, |
| "learning_rate": 9.716349989118412e-09, |
| "loss": 0.4977, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.982354363065023, |
| "grad_norm": 0.397796748759872, |
| "learning_rate": 9.224615069532428e-09, |
| "loss": 0.5183, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9828378051728306, |
| "grad_norm": 0.41455233095701044, |
| "learning_rate": 8.745636974066274e-09, |
| "loss": 0.5151, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9833212472806382, |
| "grad_norm": 0.39149878382311915, |
| "learning_rate": 8.279416927026163e-09, |
| "loss": 0.4852, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9838046893884458, |
| "grad_norm": 0.428044296219464, |
| "learning_rate": 7.82595612010828e-09, |
| "loss": 0.5088, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9842881314962533, |
| "grad_norm": 0.39066146033771326, |
| "learning_rate": 7.385255712395456e-09, |
| "loss": 0.5092, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9847715736040609, |
| "grad_norm": 0.3944546565780817, |
| "learning_rate": 6.9573168303532775e-09, |
| "loss": 0.5048, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9852550157118685, |
| "grad_norm": 0.3875137169857006, |
| "learning_rate": 6.542140567827871e-09, |
| "loss": 0.5166, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9857384578196761, |
| "grad_norm": 0.3942008356766705, |
| "learning_rate": 6.1397279860431205e-09, |
| "loss": 0.4846, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9862218999274837, |
| "grad_norm": 0.4122295335735729, |
| "learning_rate": 5.750080113598455e-09, |
| "loss": 0.5191, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9867053420352913, |
| "grad_norm": 0.4027922091033447, |
| "learning_rate": 5.373197946464403e-09, |
| "loss": 0.509, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9871887841430989, |
| "grad_norm": 0.41834346896126373, |
| "learning_rate": 5.009082447983149e-09, |
| "loss": 0.52, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9876722262509064, |
| "grad_norm": 0.4293813449158595, |
| "learning_rate": 4.65773454886298e-09, |
| "loss": 0.5131, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.988155668358714, |
| "grad_norm": 0.3928243234499177, |
| "learning_rate": 4.319155147176624e-09, |
| "loss": 0.515, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.9886391104665216, |
| "grad_norm": 0.4041253868270013, |
| "learning_rate": 3.9933451083612464e-09, |
| "loss": 0.5001, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9891225525743292, |
| "grad_norm": 0.4010404057661429, |
| "learning_rate": 3.6803052652134572e-09, |
| "loss": 0.5077, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9896059946821368, |
| "grad_norm": 0.4298866341542551, |
| "learning_rate": 3.3800364178881996e-09, |
| "loss": 0.5112, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.9900894367899444, |
| "grad_norm": 0.39490801058055036, |
| "learning_rate": 3.092539333896527e-09, |
| "loss": 0.5087, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.990572878897752, |
| "grad_norm": 0.4110997944280951, |
| "learning_rate": 2.817814748104497e-09, |
| "loss": 0.5044, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9910563210055596, |
| "grad_norm": 0.4386800273412446, |
| "learning_rate": 2.555863362730393e-09, |
| "loss": 0.5217, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9915397631133672, |
| "grad_norm": 0.411463570514358, |
| "learning_rate": 2.30668584734306e-09, |
| "loss": 0.5117, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9920232052211748, |
| "grad_norm": 0.40419424446848334, |
| "learning_rate": 2.070282838859683e-09, |
| "loss": 0.5056, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9925066473289823, |
| "grad_norm": 0.40703300041841234, |
| "learning_rate": 1.8466549415463442e-09, |
| "loss": 0.5319, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9929900894367899, |
| "grad_norm": 0.42530845899182246, |
| "learning_rate": 1.635802727013025e-09, |
| "loss": 0.5138, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.9934735315445975, |
| "grad_norm": 0.4058607106461754, |
| "learning_rate": 1.4377267342158274e-09, |
| "loss": 0.4883, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.9939569736524051, |
| "grad_norm": 0.4062782037922318, |
| "learning_rate": 1.2524274694525329e-09, |
| "loss": 0.5225, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9944404157602127, |
| "grad_norm": 4.724922277274833, |
| "learning_rate": 1.0799054063626024e-09, |
| "loss": 0.5232, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9949238578680203, |
| "grad_norm": 0.3906054598062824, |
| "learning_rate": 9.201609859271765e-10, |
| "loss": 0.5132, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.9954072999758279, |
| "grad_norm": 0.3973857264750407, |
| "learning_rate": 7.731946164657445e-10, |
| "loss": 0.5083, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9958907420836355, |
| "grad_norm": 0.39787099969142303, |
| "learning_rate": 6.390066736355893e-10, |
| "loss": 0.5138, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9963741841914431, |
| "grad_norm": 0.4073985533388715, |
| "learning_rate": 5.17597500432343e-10, |
| "loss": 0.5134, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9968576262992507, |
| "grad_norm": 0.4217388952314898, |
| "learning_rate": 4.089674071872107e-10, |
| "loss": 0.5204, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9973410684070583, |
| "grad_norm": 0.3938957473860418, |
| "learning_rate": 3.131166715680811e-10, |
| "loss": 0.5134, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.9978245105148659, |
| "grad_norm": 0.4133454970429142, |
| "learning_rate": 2.3004553857675082e-10, |
| "loss": 0.5136, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9983079526226735, |
| "grad_norm": 3.9528345451843885, |
| "learning_rate": 1.5975422055003465e-10, |
| "loss": 0.5088, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9987913947304811, |
| "grad_norm": 0.4124191883874225, |
| "learning_rate": 1.022428971581002e-10, |
| "loss": 0.5106, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9992748368382887, |
| "grad_norm": 0.42214447971757757, |
| "learning_rate": 5.751171540391287e-11, |
| "loss": 0.513, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9997582789460963, |
| "grad_norm": 0.4373529560530007, |
| "learning_rate": 2.556078962490105e-11, |
| "loss": 0.5278, |
| "step": 2068 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4373529560530007, |
| "learning_rate": 6.390201489625547e-12, |
| "loss": 0.4723, |
| "step": 2069 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2069, |
| "total_flos": 2898754626256896.0, |
| "train_loss": 0.5475362745847836, |
| "train_runtime": 128435.5344, |
| "train_samples_per_second": 2.061, |
| "train_steps_per_second": 0.016 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2069, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 208, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2898754626256896.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |